1
1
mirror of https://github.com/github/semantic.git synced 2025-01-01 19:55:34 +03:00

Merge 'origin/master' into one-json-to-rule-them-all

This commit is contained in:
joshvera 2016-08-10 17:33:11 -04:00
parent 59932277f9
commit 21cb7f71d7
14 changed files with 193 additions and 154 deletions

View File

@ -118,16 +118,17 @@ test-suite semantic-diff-test
, deepseq
, dlist
, filepath
, free
, Glob
, hspec >= 2.1.10
, mtl
, QuickCheck >= 2.8.1
, quickcheck-text
, recursion-schemes >= 4.1
, semantic-diff
, text >= 1.2.1.3
, these
, free
, recursion-schemes >= 4.1
, vector
, wl-pprint-text
, protolude
if os(darwin)

View File

@ -1,15 +1,21 @@
{-# LANGUAGE RankNTypes #-}
module Data.RandomWalkSimilarity where
{-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators #-}
module Data.RandomWalkSimilarity
( rws
, pqGramDecorator
, featureVectorDecorator
, Gram(..)
) where
import Control.Applicative
import Control.Arrow ((&&&))
import Control.Monad.Random
import Control.Monad.State
import qualified Data.DList as DList
import Data.Functor.Both hiding (fst, snd)
import Data.Functor.Foldable as Foldable
import Data.Hashable
import qualified Data.KdTree.Static as KdTree
import qualified Data.List as List
import Data.Record
import qualified Data.Vector as Vector
import Patch
import Prologue
@ -18,25 +24,23 @@ import Test.QuickCheck hiding (Fixed)
import Test.QuickCheck.Random
-- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
rws :: (Hashable label, Eq annotation, Prologue.Foldable f, Functor f, Eq (f (Cofree f annotation)))
=> (Cofree f annotation -> Cofree f annotation -> Maybe (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation)))) -- ^ A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
-> (forall b. CofreeF f annotation b -> label) -- ^ A function to compute a label for an unpacked term.
-> [Cofree f annotation] -- ^ The list of old terms.
-> [Cofree f annotation] -- ^ The list of new terms.
-> [Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))]
rws compare getLabel as bs
rws :: (Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), HasField fields (Vector.Vector Double))
=> (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function which compares a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
-> [Cofree f (Record fields)] -- ^ The list of old terms.
-> [Cofree f (Record fields)] -- ^ The list of new terms.
-> [Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))] -- ^ The resulting list of similarity-matched diffs.
rws compare as bs
| null as, null bs = []
| null as = inserting <$> bs
| null bs = deleting <$> as
| otherwise = fmap snd . uncurry deleteRemaining . (`runState` (negate 1, fas)) $ traverse findNearestNeighbourTo fbs
where (p, q, d) = (2, 2, 15)
fas = zipWith featurize [0..] as
where fas = zipWith featurize [0..] as
fbs = zipWith featurize [0..] bs
kdas = KdTree.build (Vector.toList . feature) fas
featurize index term = UnmappedTerm index (featureVector d (pqGrams p q getLabel term)) term
featurize index term = UnmappedTerm index (getField (extract term)) term
findNearestNeighbourTo kv@(UnmappedTerm _ _ v) = do
(previous, unmapped) <- get
let (UnmappedTerm i _ _) = KdTree.nearest kdas kv
let UnmappedTerm i _ _ = KdTree.nearest kdas kv
fromMaybe (pure (negate 1, inserting v)) $ do
found <- find ((== i) . termIndex) unmapped
guard (i >= previous)
@ -55,38 +59,40 @@ data UnmappedTerm a = UnmappedTerm { termIndex :: {-# UNPACK #-} !Int, feature :
data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] }
deriving (Eq, Show)
-- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term.
pqGrams :: (Prologue.Foldable f, Functor f) => Int -> Int -> (forall b. CofreeF f annotation b -> label) -> Cofree f annotation -> DList.DList (Gram label)
pqGrams p q getLabel = uncurry DList.cons . cata merge . setRootBase . setRootStem . cata go
where go c = cofree (Gram [] [ Just (getLabel c) ] :< (assignParent (Just (getLabel c)) p <$> tailF c))
merge (head :< tail) = let tail' = toList tail in (head, DList.fromList (windowed q setBases [] (fst <$> tail')) <> foldMap snd tail')
assignParent parentLabel n tree
| n > 0 = let gram :< functor = runCofree tree in cofree $ prependParent parentLabel gram :< (assignParent parentLabel (pred n) <$> functor)
| otherwise = tree
prependParent parentLabel gram = gram { stem = parentLabel : stem gram }
setBases gram siblings rest = setBase gram (siblings >>= base) : rest
setBase gram newBase = gram { base = take q (newBase <> repeat Nothing) }
setRootBase term = let (a :< f) = runCofree term in cofree (setBase a (base a) :< f)
setRootStem = foldr (\ p rest -> assignParent Nothing p . rest) identity [0..p]
-- | Annotates a term with the corresponding p,q-gram at each node.
pqGramDecorator :: Traversable f
=> (forall b. CofreeF f (Record fields) b -> label) -- ^ A function computing the label from an arbitrary unpacked term. This function can use the annotation and functors constructor, but not any recursive values inside the functor (since theyre held parametric in 'b').
-> Int -- ^ 'p'; the desired stem length for the grams.
-> Int -- ^ 'q'; the desired base length for the grams.
-> Cofree f (Record fields) -- ^ The term to decorate.
-> Cofree f (Record (Gram label ': fields)) -- ^ The decorated term.
pqGramDecorator getLabel p q = cata algebra
where algebra term = let label = getLabel term in
cofree ((gram label .: headF term) :< assignParentAndSiblingLabels (tailF term) label)
gram label = Gram (padToSize p []) (padToSize q (pure (Just label)))
assignParentAndSiblingLabels functor label = (`evalState` (siblingLabels functor)) (for functor (assignLabels label))
assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields)))
assignLabels label a = case runCofree a of
RCons gram rest :< functor -> do
labels <- get
put (drop 1 labels)
pure $! cofree ((gram { stem = padToSize p (Just label : stem gram), base = padToSize q labels } .: rest) :< functor)
siblingLabels :: Traversable f => f (Cofree f (Record (Gram label ': fields))) -> [Maybe label]
siblingLabels = foldMap (base . rhead . extract)
padToSize n list = take n (list <> repeat empty)
-- | A sliding-window fold over _n_ items of a list per iteration.
windowed :: Int -> (a -> [a] -> b -> b) -> b -> [a] -> b
windowed n f seed = para alg
where alg xs = case xs of
Cons a (as, b) -> f a (take n $ a : as) b
Nil -> seed
-- | Computes a unit vector of the specified dimension from a hash.
unitVector :: Int -> Int -> Vector.Vector Double
unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.replicate d getRandom)))
where normalize vec = fmap (/ vmagnitude vec) vec
vmagnitude = sqrtDouble . Vector.sum . fmap (** 2)
-- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree.
featureVector :: Hashable label => Int -> DList.DList (Gram label) -> Vector.Vector Double
featureVector d bag = sumVectors $ unitDVector . hash <$> bag
where unitDVector hash = normalize . (`evalRand` mkQCGen hash) $ Prologue.sequence (Vector.replicate d getRandom)
normalize vec = fmap (/ vmagnitude vec) vec
sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0)
-- | The magnitude of a Euclidean vector, i.e. its distance from the origin.
vmagnitude :: Vector.Vector Double -> Double
vmagnitude = sqrtDouble . Vector.sum . fmap (** 2)
-- | Annotates a term with a feature vector at each node.
featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields))
featureVectorDecorator getLabel p q d
= cata (\ (RCons gram rest :< functor) ->
cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor))
. pqGramDecorator getLabel p q
-- Instances

View File

@ -18,6 +18,10 @@ infixr 0 .:
(.:) :: h -> Record t -> Record (h ': t)
(.:) = RCons
-- | Get the first element of a non-empty record.
rhead :: Record (head ': tail) -> head
rhead (RCons head _) = head
-- Classes

View File

@ -1,15 +1,18 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-}
module Diffing where
import qualified Prologue
import Prologue hiding (fst, snd)
import qualified Data.ByteString.Char8 as B1
import Data.Functor.Both
import Data.Functor.Foldable
import Data.RandomWalkSimilarity
import Data.Record
import qualified Data.Text.IO as TextIO
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
import Data.These
import qualified Data.Vector as Vector
import Diff
import Info
import Interpreter
@ -38,18 +41,16 @@ import Data.Aeson.Encoding (encodingToLazyByteString)
-- | result.
-- | Returns the rendered result strictly, so it's always fully evaluated
-- | with respect to other IO actions.
diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser fields -> Renderer (Record fields) -> Both SourceBlob -> IO Output
diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record (Vector.Vector Double ': fields)) -> Both SourceBlob -> IO Output
diffFiles parser renderer sourceBlobs = do
let sources = source <$> sourceBlobs
terms <- sequence $ parser <$> sourceBlobs
terms <- traverse (fmap (featureVectorDecorator getLabel p q d) . parser) sourceBlobs
let replaceLeaves = breakDownLeavesByWord <$> sources
let areNullOids = runBothWith (\a b -> (oid a == nullOid || null (source a), oid b == nullOid || null (source b))) sourceBlobs
let textDiff = case areNullOids of
(True, False) -> pure $ Insert (snd terms)
(False, True) -> pure $ Delete (fst terms)
(_, _) ->
runBothWith (diffTerms construct compareCategoryEq diffCostWithCachedTermCosts) (replaceLeaves <*> terms)
runBothWith (diffTerms construct compareCategoryEq diffCostWithCachedTermCosts) terms
pure $! renderer sourceBlobs textDiff
@ -58,10 +59,13 @@ diffFiles parser renderer sourceBlobs = do
getCost diff = case runFree diff of
Free (info :< _) -> cost <$> info
Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch)))
getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
(p, q, d) = (2, 2, 15)
-- | Return a parser based on the file extension (including the ".").
parserForType :: Text -> Parser '[Range, Category, Cost]
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category])
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C ts_language_c
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
@ -69,28 +73,29 @@ parserForType mediaType = case languageForType mediaType of
_ -> lineByLineParser
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser '[Range, Category, Cost]
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category])
lineByLineParser blob = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> cofree <$> leaves
where
input = source blob
lines = actualLines input
root children = let cost = 1 + fromIntegral (length children) in
(Range 0 (length input) .: Other "program" .: cost .: RNil) :< Indexed children
leaf charIndex line = (Range charIndex (charIndex + T.length line) .: Other "program" .: 1 .: RNil) :< Leaf line
root children = ((Range 0 $ length input) .: Program .: RNil) :< Indexed children
leaf charIndex line = ((Range charIndex $ charIndex + T.length line) .: Program .: RNil) :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum <> [ leaf charIndex (toText line) ] , charIndex + length line)
toText = T.pack . Source.toString
-- | Return the parser that should be used for a given path.
parserForFilepath :: FilePath -> Parser '[Range, Category, Cost]
parserForFilepath = parserForType . toS . takeExtension
parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category])
parserForFilepath path blob = decorateTerm termCostDecorator <$> do
parsed <- parserForType (toS (takeExtension path)) blob
pure $! breakDownLeavesByWord (source blob) parsed
-- | Replace every string leaf with leaves of the words in the string.
breakDownLeavesByWord :: (HasField fields Category, HasField fields Cost, HasField fields Range) => Source Char -> Term Text (Record fields) -> Term Text (Record fields)
breakDownLeavesByWord :: (HasField fields Category, HasField fields Range) => Source Char -> Term Text (Record fields) -> Term Text (Record fields)
breakDownLeavesByWord source = cata replaceIn
where
replaceIn (info :< syntax) = let cost' = 1 + sum (cost . extract <$> syntax') in cofree $ setCost info cost' :< syntax'
replaceIn (info :< syntax) = cofree $ info :< syntax'
where syntax' = case (ranges, syntax) of
(_:_:_, Leaf _) | category info /= Regex -> Indexed (makeLeaf info <$> ranges)
_ -> syntax
@ -111,6 +116,18 @@ readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field
-- | Decorate a 'Term' using a function to compute the annotation values at every node.
decorateTerm :: Functor f => TermDecorator f fields field -> Cofree f (Record fields) -> Cofree f (Record (field ': fields))
decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c)
-- | Term decorator computing the cost of an unpacked term.
termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost
termCostDecorator c = 1 + sum (cost <$> tailF c)
-- | Determine whether two terms are comparable based on the equality of their categories.
compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Term leaf (Record fields) -> Bool
compareCategoryEq = (==) `on` category . extract
@ -121,14 +138,12 @@ diffCostWithCachedTermCosts diff = unCost $ case runFree diff of
Pure patch -> sum (cost . extract <$> patch)
-- | Returns a rendered diff given a parser, diff arguments and two source blobs.
textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser fields -> DiffArguments -> Both SourceBlob -> IO Output
textDiff parser arguments = diffFiles parser renderer
where
renderer = case format arguments of
Split -> split
Patch -> patch
JSON -> json
Summary -> summary
textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Output
textDiff parser arguments = diffFiles parser $ case format arguments of
Split -> split
Patch -> patch
JSON -> json
Summary -> summary
-- | Returns a truncated diff given diff arguments and two source blobs.
truncatedDiff :: DiffArguments -> Both SourceBlob -> IO Output
@ -139,7 +154,7 @@ truncatedDiff arguments sources = pure $ case format arguments of
Summary -> SummaryOutput mempty
-- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs.
printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser fields -> DiffArguments -> Both SourceBlob -> IO ()
printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO ()
printDiff parser arguments sources = do
rendered <- textDiff parser arguments sources
let renderedText = case rendered of

View File

@ -5,10 +5,10 @@ import Algorithm
import Data.Align.Generic
import Data.Functor.Foldable
import Data.Functor.Both
import Data.Hashable
import Data.RandomWalkSimilarity
import Data.Record
import Data.These
import qualified Data.Vector as Vector
import Diff
import qualified Control.Monad.Free.Church as F
import Info
@ -25,7 +25,7 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation -
type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation
-- | Diff two terms recursively, given functions characterizing the diffing.
diffTerms :: (Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category)
diffTerms :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double))
=> DiffConstructor leaf (Record fields) -- ^ A function to wrap up & possibly annotate every produced diff.
-> Comparable leaf (Record fields) -- ^ A function to determine whether or not two terms should even be compared.
-> SES.Cost (Diff leaf (Record fields)) -- ^ A function to compute the cost of a given diff node.
@ -35,15 +35,12 @@ diffTerms :: (Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Catego
diffTerms construct comparable cost a b = fromMaybe (replacing a b) $ diffComparableTerms construct comparable cost a b
-- | Diff two terms recursively, given functions characterizing the diffing. If the terms are incomparable, returns 'Nothing'.
diffComparableTerms :: (Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
diffComparableTerms :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
diffComparableTerms construct comparable cost = recur
where recur a b
| (category <$> a) == (category <$> b) = hylo construct runCofree <$> zipTerms a b
| comparable a b = runAlgorithm construct recur cost getLabel (Just <$> algorithmWithTerms construct a b)
| comparable a b = runAlgorithm construct recur cost (Just <$> algorithmWithTerms construct a b)
| otherwise = Nothing
getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
-- | Construct an algorithm to diff a pair of terms.
algorithmWithTerms :: (TermF leaf (Both a) diff -> diff) -> Term leaf a -> Term leaf a -> Algorithm (Term leaf a) diff diff
@ -74,16 +71,15 @@ algorithmWithTerms construct t1 t2 = case (unwrap t1, unwrap t2) of
byIndex constructor a b = Algorithm.byIndex a b >>= annotate . constructor
-- | Run an algorithm, given functions characterizing the evaluation.
runAlgorithm :: (Functor f, GAlign f, Eq a, Eq annotation, Eq (f (Cofree f annotation)), Prologue.Foldable f, Traversable f, Hashable label)
=> (CofreeF f (Both annotation) (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))) -> Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))) -- ^ A function to wrap up & possibly annotate every produced diff.
-> (Cofree f annotation -> Cofree f annotation -> Maybe (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation)))) -- ^ A function to diff two subterms recursively, if they are comparable, or else return 'Nothing'.
-> SES.Cost (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))) -- ^ A function to compute the cost of a given diff node.
-> (forall b. CofreeF f annotation b -> label) -- ^ A function to compute a label for a given term.
-> Algorithm (Cofree f annotation) (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))) a -- ^ The algorithm to run.
runAlgorithm :: (Functor f, GAlign f, Eq a, Eq (Record fields), Eq (f (Cofree f (Record fields))), Prologue.Foldable f, Traversable f, HasField fields (Vector.Vector Double))
=> (CofreeF f (Both (Record fields)) (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -> Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to wrap up & possibly annotate every produced diff.
-> (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function to diff two subterms recursively, if they are comparable, or else return 'Nothing'.
-> SES.Cost (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to compute the cost of a given diff node.
-> Algorithm (Cofree f (Record fields)) (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) a -- ^ The algorithm to run.
-> a
runAlgorithm construct recur cost getLabel = F.iter $ \case
runAlgorithm construct recur cost = F.iter $ \case
Recursive a b f -> f (maybe (replacing a b) (construct . (both (extract a) (extract b) :<)) $ do
aligned <- galign (unwrap a) (unwrap b)
traverse (these (Just . deleting) (Just . inserting) recur) aligned)
ByIndex as bs f -> f (ses recur cost as bs)
BySimilarity as bs f -> f (rws recur getLabel as bs)
BySimilarity as bs f -> f (rws recur as bs)

View File

@ -12,10 +12,10 @@ import qualified Data.Set as Set
import Source
import SourceSpan
-- | A function that takes a source file and returns an annotated AST.
-- | A function that takes a source blob and returns an annotated AST.
-- | The return is in the IO monad because some of the parsers are written in C
-- | and aren't pure.
type Parser fields = SourceBlob -> IO (Term Text (Record fields))
type Parser f a = SourceBlob -> IO (Cofree f a)
-- | Whether a category is an Operator Category
isOperator :: Category -> Bool
@ -141,4 +141,4 @@ methodDefinitions :: HasField fields Category => Term Text (Record fields) -> [T
methodDefinitions definitions |
Other "class_body" == category (extract definitions),
S.Indexed definitions' <- unwrap definitions = definitions'
methodDefinitions _ = mempty
methodDefinitions _ = mempty

View File

@ -4,10 +4,11 @@ module TreeSitter where
import Prologue hiding (Constructor)
import Data.Record
import Category
import Info
import Language
import Parser
import Range
import Source
import qualified Syntax
import Foreign
import Foreign.C.String
import Text.Parser.TreeSitter hiding (Language(..))
@ -15,7 +16,7 @@ import qualified Text.Parser.TreeSitter as TS
import SourceSpan
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
treeSitterParser :: Language -> Ptr TS.Language -> Parser '[Range, Category, Cost]
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record '[Range, Category])
treeSitterParser language grammar blob = do
document <- ts_document_make
ts_document_set_language document grammar
@ -94,7 +95,7 @@ defaultCategoryForNodeName name = case name of
_ -> Other name
-- | Return a parser for a tree sitter language & document.
documentToTerm :: Language -> Ptr Document -> Parser '[Range, Category, Cost]
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category])
documentToTerm language document blob = alloca $ \ root -> do
ts_document_root_node_p document root
toTerm root
@ -110,8 +111,7 @@ documentToTerm language document blob = alloca $ \ root -> do
, spanStart = SourcePos (fromIntegral $ ts_node_p_start_point_row node) (fromIntegral $ ts_node_p_start_point_column node)
, spanEnd = SourcePos (fromIntegral $ ts_node_p_end_point_row node) (fromIntegral $ ts_node_p_end_point_column node) }
let cost' = 1 + sum (cost . extract <$> children)
let info = range .: (categoriesForLanguage language (toS name)) .: cost' .: RNil
let info = range .: (categoriesForLanguage language (toS name)) .: RNil
pure $! termConstructor (source blob) sourceSpan info children
getChild node n out = do
_ <- ts_node_p_named_child node n out

View File

@ -1,31 +1,31 @@
{-# LANGUAGE DataKinds, FlexibleContexts, GeneralizedNewtypeDeriving #-}
module CorpusSpec where
import Unsafe (unsafeFromJust)
import Diffing
import Renderer
import qualified Renderer.JSON as J
import qualified Renderer.Patch as P
import qualified Renderer.Split as Split
import Category
import Control.DeepSeq
import Data.Functor.Both
import Data.Record
import Data.List (union)
import Data.Record
import qualified Data.Text as T
import qualified Data.Vector as Vector
import Diffing
import GHC.Show (Show(..))
import Info
import Prologue hiding (fst, snd, lookup)
import Renderer
import qualified Renderer.JSON as J
import qualified Renderer.Patch as P
import qualified Renderer.Split as Split
import qualified Source as S
import System.FilePath
import System.FilePath.Glob
import Test.Hspec
import GHC.Show (Show(..))
import Unsafe (unsafeFromJust)
spec :: Spec
spec = parallel $ do
describe "crashers crash" $ runTestsIn "test/crashers-todo/" (\ a b ->
a `deepseq` pure (a == b) `shouldThrow` anyException)
describe "crashers crash" . runTestsIn "test/crashers-todo/" $ \ a b ->
a `deepseq` pure (a == b) `shouldThrow` anyException
describe "crashers should not crash" $ runTestsIn "test/crashers/" shouldBe
describe "todos are incorrect" $ runTestsIn "test/diffs-todo/" shouldNotBe
describe "should produce the correct diff" $ runTestsIn "test/diffs/" shouldBe
@ -46,6 +46,7 @@ spec = parallel $ do
correctTests paths = filter (\(_, _, _, output) -> isJust output) $ testsForPaths paths
testsForPaths (aPath, bPath, json, patch, split) = [ ("json", J.json, paths, json), ("patch", P.patch, paths, patch), ("split", Split.split, paths, split) ]
where paths = both aPath bPath
-- | Return all the examples from the given directory. Examples are expected to
-- | have the form "foo.A.js", "foo.B.js", "foo.patch.js". Diffs are not
-- | required as the test may be verifying that the inputs don't crash.
@ -73,7 +74,7 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte
-- | Given file paths for A, B, and, optionally, a diff, return whether diffing
-- | the files will produce the diff. If no diff is provided, then the result
-- | is true, but the diff will still be calculated.
testDiff :: Renderer (Record '[Range, Category, Cost]) -> Both (Maybe FilePath) -> Maybe FilePath -> (Maybe Verbatim -> Maybe Verbatim -> Expectation) -> Expectation
testDiff :: Renderer (Record '[Vector.Vector Double, Cost, Range, Category]) -> Both (Maybe FilePath) -> Maybe FilePath -> (Maybe Verbatim -> Maybe Verbatim -> Expectation) -> Expectation
testDiff renderer paths diff matcher = do
sources <- traverse (traverse readAndTranscodeFile) paths
actual <- fmap Verbatim <$> traverse ((pure . concatOutputs . pure) <=< diffFiles' sources) parser

View File

@ -1,9 +1,10 @@
{-# LANGUAGE DataKinds #-}
module Data.RandomWalkSimilarity.Spec where
import Category
import Data.DList as DList hiding (toList)
import Data.RandomWalkSimilarity
import Data.Record
import Diff
import Info
import Patch
import Prologue
import Syntax
@ -15,21 +16,22 @@ import Test.QuickCheck
spec :: Spec
spec = parallel $ do
describe "pqGrams" $ do
prop "produces grams with stems of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $
\ (term, p, q) -> pqGrams p q headF (toTerm term :: Term Text Text) `shouldSatisfy` all ((== p) . length . stem)
let positively = succ . abs
describe "pqGramDecorator" $ do
prop "produces grams with stems of the specified length" $
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively p)) . length . stem . rhead)
prop "produces grams with bases of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $
\ (term, p, q) -> pqGrams p q headF (toTerm term :: Term Text Text) `shouldSatisfy` all ((== q) . length . base)
prop "produces grams with bases of the specified width" $
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively q)) . length . base . rhead)
describe "featureVector" $ do
prop "produces a vector of the specified dimension" . forAll (arbitrary `suchThat` ((> 0) . Prologue.snd)) $
\ (grams, d) -> length (featureVector d (fromList (grams :: [Gram Text]))) `shouldBe` d
describe "featureVectorDecorator" $ do
prop "produces a vector of the specified dimension" $
\ (term, p, q, d) -> featureVectorDecorator (rhead . headF) (positively p) (positively q) (positively d) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively d)) . length . rhead)
describe "rws" $ do
let compare a b = if extract a == extract b then Just (pure (Replace a b)) else Nothing
prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $
\ (as, bs) -> let tas = toTerm <$> as
tbs = toTerm <$> bs
diff = free (Free (pure Program :< Indexed (rws compare headF tas tbs :: [Diff Text Category]))) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree (Program :< Indexed tas)), Just (cofree (Program :< Indexed tbs)))
\ (as, bs) -> let tas = featureVectorDecorator (category . headF) 2 2 15 . toTerm <$> (as :: [ArbitraryTerm Text (Record '[Category])])
tbs = featureVectorDecorator (category . headF) 2 2 15 . toTerm <$> (bs :: [ArbitraryTerm Text (Record '[Category])])
diff = free (Free (pure (pure 0 .: Program .: RNil) :< Indexed (rws compare tas tbs))) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((pure 0 .: Program .: RNil) :< Indexed tas)), Just (cofree ((pure 0 .: Program .: RNil) :< Indexed tbs)))

View File

@ -2,10 +2,12 @@
module Diff.Spec where
import Category
import Data.RandomWalkSimilarity
import Data.Record
import Data.Text.Arbitrary ()
import Diff
import Diff.Arbitrary
import Info
import Interpreter
import Prologue
import Term.Arbitrary
@ -15,23 +17,24 @@ import Test.QuickCheck
spec :: Spec
spec = parallel $ do
let toTerm' = featureVectorDecorator (category . headF) 2 2 15 . toTerm
prop "equality is reflexive" $
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in
diff `shouldBe` diff
prop "equal terms produce identity diffs" $
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category])) in
\ a -> let term = toTerm' (a :: ArbitraryTerm Text (Record '[Category])) in
diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0
describe "beforeTerm" $ do
prop "recovers the before term" $
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
beforeTerm diff `shouldBe` Just (toTerm a)
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in
beforeTerm diff `shouldBe` Just (toTerm' a)
describe "afterTerm" $ do
prop "recovers the after term" $
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
afterTerm diff `shouldBe` Just (toTerm b)
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in
afterTerm diff `shouldBe` Just (toTerm' b)
describe "ArbitraryDiff" $ do
prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $

View File

@ -1,25 +1,24 @@
{-# LANGUAGE DataKinds #-}
module DiffSummarySpec where
import Prologue
import Category
import Data.Functor.Both
import Data.List (partition)
import Data.RandomWalkSimilarity
import Data.Record
import Test.Hspec
import Test.Hspec.QuickCheck
import Diff
import Diff.Arbitrary
import DiffSummary
import Info
import Interpreter
import Patch
import Prologue
import Source
import Syntax
import Term
import Patch
import Category
import DiffSummary
import Text.PrettyPrint.Leijen.Text (pretty)
import Test.Hspec.QuickCheck
import Diff.Arbitrary
import Data.List (partition)
import Term.Arbitrary
import Interpreter
import Info
import Source
import Data.Functor.Both
import Test.Hspec
import Test.Hspec.QuickCheck
arrayInfo :: Record '[Category, Range]
arrayInfo = ArrayLiteral .: Range 0 3 .: RNil
@ -46,7 +45,7 @@ spec = parallel $ do
diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotation = Nothing } ]
prop "equal terms produce identity diffs" $
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range])) in
\ a -> let term = featureVectorDecorator (category . headF) 2 2 15 (toTerm (a :: ArbitraryTerm Text (Record '[Category, Range]))) in
diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` []
describe "annotatedSummaries" $ do
@ -99,7 +98,7 @@ isIndexedOrFixed' syntax = case syntax of
isBranchInfo :: DiffInfo -> Bool
isBranchInfo info = case info of
(BranchInfo _ _ _) -> True
(LeafInfo _ _) -> False
_ -> False
isBranchNode :: Patch DiffInfo -> Bool
isBranchNode = any isBranchInfo

View File

@ -2,8 +2,10 @@
module InterpreterSpec where
import Category
import Diff
import Data.RandomWalkSimilarity
import Data.Record
import Diff
import Info
import Interpreter
import Patch
import Prologue
@ -15,16 +17,17 @@ import Test.Hspec.QuickCheck
spec :: Spec
spec = parallel $ do
describe "interpret" $ do
let decorate = featureVectorDecorator (category . headF) 2 2 15
it "returns a replacement when comparing two unicode equivalent terms" $
let termA = cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text)
termB = cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in
diffTerms (free . Free) ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
let termA = decorate . cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text)
termB = decorate . cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in
diffTerms wrap ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
prop "produces correct diffs" $
\ a b -> let diff = diffTerms (free . Free) ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category]) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b))
\ a b -> let diff = diffTerms wrap ((==) `on` extract) diffCost (decorate (toTerm a)) (decorate (toTerm (b :: ArbitraryTerm Text (Record '[Category])))) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (decorate (toTerm a)), Just (decorate (toTerm b)))
prop "constructs zero-cost diffs of equal terms" $
\ a -> let term = toTerm a
diff = diffTerms (free . Free) ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category]) in
\ a -> let term = decorate (toTerm (a :: ArbitraryTerm Text (Record '[Category])))
diff = diffTerms wrap ((==) `on` extract) diffCost term term in
diffCost diff `shouldBe` 0

View File

@ -7,11 +7,10 @@ import Range
import Renderer.Patch
import Source
import Syntax
import Category
import Test.Hspec
spec :: Spec
spec = parallel $
describe "hunks" $
spec = parallel $ do
describe "hunks" $ do
it "empty diffs have empty hunks" $
hunks (free . Free $ pure (Range 0 0 .: StringLiteral .: 1 .: 0 .: RNil) :< Leaf "") (both (SourceBlob (fromList "") "abcde" "path2.txt" (Just defaultPlainBlob)) (SourceBlob (fromList "") "xyz" "path2.txt" (Just defaultPlainBlob))) `shouldBe` [Hunk {offset = pure 0, changes = [], trailingContext = []}]
hunks (wrap $ pure (Range 0 0 .: RNil) :< Leaf ("" :: Text)) (both (SourceBlob (fromList "") "abcde" "path2.txt" (Just defaultPlainBlob)) (SourceBlob (fromList "") "xyz" "path2.txt" (Just defaultPlainBlob))) `shouldBe` [Hunk {offset = pure 0, changes = [], trailingContext = []}]

View File

@ -0,0 +1,10 @@
diff --git a/test/diffs/dictionary.A.js b/test/diffs/dictionary.B.js
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 100644
--- a/test/diffs/dictionary.A.js
+++ b/test/diffs/dictionary.B.js
@@ -1,5 +1,5 @@
{
- "b": 4,
+ "b": 5,
"a": 5
}