mirror of
https://github.com/github/semantic.git
synced 2025-01-01 19:55:34 +03:00
Merge 'origin/master' into one-json-to-rule-them-all
This commit is contained in:
parent
59932277f9
commit
21cb7f71d7
@ -118,16 +118,17 @@ test-suite semantic-diff-test
|
||||
, deepseq
|
||||
, dlist
|
||||
, filepath
|
||||
, free
|
||||
, Glob
|
||||
, hspec >= 2.1.10
|
||||
, mtl
|
||||
, QuickCheck >= 2.8.1
|
||||
, quickcheck-text
|
||||
, recursion-schemes >= 4.1
|
||||
, semantic-diff
|
||||
, text >= 1.2.1.3
|
||||
, these
|
||||
, free
|
||||
, recursion-schemes >= 4.1
|
||||
, vector
|
||||
, wl-pprint-text
|
||||
, protolude
|
||||
if os(darwin)
|
||||
|
@ -1,15 +1,21 @@
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
module Data.RandomWalkSimilarity where
|
||||
{-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators #-}
|
||||
module Data.RandomWalkSimilarity
|
||||
( rws
|
||||
, pqGramDecorator
|
||||
, featureVectorDecorator
|
||||
, Gram(..)
|
||||
) where
|
||||
|
||||
import Control.Applicative
|
||||
import Control.Arrow ((&&&))
|
||||
import Control.Monad.Random
|
||||
import Control.Monad.State
|
||||
import qualified Data.DList as DList
|
||||
import Data.Functor.Both hiding (fst, snd)
|
||||
import Data.Functor.Foldable as Foldable
|
||||
import Data.Hashable
|
||||
import qualified Data.KdTree.Static as KdTree
|
||||
import qualified Data.List as List
|
||||
import Data.Record
|
||||
import qualified Data.Vector as Vector
|
||||
import Patch
|
||||
import Prologue
|
||||
@ -18,25 +24,23 @@ import Test.QuickCheck hiding (Fixed)
|
||||
import Test.QuickCheck.Random
|
||||
|
||||
-- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
|
||||
rws :: (Hashable label, Eq annotation, Prologue.Foldable f, Functor f, Eq (f (Cofree f annotation)))
|
||||
=> (Cofree f annotation -> Cofree f annotation -> Maybe (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation)))) -- ^ A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
|
||||
-> (forall b. CofreeF f annotation b -> label) -- ^ A function to compute a label for an unpacked term.
|
||||
-> [Cofree f annotation] -- ^ The list of old terms.
|
||||
-> [Cofree f annotation] -- ^ The list of new terms.
|
||||
-> [Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))]
|
||||
rws compare getLabel as bs
|
||||
rws :: (Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), HasField fields (Vector.Vector Double))
|
||||
=> (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function which compares a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
|
||||
-> [Cofree f (Record fields)] -- ^ The list of old terms.
|
||||
-> [Cofree f (Record fields)] -- ^ The list of new terms.
|
||||
-> [Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))] -- ^ The resulting list of similarity-matched diffs.
|
||||
rws compare as bs
|
||||
| null as, null bs = []
|
||||
| null as = inserting <$> bs
|
||||
| null bs = deleting <$> as
|
||||
| otherwise = fmap snd . uncurry deleteRemaining . (`runState` (negate 1, fas)) $ traverse findNearestNeighbourTo fbs
|
||||
where (p, q, d) = (2, 2, 15)
|
||||
fas = zipWith featurize [0..] as
|
||||
where fas = zipWith featurize [0..] as
|
||||
fbs = zipWith featurize [0..] bs
|
||||
kdas = KdTree.build (Vector.toList . feature) fas
|
||||
featurize index term = UnmappedTerm index (featureVector d (pqGrams p q getLabel term)) term
|
||||
featurize index term = UnmappedTerm index (getField (extract term)) term
|
||||
findNearestNeighbourTo kv@(UnmappedTerm _ _ v) = do
|
||||
(previous, unmapped) <- get
|
||||
let (UnmappedTerm i _ _) = KdTree.nearest kdas kv
|
||||
let UnmappedTerm i _ _ = KdTree.nearest kdas kv
|
||||
fromMaybe (pure (negate 1, inserting v)) $ do
|
||||
found <- find ((== i) . termIndex) unmapped
|
||||
guard (i >= previous)
|
||||
@ -55,38 +59,40 @@ data UnmappedTerm a = UnmappedTerm { termIndex :: {-# UNPACK #-} !Int, feature :
|
||||
data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] }
|
||||
deriving (Eq, Show)
|
||||
|
||||
-- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term.
|
||||
pqGrams :: (Prologue.Foldable f, Functor f) => Int -> Int -> (forall b. CofreeF f annotation b -> label) -> Cofree f annotation -> DList.DList (Gram label)
|
||||
pqGrams p q getLabel = uncurry DList.cons . cata merge . setRootBase . setRootStem . cata go
|
||||
where go c = cofree (Gram [] [ Just (getLabel c) ] :< (assignParent (Just (getLabel c)) p <$> tailF c))
|
||||
merge (head :< tail) = let tail' = toList tail in (head, DList.fromList (windowed q setBases [] (fst <$> tail')) <> foldMap snd tail')
|
||||
assignParent parentLabel n tree
|
||||
| n > 0 = let gram :< functor = runCofree tree in cofree $ prependParent parentLabel gram :< (assignParent parentLabel (pred n) <$> functor)
|
||||
| otherwise = tree
|
||||
prependParent parentLabel gram = gram { stem = parentLabel : stem gram }
|
||||
setBases gram siblings rest = setBase gram (siblings >>= base) : rest
|
||||
setBase gram newBase = gram { base = take q (newBase <> repeat Nothing) }
|
||||
setRootBase term = let (a :< f) = runCofree term in cofree (setBase a (base a) :< f)
|
||||
setRootStem = foldr (\ p rest -> assignParent Nothing p . rest) identity [0..p]
|
||||
-- | Annotates a term with the corresponding p,q-gram at each node.
|
||||
pqGramDecorator :: Traversable f
|
||||
=> (forall b. CofreeF f (Record fields) b -> label) -- ^ A function computing the label from an arbitrary unpacked term. This function can use the annotation and functor’s constructor, but not any recursive values inside the functor (since they’re held parametric in 'b').
|
||||
-> Int -- ^ 'p'; the desired stem length for the grams.
|
||||
-> Int -- ^ 'q'; the desired base length for the grams.
|
||||
-> Cofree f (Record fields) -- ^ The term to decorate.
|
||||
-> Cofree f (Record (Gram label ': fields)) -- ^ The decorated term.
|
||||
pqGramDecorator getLabel p q = cata algebra
|
||||
where algebra term = let label = getLabel term in
|
||||
cofree ((gram label .: headF term) :< assignParentAndSiblingLabels (tailF term) label)
|
||||
gram label = Gram (padToSize p []) (padToSize q (pure (Just label)))
|
||||
assignParentAndSiblingLabels functor label = (`evalState` (siblingLabels functor)) (for functor (assignLabels label))
|
||||
assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields)))
|
||||
assignLabels label a = case runCofree a of
|
||||
RCons gram rest :< functor -> do
|
||||
labels <- get
|
||||
put (drop 1 labels)
|
||||
pure $! cofree ((gram { stem = padToSize p (Just label : stem gram), base = padToSize q labels } .: rest) :< functor)
|
||||
siblingLabels :: Traversable f => f (Cofree f (Record (Gram label ': fields))) -> [Maybe label]
|
||||
siblingLabels = foldMap (base . rhead . extract)
|
||||
padToSize n list = take n (list <> repeat empty)
|
||||
|
||||
-- | A sliding-window fold over _n_ items of a list per iteration.
|
||||
windowed :: Int -> (a -> [a] -> b -> b) -> b -> [a] -> b
|
||||
windowed n f seed = para alg
|
||||
where alg xs = case xs of
|
||||
Cons a (as, b) -> f a (take n $ a : as) b
|
||||
Nil -> seed
|
||||
-- | Computes a unit vector of the specified dimension from a hash.
|
||||
unitVector :: Int -> Int -> Vector.Vector Double
|
||||
unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.replicate d getRandom)))
|
||||
where normalize vec = fmap (/ vmagnitude vec) vec
|
||||
vmagnitude = sqrtDouble . Vector.sum . fmap (** 2)
|
||||
|
||||
|
||||
-- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree.
|
||||
featureVector :: Hashable label => Int -> DList.DList (Gram label) -> Vector.Vector Double
|
||||
featureVector d bag = sumVectors $ unitDVector . hash <$> bag
|
||||
where unitDVector hash = normalize . (`evalRand` mkQCGen hash) $ Prologue.sequence (Vector.replicate d getRandom)
|
||||
normalize vec = fmap (/ vmagnitude vec) vec
|
||||
sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0)
|
||||
|
||||
-- | The magnitude of a Euclidean vector, i.e. its distance from the origin.
|
||||
vmagnitude :: Vector.Vector Double -> Double
|
||||
vmagnitude = sqrtDouble . Vector.sum . fmap (** 2)
|
||||
-- | Annotates a term with a feature vector at each node.
|
||||
featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields))
|
||||
featureVectorDecorator getLabel p q d
|
||||
= cata (\ (RCons gram rest :< functor) ->
|
||||
cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor))
|
||||
. pqGramDecorator getLabel p q
|
||||
|
||||
|
||||
-- Instances
|
||||
|
@ -18,6 +18,10 @@ infixr 0 .:
|
||||
(.:) :: h -> Record t -> Record (h ': t)
|
||||
(.:) = RCons
|
||||
|
||||
-- | Get the first element of a non-empty record.
|
||||
rhead :: Record (head ': tail) -> head
|
||||
rhead (RCons head _) = head
|
||||
|
||||
|
||||
-- Classes
|
||||
|
||||
|
@ -1,15 +1,18 @@
|
||||
{-# LANGUAGE DataKinds #-}
|
||||
{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-}
|
||||
module Diffing where
|
||||
|
||||
import qualified Prologue
|
||||
import Prologue hiding (fst, snd)
|
||||
import qualified Data.ByteString.Char8 as B1
|
||||
import Data.Functor.Both
|
||||
import Data.Functor.Foldable
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import qualified Data.Text.IO as TextIO
|
||||
import qualified Data.Text.ICU.Detect as Detect
|
||||
import qualified Data.Text.ICU.Convert as Convert
|
||||
import Data.These
|
||||
import qualified Data.Vector as Vector
|
||||
import Diff
|
||||
import Info
|
||||
import Interpreter
|
||||
@ -38,18 +41,16 @@ import Data.Aeson.Encoding (encodingToLazyByteString)
|
||||
-- | result.
|
||||
-- | Returns the rendered result strictly, so it's always fully evaluated
|
||||
-- | with respect to other IO actions.
|
||||
diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser fields -> Renderer (Record fields) -> Both SourceBlob -> IO Output
|
||||
diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record (Vector.Vector Double ': fields)) -> Both SourceBlob -> IO Output
|
||||
diffFiles parser renderer sourceBlobs = do
|
||||
let sources = source <$> sourceBlobs
|
||||
terms <- sequence $ parser <$> sourceBlobs
|
||||
terms <- traverse (fmap (featureVectorDecorator getLabel p q d) . parser) sourceBlobs
|
||||
|
||||
let replaceLeaves = breakDownLeavesByWord <$> sources
|
||||
let areNullOids = runBothWith (\a b -> (oid a == nullOid || null (source a), oid b == nullOid || null (source b))) sourceBlobs
|
||||
let textDiff = case areNullOids of
|
||||
(True, False) -> pure $ Insert (snd terms)
|
||||
(False, True) -> pure $ Delete (fst terms)
|
||||
(_, _) ->
|
||||
runBothWith (diffTerms construct compareCategoryEq diffCostWithCachedTermCosts) (replaceLeaves <*> terms)
|
||||
runBothWith (diffTerms construct compareCategoryEq diffCostWithCachedTermCosts) terms
|
||||
|
||||
pure $! renderer sourceBlobs textDiff
|
||||
|
||||
@ -58,10 +59,13 @@ diffFiles parser renderer sourceBlobs = do
|
||||
getCost diff = case runFree diff of
|
||||
Free (info :< _) -> cost <$> info
|
||||
Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch)))
|
||||
|
||||
getLabel (h :< t) = (category h, case t of
|
||||
Leaf s -> Just s
|
||||
_ -> Nothing)
|
||||
(p, q, d) = (2, 2, 15)
|
||||
|
||||
-- | Return a parser based on the file extension (including the ".").
|
||||
parserForType :: Text -> Parser '[Range, Category, Cost]
|
||||
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category])
|
||||
parserForType mediaType = case languageForType mediaType of
|
||||
Just C -> treeSitterParser C ts_language_c
|
||||
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
|
||||
@ -69,28 +73,29 @@ parserForType mediaType = case languageForType mediaType of
|
||||
_ -> lineByLineParser
|
||||
|
||||
-- | A fallback parser that treats a file simply as rows of strings.
|
||||
lineByLineParser :: Parser '[Range, Category, Cost]
|
||||
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category])
|
||||
lineByLineParser blob = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
|
||||
(leaves, _) -> cofree <$> leaves
|
||||
where
|
||||
input = source blob
|
||||
lines = actualLines input
|
||||
root children = let cost = 1 + fromIntegral (length children) in
|
||||
(Range 0 (length input) .: Other "program" .: cost .: RNil) :< Indexed children
|
||||
leaf charIndex line = (Range charIndex (charIndex + T.length line) .: Other "program" .: 1 .: RNil) :< Leaf line
|
||||
root children = ((Range 0 $ length input) .: Program .: RNil) :< Indexed children
|
||||
leaf charIndex line = ((Range charIndex $ charIndex + T.length line) .: Program .: RNil) :< Leaf line
|
||||
annotateLeaves (accum, charIndex) line =
|
||||
(accum <> [ leaf charIndex (toText line) ] , charIndex + length line)
|
||||
toText = T.pack . Source.toString
|
||||
|
||||
-- | Return the parser that should be used for a given path.
|
||||
parserForFilepath :: FilePath -> Parser '[Range, Category, Cost]
|
||||
parserForFilepath = parserForType . toS . takeExtension
|
||||
parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category])
|
||||
parserForFilepath path blob = decorateTerm termCostDecorator <$> do
|
||||
parsed <- parserForType (toS (takeExtension path)) blob
|
||||
pure $! breakDownLeavesByWord (source blob) parsed
|
||||
|
||||
-- | Replace every string leaf with leaves of the words in the string.
|
||||
breakDownLeavesByWord :: (HasField fields Category, HasField fields Cost, HasField fields Range) => Source Char -> Term Text (Record fields) -> Term Text (Record fields)
|
||||
breakDownLeavesByWord :: (HasField fields Category, HasField fields Range) => Source Char -> Term Text (Record fields) -> Term Text (Record fields)
|
||||
breakDownLeavesByWord source = cata replaceIn
|
||||
where
|
||||
replaceIn (info :< syntax) = let cost' = 1 + sum (cost . extract <$> syntax') in cofree $ setCost info cost' :< syntax'
|
||||
replaceIn (info :< syntax) = cofree $ info :< syntax'
|
||||
where syntax' = case (ranges, syntax) of
|
||||
(_:_:_, Leaf _) | category info /= Regex -> Indexed (makeLeaf info <$> ranges)
|
||||
_ -> syntax
|
||||
@ -111,6 +116,18 @@ readAndTranscodeFile path = do
|
||||
text <- B1.readFile path
|
||||
transcode text
|
||||
|
||||
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
|
||||
type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field
|
||||
|
||||
-- | Decorate a 'Term' using a function to compute the annotation values at every node.
|
||||
decorateTerm :: Functor f => TermDecorator f fields field -> Cofree f (Record fields) -> Cofree f (Record (field ': fields))
|
||||
decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c)
|
||||
|
||||
-- | Term decorator computing the cost of an unpacked term.
|
||||
termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost
|
||||
termCostDecorator c = 1 + sum (cost <$> tailF c)
|
||||
|
||||
-- | Determine whether two terms are comparable based on the equality of their categories.
|
||||
compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Term leaf (Record fields) -> Bool
|
||||
compareCategoryEq = (==) `on` category . extract
|
||||
|
||||
@ -121,14 +138,12 @@ diffCostWithCachedTermCosts diff = unCost $ case runFree diff of
|
||||
Pure patch -> sum (cost . extract <$> patch)
|
||||
|
||||
-- | Returns a rendered diff given a parser, diff arguments and two source blobs.
|
||||
textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser fields -> DiffArguments -> Both SourceBlob -> IO Output
|
||||
textDiff parser arguments = diffFiles parser renderer
|
||||
where
|
||||
renderer = case format arguments of
|
||||
Split -> split
|
||||
Patch -> patch
|
||||
JSON -> json
|
||||
Summary -> summary
|
||||
textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Output
|
||||
textDiff parser arguments = diffFiles parser $ case format arguments of
|
||||
Split -> split
|
||||
Patch -> patch
|
||||
JSON -> json
|
||||
Summary -> summary
|
||||
|
||||
-- | Returns a truncated diff given diff arguments and two source blobs.
|
||||
truncatedDiff :: DiffArguments -> Both SourceBlob -> IO Output
|
||||
@ -139,7 +154,7 @@ truncatedDiff arguments sources = pure $ case format arguments of
|
||||
Summary -> SummaryOutput mempty
|
||||
|
||||
-- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs.
|
||||
printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser fields -> DiffArguments -> Both SourceBlob -> IO ()
|
||||
printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO ()
|
||||
printDiff parser arguments sources = do
|
||||
rendered <- textDiff parser arguments sources
|
||||
let renderedText = case rendered of
|
||||
|
@ -5,10 +5,10 @@ import Algorithm
|
||||
import Data.Align.Generic
|
||||
import Data.Functor.Foldable
|
||||
import Data.Functor.Both
|
||||
import Data.Hashable
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import Data.These
|
||||
import qualified Data.Vector as Vector
|
||||
import Diff
|
||||
import qualified Control.Monad.Free.Church as F
|
||||
import Info
|
||||
@ -25,7 +25,7 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation -
|
||||
type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation
|
||||
|
||||
-- | Diff two terms recursively, given functions characterizing the diffing.
|
||||
diffTerms :: (Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category)
|
||||
diffTerms :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double))
|
||||
=> DiffConstructor leaf (Record fields) -- ^ A function to wrap up & possibly annotate every produced diff.
|
||||
-> Comparable leaf (Record fields) -- ^ A function to determine whether or not two terms should even be compared.
|
||||
-> SES.Cost (Diff leaf (Record fields)) -- ^ A function to compute the cost of a given diff node.
|
||||
@ -35,15 +35,12 @@ diffTerms :: (Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Catego
|
||||
diffTerms construct comparable cost a b = fromMaybe (replacing a b) $ diffComparableTerms construct comparable cost a b
|
||||
|
||||
-- | Diff two terms recursively, given functions characterizing the diffing. If the terms are incomparable, returns 'Nothing'.
|
||||
diffComparableTerms :: (Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
|
||||
diffComparableTerms :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields))
|
||||
diffComparableTerms construct comparable cost = recur
|
||||
where recur a b
|
||||
| (category <$> a) == (category <$> b) = hylo construct runCofree <$> zipTerms a b
|
||||
| comparable a b = runAlgorithm construct recur cost getLabel (Just <$> algorithmWithTerms construct a b)
|
||||
| comparable a b = runAlgorithm construct recur cost (Just <$> algorithmWithTerms construct a b)
|
||||
| otherwise = Nothing
|
||||
getLabel (h :< t) = (category h, case t of
|
||||
Leaf s -> Just s
|
||||
_ -> Nothing)
|
||||
|
||||
-- | Construct an algorithm to diff a pair of terms.
|
||||
algorithmWithTerms :: (TermF leaf (Both a) diff -> diff) -> Term leaf a -> Term leaf a -> Algorithm (Term leaf a) diff diff
|
||||
@ -74,16 +71,15 @@ algorithmWithTerms construct t1 t2 = case (unwrap t1, unwrap t2) of
|
||||
byIndex constructor a b = Algorithm.byIndex a b >>= annotate . constructor
|
||||
|
||||
-- | Run an algorithm, given functions characterizing the evaluation.
|
||||
runAlgorithm :: (Functor f, GAlign f, Eq a, Eq annotation, Eq (f (Cofree f annotation)), Prologue.Foldable f, Traversable f, Hashable label)
|
||||
=> (CofreeF f (Both annotation) (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))) -> Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))) -- ^ A function to wrap up & possibly annotate every produced diff.
|
||||
-> (Cofree f annotation -> Cofree f annotation -> Maybe (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation)))) -- ^ A function to diff two subterms recursively, if they are comparable, or else return 'Nothing'.
|
||||
-> SES.Cost (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))) -- ^ A function to compute the cost of a given diff node.
|
||||
-> (forall b. CofreeF f annotation b -> label) -- ^ A function to compute a label for a given term.
|
||||
-> Algorithm (Cofree f annotation) (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))) a -- ^ The algorithm to run.
|
||||
runAlgorithm :: (Functor f, GAlign f, Eq a, Eq (Record fields), Eq (f (Cofree f (Record fields))), Prologue.Foldable f, Traversable f, HasField fields (Vector.Vector Double))
|
||||
=> (CofreeF f (Both (Record fields)) (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -> Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to wrap up & possibly annotate every produced diff.
|
||||
-> (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function to diff two subterms recursively, if they are comparable, or else return 'Nothing'.
|
||||
-> SES.Cost (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to compute the cost of a given diff node.
|
||||
-> Algorithm (Cofree f (Record fields)) (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) a -- ^ The algorithm to run.
|
||||
-> a
|
||||
runAlgorithm construct recur cost getLabel = F.iter $ \case
|
||||
runAlgorithm construct recur cost = F.iter $ \case
|
||||
Recursive a b f -> f (maybe (replacing a b) (construct . (both (extract a) (extract b) :<)) $ do
|
||||
aligned <- galign (unwrap a) (unwrap b)
|
||||
traverse (these (Just . deleting) (Just . inserting) recur) aligned)
|
||||
ByIndex as bs f -> f (ses recur cost as bs)
|
||||
BySimilarity as bs f -> f (rws recur getLabel as bs)
|
||||
BySimilarity as bs f -> f (rws recur as bs)
|
||||
|
@ -12,10 +12,10 @@ import qualified Data.Set as Set
|
||||
import Source
|
||||
import SourceSpan
|
||||
|
||||
-- | A function that takes a source file and returns an annotated AST.
|
||||
-- | A function that takes a source blob and returns an annotated AST.
|
||||
-- | The return is in the IO monad because some of the parsers are written in C
|
||||
-- | and aren't pure.
|
||||
type Parser fields = SourceBlob -> IO (Term Text (Record fields))
|
||||
type Parser f a = SourceBlob -> IO (Cofree f a)
|
||||
|
||||
-- | Whether a category is an Operator Category
|
||||
isOperator :: Category -> Bool
|
||||
@ -141,4 +141,4 @@ methodDefinitions :: HasField fields Category => Term Text (Record fields) -> [T
|
||||
methodDefinitions definitions |
|
||||
Other "class_body" == category (extract definitions),
|
||||
S.Indexed definitions' <- unwrap definitions = definitions'
|
||||
methodDefinitions _ = mempty
|
||||
methodDefinitions _ = mempty
|
||||
|
@ -4,10 +4,11 @@ module TreeSitter where
|
||||
import Prologue hiding (Constructor)
|
||||
import Data.Record
|
||||
import Category
|
||||
import Info
|
||||
import Language
|
||||
import Parser
|
||||
import Range
|
||||
import Source
|
||||
import qualified Syntax
|
||||
import Foreign
|
||||
import Foreign.C.String
|
||||
import Text.Parser.TreeSitter hiding (Language(..))
|
||||
@ -15,7 +16,7 @@ import qualified Text.Parser.TreeSitter as TS
|
||||
import SourceSpan
|
||||
|
||||
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
|
||||
treeSitterParser :: Language -> Ptr TS.Language -> Parser '[Range, Category, Cost]
|
||||
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record '[Range, Category])
|
||||
treeSitterParser language grammar blob = do
|
||||
document <- ts_document_make
|
||||
ts_document_set_language document grammar
|
||||
@ -94,7 +95,7 @@ defaultCategoryForNodeName name = case name of
|
||||
_ -> Other name
|
||||
|
||||
-- | Return a parser for a tree sitter language & document.
|
||||
documentToTerm :: Language -> Ptr Document -> Parser '[Range, Category, Cost]
|
||||
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category])
|
||||
documentToTerm language document blob = alloca $ \ root -> do
|
||||
ts_document_root_node_p document root
|
||||
toTerm root
|
||||
@ -110,8 +111,7 @@ documentToTerm language document blob = alloca $ \ root -> do
|
||||
, spanStart = SourcePos (fromIntegral $ ts_node_p_start_point_row node) (fromIntegral $ ts_node_p_start_point_column node)
|
||||
, spanEnd = SourcePos (fromIntegral $ ts_node_p_end_point_row node) (fromIntegral $ ts_node_p_end_point_column node) }
|
||||
|
||||
let cost' = 1 + sum (cost . extract <$> children)
|
||||
let info = range .: (categoriesForLanguage language (toS name)) .: cost' .: RNil
|
||||
let info = range .: (categoriesForLanguage language (toS name)) .: RNil
|
||||
pure $! termConstructor (source blob) sourceSpan info children
|
||||
getChild node n out = do
|
||||
_ <- ts_node_p_named_child node n out
|
||||
|
@ -1,31 +1,31 @@
|
||||
{-# LANGUAGE DataKinds, FlexibleContexts, GeneralizedNewtypeDeriving #-}
|
||||
module CorpusSpec where
|
||||
|
||||
import Unsafe (unsafeFromJust)
|
||||
import Diffing
|
||||
import Renderer
|
||||
import qualified Renderer.JSON as J
|
||||
import qualified Renderer.Patch as P
|
||||
import qualified Renderer.Split as Split
|
||||
|
||||
import Category
|
||||
import Control.DeepSeq
|
||||
import Data.Functor.Both
|
||||
import Data.Record
|
||||
import Data.List (union)
|
||||
import Data.Record
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Vector as Vector
|
||||
import Diffing
|
||||
import GHC.Show (Show(..))
|
||||
import Info
|
||||
import Prologue hiding (fst, snd, lookup)
|
||||
import Renderer
|
||||
import qualified Renderer.JSON as J
|
||||
import qualified Renderer.Patch as P
|
||||
import qualified Renderer.Split as Split
|
||||
import qualified Source as S
|
||||
import System.FilePath
|
||||
import System.FilePath.Glob
|
||||
import Test.Hspec
|
||||
import GHC.Show (Show(..))
|
||||
import Unsafe (unsafeFromJust)
|
||||
|
||||
spec :: Spec
|
||||
spec = parallel $ do
|
||||
describe "crashers crash" $ runTestsIn "test/crashers-todo/" (\ a b ->
|
||||
a `deepseq` pure (a == b) `shouldThrow` anyException)
|
||||
describe "crashers crash" . runTestsIn "test/crashers-todo/" $ \ a b ->
|
||||
a `deepseq` pure (a == b) `shouldThrow` anyException
|
||||
describe "crashers should not crash" $ runTestsIn "test/crashers/" shouldBe
|
||||
describe "todos are incorrect" $ runTestsIn "test/diffs-todo/" shouldNotBe
|
||||
describe "should produce the correct diff" $ runTestsIn "test/diffs/" shouldBe
|
||||
@ -46,6 +46,7 @@ spec = parallel $ do
|
||||
correctTests paths = filter (\(_, _, _, output) -> isJust output) $ testsForPaths paths
|
||||
testsForPaths (aPath, bPath, json, patch, split) = [ ("json", J.json, paths, json), ("patch", P.patch, paths, patch), ("split", Split.split, paths, split) ]
|
||||
where paths = both aPath bPath
|
||||
|
||||
-- | Return all the examples from the given directory. Examples are expected to
|
||||
-- | have the form "foo.A.js", "foo.B.js", "foo.patch.js". Diffs are not
|
||||
-- | required as the test may be verifying that the inputs don't crash.
|
||||
@ -73,7 +74,7 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte
|
||||
-- | Given file paths for A, B, and, optionally, a diff, return whether diffing
|
||||
-- | the files will produce the diff. If no diff is provided, then the result
|
||||
-- | is true, but the diff will still be calculated.
|
||||
testDiff :: Renderer (Record '[Range, Category, Cost]) -> Both (Maybe FilePath) -> Maybe FilePath -> (Maybe Verbatim -> Maybe Verbatim -> Expectation) -> Expectation
|
||||
testDiff :: Renderer (Record '[Vector.Vector Double, Cost, Range, Category]) -> Both (Maybe FilePath) -> Maybe FilePath -> (Maybe Verbatim -> Maybe Verbatim -> Expectation) -> Expectation
|
||||
testDiff renderer paths diff matcher = do
|
||||
sources <- traverse (traverse readAndTranscodeFile) paths
|
||||
actual <- fmap Verbatim <$> traverse ((pure . concatOutputs . pure) <=< diffFiles' sources) parser
|
||||
|
@ -1,9 +1,10 @@
|
||||
{-# LANGUAGE DataKinds #-}
|
||||
module Data.RandomWalkSimilarity.Spec where
|
||||
|
||||
import Category
|
||||
import Data.DList as DList hiding (toList)
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import Diff
|
||||
import Info
|
||||
import Patch
|
||||
import Prologue
|
||||
import Syntax
|
||||
@ -15,21 +16,22 @@ import Test.QuickCheck
|
||||
|
||||
spec :: Spec
|
||||
spec = parallel $ do
|
||||
describe "pqGrams" $ do
|
||||
prop "produces grams with stems of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $
|
||||
\ (term, p, q) -> pqGrams p q headF (toTerm term :: Term Text Text) `shouldSatisfy` all ((== p) . length . stem)
|
||||
let positively = succ . abs
|
||||
describe "pqGramDecorator" $ do
|
||||
prop "produces grams with stems of the specified length" $
|
||||
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively p)) . length . stem . rhead)
|
||||
|
||||
prop "produces grams with bases of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $
|
||||
\ (term, p, q) -> pqGrams p q headF (toTerm term :: Term Text Text) `shouldSatisfy` all ((== q) . length . base)
|
||||
prop "produces grams with bases of the specified width" $
|
||||
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively q)) . length . base . rhead)
|
||||
|
||||
describe "featureVector" $ do
|
||||
prop "produces a vector of the specified dimension" . forAll (arbitrary `suchThat` ((> 0) . Prologue.snd)) $
|
||||
\ (grams, d) -> length (featureVector d (fromList (grams :: [Gram Text]))) `shouldBe` d
|
||||
describe "featureVectorDecorator" $ do
|
||||
prop "produces a vector of the specified dimension" $
|
||||
\ (term, p, q, d) -> featureVectorDecorator (rhead . headF) (positively p) (positively q) (positively d) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively d)) . length . rhead)
|
||||
|
||||
describe "rws" $ do
|
||||
let compare a b = if extract a == extract b then Just (pure (Replace a b)) else Nothing
|
||||
prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $
|
||||
\ (as, bs) -> let tas = toTerm <$> as
|
||||
tbs = toTerm <$> bs
|
||||
diff = free (Free (pure Program :< Indexed (rws compare headF tas tbs :: [Diff Text Category]))) in
|
||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree (Program :< Indexed tas)), Just (cofree (Program :< Indexed tbs)))
|
||||
\ (as, bs) -> let tas = featureVectorDecorator (category . headF) 2 2 15 . toTerm <$> (as :: [ArbitraryTerm Text (Record '[Category])])
|
||||
tbs = featureVectorDecorator (category . headF) 2 2 15 . toTerm <$> (bs :: [ArbitraryTerm Text (Record '[Category])])
|
||||
diff = free (Free (pure (pure 0 .: Program .: RNil) :< Indexed (rws compare tas tbs))) in
|
||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((pure 0 .: Program .: RNil) :< Indexed tas)), Just (cofree ((pure 0 .: Program .: RNil) :< Indexed tbs)))
|
||||
|
@ -2,10 +2,12 @@
|
||||
module Diff.Spec where
|
||||
|
||||
import Category
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import Data.Text.Arbitrary ()
|
||||
import Diff
|
||||
import Diff.Arbitrary
|
||||
import Info
|
||||
import Interpreter
|
||||
import Prologue
|
||||
import Term.Arbitrary
|
||||
@ -15,23 +17,24 @@ import Test.QuickCheck
|
||||
|
||||
spec :: Spec
|
||||
spec = parallel $ do
|
||||
let toTerm' = featureVectorDecorator (category . headF) 2 2 15 . toTerm
|
||||
prop "equality is reflexive" $
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in
|
||||
diff `shouldBe` diff
|
||||
|
||||
prop "equal terms produce identity diffs" $
|
||||
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category])) in
|
||||
\ a -> let term = toTerm' (a :: ArbitraryTerm Text (Record '[Category])) in
|
||||
diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0
|
||||
|
||||
describe "beforeTerm" $ do
|
||||
prop "recovers the before term" $
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
|
||||
beforeTerm diff `shouldBe` Just (toTerm a)
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in
|
||||
beforeTerm diff `shouldBe` Just (toTerm' a)
|
||||
|
||||
describe "afterTerm" $ do
|
||||
prop "recovers the after term" $
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in
|
||||
afterTerm diff `shouldBe` Just (toTerm b)
|
||||
\ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in
|
||||
afterTerm diff `shouldBe` Just (toTerm' b)
|
||||
|
||||
describe "ArbitraryDiff" $ do
|
||||
prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $
|
||||
|
@ -1,25 +1,24 @@
|
||||
{-# LANGUAGE DataKinds #-}
|
||||
module DiffSummarySpec where
|
||||
|
||||
import Prologue
|
||||
import Category
|
||||
import Data.Functor.Both
|
||||
import Data.List (partition)
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import Test.Hspec
|
||||
import Test.Hspec.QuickCheck
|
||||
import Diff
|
||||
import Diff.Arbitrary
|
||||
import DiffSummary
|
||||
import Info
|
||||
import Interpreter
|
||||
import Patch
|
||||
import Prologue
|
||||
import Source
|
||||
import Syntax
|
||||
import Term
|
||||
import Patch
|
||||
import Category
|
||||
import DiffSummary
|
||||
import Text.PrettyPrint.Leijen.Text (pretty)
|
||||
import Test.Hspec.QuickCheck
|
||||
import Diff.Arbitrary
|
||||
import Data.List (partition)
|
||||
import Term.Arbitrary
|
||||
import Interpreter
|
||||
import Info
|
||||
import Source
|
||||
import Data.Functor.Both
|
||||
import Test.Hspec
|
||||
import Test.Hspec.QuickCheck
|
||||
|
||||
arrayInfo :: Record '[Category, Range]
|
||||
arrayInfo = ArrayLiteral .: Range 0 3 .: RNil
|
||||
@ -46,7 +45,7 @@ spec = parallel $ do
|
||||
diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotation = Nothing } ]
|
||||
|
||||
prop "equal terms produce identity diffs" $
|
||||
\ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range])) in
|
||||
\ a -> let term = featureVectorDecorator (category . headF) 2 2 15 (toTerm (a :: ArbitraryTerm Text (Record '[Category, Range]))) in
|
||||
diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` []
|
||||
|
||||
describe "annotatedSummaries" $ do
|
||||
@ -99,7 +98,7 @@ isIndexedOrFixed' syntax = case syntax of
|
||||
isBranchInfo :: DiffInfo -> Bool
|
||||
isBranchInfo info = case info of
|
||||
(BranchInfo _ _ _) -> True
|
||||
(LeafInfo _ _) -> False
|
||||
_ -> False
|
||||
|
||||
isBranchNode :: Patch DiffInfo -> Bool
|
||||
isBranchNode = any isBranchInfo
|
||||
|
@ -2,8 +2,10 @@
|
||||
module InterpreterSpec where
|
||||
|
||||
import Category
|
||||
import Diff
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import Diff
|
||||
import Info
|
||||
import Interpreter
|
||||
import Patch
|
||||
import Prologue
|
||||
@ -15,16 +17,17 @@ import Test.Hspec.QuickCheck
|
||||
spec :: Spec
|
||||
spec = parallel $ do
|
||||
describe "interpret" $ do
|
||||
let decorate = featureVectorDecorator (category . headF) 2 2 15
|
||||
it "returns a replacement when comparing two unicode equivalent terms" $
|
||||
let termA = cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text)
|
||||
termB = cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in
|
||||
diffTerms (free . Free) ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
|
||||
let termA = decorate . cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text)
|
||||
termB = decorate . cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in
|
||||
diffTerms wrap ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB))
|
||||
|
||||
prop "produces correct diffs" $
|
||||
\ a b -> let diff = diffTerms (free . Free) ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category]) in
|
||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b))
|
||||
\ a b -> let diff = diffTerms wrap ((==) `on` extract) diffCost (decorate (toTerm a)) (decorate (toTerm (b :: ArbitraryTerm Text (Record '[Category])))) in
|
||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (decorate (toTerm a)), Just (decorate (toTerm b)))
|
||||
|
||||
prop "constructs zero-cost diffs of equal terms" $
|
||||
\ a -> let term = toTerm a
|
||||
diff = diffTerms (free . Free) ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category]) in
|
||||
\ a -> let term = decorate (toTerm (a :: ArbitraryTerm Text (Record '[Category])))
|
||||
diff = diffTerms wrap ((==) `on` extract) diffCost term term in
|
||||
diffCost diff `shouldBe` 0
|
||||
|
@ -7,11 +7,10 @@ import Range
|
||||
import Renderer.Patch
|
||||
import Source
|
||||
import Syntax
|
||||
import Category
|
||||
import Test.Hspec
|
||||
|
||||
spec :: Spec
|
||||
spec = parallel $
|
||||
describe "hunks" $
|
||||
spec = parallel $ do
|
||||
describe "hunks" $ do
|
||||
it "empty diffs have empty hunks" $
|
||||
hunks (free . Free $ pure (Range 0 0 .: StringLiteral .: 1 .: 0 .: RNil) :< Leaf "") (both (SourceBlob (fromList "") "abcde" "path2.txt" (Just defaultPlainBlob)) (SourceBlob (fromList "") "xyz" "path2.txt" (Just defaultPlainBlob))) `shouldBe` [Hunk {offset = pure 0, changes = [], trailingContext = []}]
|
||||
hunks (wrap $ pure (Range 0 0 .: RNil) :< Leaf ("" :: Text)) (both (SourceBlob (fromList "") "abcde" "path2.txt" (Just defaultPlainBlob)) (SourceBlob (fromList "") "xyz" "path2.txt" (Just defaultPlainBlob))) `shouldBe` [Hunk {offset = pure 0, changes = [], trailingContext = []}]
|
||||
|
10
test/diffs/dictionary.patch.js
Normal file
10
test/diffs/dictionary.patch.js
Normal file
@ -0,0 +1,10 @@
|
||||
diff --git a/test/diffs/dictionary.A.js b/test/diffs/dictionary.B.js
|
||||
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 100644
|
||||
--- a/test/diffs/dictionary.A.js
|
||||
+++ b/test/diffs/dictionary.B.js
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
- "b": 4,
|
||||
+ "b": 5,
|
||||
"a": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user