diff --git a/src/Alignment.hs b/src/Alignment.hs index 00866ea3a..1513ec0d2 100644 --- a/src/Alignment.hs +++ b/src/Alignment.hs @@ -63,7 +63,7 @@ alignPatch sources patch = case patch of alignSyntax :: (Applicative f, HasField fields Range) => (forall a. f a -> Join These a) -> (CofreeF (Syntax leaf) (Record fields) term -> term) -> (term -> Range) -> f (Source Char) -> CofreeF (Syntax leaf) (f (Record fields)) [Join These term] -> [Join These term] alignSyntax toJoinThese toNode getRange sources (infos :< syntax) = catMaybes $ case syntax of Leaf s -> wrapInBranch (const (Leaf s)) <$> alignBranch getRange [] bothRanges - Comment a -> wrapInBranch (const (Comment a)) <$> alignBranch getRange [] bothRanges + Syntax.Comment a -> wrapInBranch (const (Syntax.Comment a)) <$> alignBranch getRange [] bothRanges Fixed children -> wrapInBranch Fixed <$> alignBranch getRange (join children) bothRanges _ -> wrapInBranch Indexed <$> alignBranch getRange (join (toList syntax)) bothRanges where bothRanges = modifyJoin (fromThese [] []) lineRanges diff --git a/src/Category.hs b/src/Category.hs index b82d99d42..1f471e488 100644 --- a/src/Category.hs +++ b/src/Category.hs @@ -14,6 +14,8 @@ data Category | Error -- | A boolean expression. | Boolean + -- | A bitwise operator. + | BitwiseOperator -- | An operator with 2 operands. | BinaryOperator -- | A literal key-value data structure. @@ -76,6 +78,8 @@ data Category | Case -- | An expression with an operator. | Operator + -- | An comma operator expression + | CommaOperator -- | An object/dictionary/hash literal. | Object -- | A throw statement. @@ -92,8 +96,12 @@ data Category | Class -- | A class method declaration. | Method + -- | A comment. + | Comment -- | A non-standard category, which can be used for comparability. | Other Text + -- | A relational operator (e.g. < or >=) + | RelationalOperator deriving (Eq, Generic, Ord, Show) -- Instances diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index dc164b72a..e3455f009 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -1,8 +1,13 @@ -{-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators #-} +{-# LANGUAGE DataKinds, GADTs, RankNTypes, ScopedTypeVariables, TypeOperators #-} module Data.RandomWalkSimilarity ( rws , pqGramDecorator +, defaultFeatureVectorDecorator , featureVectorDecorator +, editDistanceUpTo +, defaultD +, defaultP +, defaultQ , stripDiff , stripTerm , Gram(..) @@ -15,18 +20,19 @@ import Control.Monad.State import Data.Functor.Both hiding (fst, snd) import Data.Functor.Foldable as Foldable import Data.Hashable +import qualified Data.IntMap as IntMap import qualified Data.KdTree.Static as KdTree import qualified Data.List as List import Data.Record import qualified Data.Vector as Vector import Patch import Prologue -import Term () +import Term (termSize) import Test.QuickCheck hiding (Fixed) import Test.QuickCheck.Random -- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf). -rws :: (Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), HasField fields (Vector.Vector Double)) +rws :: forall f fields. (Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), HasField fields (Vector.Vector Double)) => (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function which compares a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared. -> [Cofree f (Record fields)] -- ^ The list of old terms. -> [Cofree f (Record fields)] -- ^ The list of new terms. @@ -35,29 +41,64 @@ rws compare as bs | null as, null bs = [] | null as = inserting <$> bs | null bs = deleting <$> as - | otherwise = fmap snd . uncurry deleteRemaining . (`runState` (negate 1, fas, fbs)) $ traverse findNearestNeighbourTo fbs + | otherwise = fmap snd . uncurry deleteRemaining . (`runState` (negate 1, toMap fas, toMap fbs)) $ traverse findNearestNeighbourTo fbs where fas = zipWith featurize [0..] as fbs = zipWith featurize [0..] bs kdas = KdTree.build (Vector.toList . feature) fas kdbs = KdTree.build (Vector.toList . feature) fbs featurize index term = UnmappedTerm index (getField (extract term)) term + toMap = IntMap.fromList . fmap (termIndex &&& identity) + -- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches. + findNearestNeighbourTo :: UnmappedTerm (Cofree f (Record fields)) -> State (Int, IntMap (UnmappedTerm (Cofree f (Record fields))), IntMap (UnmappedTerm (Cofree f (Record fields)))) (Int, Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) findNearestNeighbourTo kv@(UnmappedTerm j _ b) = do (previous, unmappedA, unmappedB) <- get fromMaybe (insertion previous unmappedA unmappedB kv) $ do - foundA@(UnmappedTerm i _ a) <- nearestUnmapped unmappedA kdas kv - foundB@(UnmappedTerm j' _ _) <- nearestUnmapped unmappedB kdbs foundA + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (IntMap.filterWithKey (\ k _ -> isInMoveBounds previous k) unmappedA) kdas kv + UnmappedTerm j' _ _ <- nearestUnmapped unmappedB kdbs foundA guard (j == j') - guard (i >= previous) compared <- compare a b pure $! do - put (i, List.delete foundA unmappedA, List.delete foundB unmappedB) + put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) pure (i, compared) - nearestUnmapped unmapped tree key = find ((== termIndex (KdTree.nearest tree key)) . termIndex) unmapped - insertion previous unmappedA unmappedB kv@(UnmappedTerm _ _ b) = do - put (previous, unmappedA, List.delete kv unmappedB) + + -- | Determines whether an index is in-bounds for a move given the most recently matched index. + isInMoveBounds previous i = previous <= i && i <= previous + defaultMoveBound + + -- | Finds the most-similar unmapped term to the passed-in term, if any. + -- + -- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance. + -- + -- cf §4.2 of RWS-Diff + nearestUnmapped + :: IntMap (UnmappedTerm (Cofree f (Record fields))) -- ^ A set of terms eligible for matching against. + -> KdTree.KdTree Double (UnmappedTerm (Cofree f (Record fields))) -- ^ The k-d tree to look up nearest neighbours within. + -> UnmappedTerm (Cofree f (Record fields)) -- ^ The term to find the nearest neighbour to. + -> Maybe (UnmappedTerm (Cofree f (Record fields))) -- ^ The most similar unmapped term, if any. + nearestUnmapped unmapped tree key = getFirst $ foldMap (First . Just) (sortOn (maybe maxBound (editDistanceUpTo defaultM) . compare (term key) . term) (toList (IntMap.intersection unmapped (toMap (KdTree.kNearest tree defaultL key))))) + + insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do + put (previous, unmappedA, IntMap.delete j unmappedB) pure (negate 1, inserting b) + deleteRemaining diffs (_, unmappedA, _) = foldl' (flip (List.insertBy (comparing fst))) diffs ((termIndex &&& deleting . term) <$> unmappedA) +-- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost. +editDistanceUpTo :: (Prologue.Foldable f, Functor f) => Integer -> Free (CofreeF f (Both a)) (Patch (Cofree f a)) -> Int +editDistanceUpTo m = diffSum (patchSum termSize) . cutoff m + where diffSum patchCost diff = sum $ fmap (maybe 0 patchCost) diff + +defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int +defaultD = 15 +-- | How many of the most similar terms to consider, to rule out false positives. +defaultL = 2 +defaultP = 2 +defaultQ = 3 +defaultMoveBound = 2 + +-- | How many nodes to consider for our constant-time approximation to tree edit distance. +defaultM :: Integer +defaultM = 10 + -- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index. data UnmappedTerm a = UnmappedTerm { termIndex :: {-# UNPACK #-} !Int, feature :: !(Vector.Vector Double), term :: !a } deriving Eq @@ -78,7 +119,7 @@ pqGramDecorator getLabel p q = cata algebra where algebra term = let label = getLabel term in cofree ((gram label .: headF term) :< assignParentAndSiblingLabels (tailF term) label) gram label = Gram (padToSize p []) (padToSize q (pure (Just label))) - assignParentAndSiblingLabels functor label = (`evalState` (siblingLabels functor)) (for functor (assignLabels label)) + assignParentAndSiblingLabels functor label = (`evalState` (replicate (q `div` 2) Nothing <> siblingLabels functor)) (for functor (assignLabels label)) assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) assignLabels label a = case runCofree a of RCons gram rest :< functor -> do @@ -95,13 +136,17 @@ unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.repl where normalize vec = fmap (/ vmagnitude vec) vec vmagnitude = sqrtDouble . Vector.sum . fmap (** 2) --- | Annotates a term with a feature vector at each node. +-- | Annotates a term with a feature vector at each node, parameterized by stem length, base width, and feature vector dimensions. featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d = cata (\ (RCons gram rest :< functor) -> cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor)) . pqGramDecorator getLabel p q +-- | Annotates a term with a feature vector at each node, using the default values for the p, q, and d parameters. +defaultFeatureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) +defaultFeatureVectorDecorator getLabel = featureVectorDecorator getLabel defaultP defaultQ defaultD + -- | Strips the head annotation off a term annotated with non-empty records. stripTerm :: Functor f => Cofree f (Record (h ': t)) -> Cofree f (Record t) stripTerm = fmap rtail diff --git a/src/Diff.hs b/src/Diff.hs index ae917010f..ec56caac7 100644 --- a/src/Diff.hs +++ b/src/Diff.hs @@ -18,11 +18,11 @@ type instance Base (Free f a) = FreeF f a instance Functor f => Foldable.Foldable (Free f a) where project = runFree instance Functor f => Foldable.Unfoldable (Free f a) where embed = free -diffSum :: (Patch (Term a annotation) -> Integer) -> Diff a annotation -> Integer +diffSum :: (Patch (Term a annotation) -> Int) -> Diff a annotation -> Int diffSum patchCost diff = sum $ fmap patchCost diff -- | The sum of the node count of the diff’s patches. -diffCost :: Diff a annotation -> Integer +diffCost :: Diff a annotation -> Int diffCost = diffSum $ patchSum termSize -- | Merge a diff using a function to provide the Term (in Maybe, to simplify recovery of the before/after state) for every Patch. diff --git a/src/DiffSummary.hs b/src/DiffSummary.hs index 52a2bf24e..c38da5ccb 100644 --- a/src/DiffSummary.hs +++ b/src/DiffSummary.hs @@ -57,7 +57,7 @@ summaries (Replace i1 i2) = zipWith (\a b -> "Replaced" <+> "the" <+> a <+> "wit toLeafInfos :: DiffInfo -> [Doc] toLeafInfos LeafInfo{..} = pure $ squotes (toDoc termName) <+> (toDoc categoryName) -toLeafInfos BranchInfo{..} = pretty <$> branches +toLeafInfos BranchInfo{..} = toLeafInfos =<< branches toLeafInfos err@ErrorInfo{} = pure $ pretty err toTermName :: (HasCategory leaf, HasField fields Category, HasField fields Range) => Source Char -> Term leaf (Record fields) -> Text @@ -108,7 +108,7 @@ toTermName source term = case unwrap term of S.Array _ -> termNameFromSource term S.Class identifier _ _ -> toTermName' identifier S.Method identifier _ _ -> toTermName' identifier - Comment a -> toCategoryName a + S.Comment a -> toCategoryName a S.Commented _ _ -> termNameFromChildren term where toTermName' = toTermName source termNameFromChildren term = termNameFromRange (unionRangesFrom (range term) (range <$> toList (unwrap term))) @@ -172,8 +172,11 @@ instance HasCategory Category where toCategoryName = \case ArrayLiteral -> "array" BinaryOperator -> "binary operator" + BitwiseOperator -> "bitwise operator" + RelationalOperator -> "relational operator" Boolean -> "boolean" DictionaryLiteral -> "dictionary" + C.Comment -> "comment" C.Error -> "error" ExpressionStatements -> "expression statements" C.Assignment -> "assignment" @@ -213,6 +216,7 @@ instance HasCategory Category where C.Class -> "class" C.Method -> "method" C.If -> "if statement" + C.CommaOperator -> "comma operator" instance (HasCategory leaf, HasField fields Category) => HasCategory (Term leaf (Record fields)) where toCategoryName = toCategoryName . category . extract diff --git a/src/Diffing.hs b/src/Diffing.hs index 522f7b293..a406549d0 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -8,6 +8,7 @@ import Data.Functor.Both import Data.Functor.Foldable import Data.RandomWalkSimilarity import Data.Record +import qualified Data.Set as Set import qualified Data.Text.IO as TextIO import qualified Data.Text.ICU.Detect as Detect import qualified Data.Text.ICU.Convert as Convert @@ -34,6 +35,7 @@ import Term import TreeSitter import Text.Parser.TreeSitter.Language import qualified Data.Text as T +import Category import Data.Aeson (pairs) import Data.Aeson.Encoding (encodingToLazyByteString) @@ -43,7 +45,7 @@ import Data.Aeson.Encoding (encodingToLazyByteString) -- | with respect to other IO actions. diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record (Vector.Vector Double ': fields)) -> Both SourceBlob -> IO Output diffFiles parser renderer sourceBlobs = do - terms <- traverse (fmap (featureVectorDecorator getLabel p q d) . parser) sourceBlobs + terms <- traverse (fmap (defaultFeatureVectorDecorator getLabel) . parser) sourceBlobs let areNullOids = runBothWith (\a b -> (oid a == nullOid || null (source a), oid b == nullOid || null (source b))) sourceBlobs let textDiff = case areNullOids of @@ -62,7 +64,6 @@ diffFiles parser renderer sourceBlobs = do getLabel (h :< t) = (category h, case t of Leaf s -> Just s _ -> Nothing) - (p, q, d) = (2, 2, 15) -- | Return a parser based on the file extension (including the "."). parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category]) @@ -97,11 +98,15 @@ breakDownLeavesByWord source = cata replaceIn where replaceIn (info :< syntax) = cofree $ info :< syntax' where syntax' = case (ranges, syntax) of - (_:_:_, Leaf _) | category info /= Regex -> Indexed (makeLeaf info <$> ranges) + (_:_:_, Leaf _) | Set.notMember (category info) preserveSyntax -> Indexed (makeLeaf info <$> ranges) _ -> syntax ranges = rangesAndWordsInSource (characterRange info) rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toString $ slice range source) makeLeaf info (range, substring) = cofree $ setCharacterRange info range :< Leaf (toS substring) + -- Some Category constructors should retain their original structure, and not be sliced + -- into words. This Set represents those Category constructors for which we want to + -- preserve the original Syntax. + preserveSyntax = Set.fromList [Regex, Category.Comment] -- | Transcode a file to a unicode source. transcode :: B1.ByteString -> IO (Source Char) @@ -132,7 +137,7 @@ compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Te compareCategoryEq = (==) `on` category . extract -- | The sum of the node count of the diff’s patches. -diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer +diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Int diffCostWithCachedTermCosts diff = unCost $ case runFree diff of Free (info :< _) -> sum (cost <$> info) Pure patch -> sum (cost . extract <$> patch) diff --git a/src/Info.hs b/src/Info.hs index 44ba5c7e9..30410252a 100644 --- a/src/Info.hs +++ b/src/Info.hs @@ -7,7 +7,7 @@ import Category import Range import Test.QuickCheck -newtype Cost = Cost { unCost :: Integer } +newtype Cost = Cost { unCost :: Int } deriving (Eq, Num, Ord, Show) characterRange :: HasField fields Range => Record fields -> Range diff --git a/src/Parser.hs b/src/Parser.hs index 297516fcf..ed32a12d6 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -19,7 +19,7 @@ type Parser f a = SourceBlob -> IO (Cofree f a) -- | Whether a category is an Operator Category isOperator :: Category -> Bool -isOperator = flip Set.member (Set.fromList [ Operator, BinaryOperator ]) +isOperator = flip Set.member (Set.fromList [ Operator, BinaryOperator, BitwiseOperator, RelationalOperator ]) -- | Construct a term given source, the span covered, the annotation for the term, and its children. -- @@ -55,6 +55,9 @@ termConstructor source sourceSpan info = fmap cofree . construct (base:element:[]) -> withDefaultInfo $ S.SubscriptAccess base element _ -> errorWith children construct children | isOperator (category info) = withDefaultInfo $ S.Operator children + construct children | CommaOperator == category info = withDefaultInfo $ case children of + [child, rest] | S.Indexed cs <- unwrap rest -> S.Indexed $ child : toList cs + _ -> S.Indexed children construct children | Function == category info = case children of (body:[]) -> withDefaultInfo $ S.Function Nothing Nothing body (params:body:[]) | (info :< _) <- runCofree params, Params == category info -> @@ -66,8 +69,10 @@ termConstructor source sourceSpan info = fmap cofree . construct _ -> errorWith children construct children | FunctionCall == category info = case runCofree <$> children of - [ (_ :< S.MemberAccess{..}), params@(_ :< S.Args{}) ] -> - pure $! setCategory info MethodCall :< S.MethodCall memberId property (cofree params) + [ (_ :< S.MemberAccess{..}), (_ :< S.Args args) ] -> + pure $! setCategory info MethodCall :< S.MethodCall memberId property args + [ (_ :< S.MemberAccess{..}) ] -> + pure $! setCategory info MethodCall :< S.MethodCall memberId property [] (x:xs) -> withDefaultInfo $ S.FunctionCall (cofree x) (cofree <$> xs) _ -> errorWith children diff --git a/src/Patch.hs b/src/Patch.hs index a156f01f2..65a163838 100644 --- a/src/Patch.hs +++ b/src/Patch.hs @@ -58,7 +58,7 @@ mapPatch _ g (Insert b) = Insert (g b) mapPatch f g (Replace a b) = Replace (f a) (g b) -- | Calculate the cost of the patch given a function to compute the cost of a item. -patchSum :: (a -> Integer) -> Patch a -> Integer +patchSum :: (a -> Int) -> Patch a -> Int patchSum termCost patch = maybe 0 termCost (before patch) + maybe 0 termCost (after patch) -- | Return Just the value in This, or the first value in These, if any. diff --git a/src/Renderer.hs b/src/Renderer.hs index fbbfca8c3..0c537d657 100644 --- a/src/Renderer.hs +++ b/src/Renderer.hs @@ -24,10 +24,12 @@ concatOutputs l = T.intercalate "\n" (toText <$> l) toSeries :: Output -> Series toSeries (JSONOutput series) = series toSeries (SummaryOutput series) = series +toSeries _ = mempty toText :: Output -> Text toText (SplitOutput text) = text toText (PatchOutput text) = text +toText _ = mempty -- | The available types of diff rendering. diff --git a/src/Renderer/Split.hs b/src/Renderer/Split.hs index d2fac0102..194dcaaff 100644 --- a/src/Renderer/Split.hs +++ b/src/Renderer/Split.hs @@ -34,12 +34,16 @@ styleName category = "category-" <> case category of Program -> "program" C.Error -> "error" BinaryOperator -> "binary-operator" + BitwiseOperator -> "bitwise-operator" + RelationalOperator -> "relational-operator" + C.CommaOperator -> "comma-operator" Boolean -> "boolean" DictionaryLiteral -> "dictionary" C.Pair -> "pair" StringLiteral -> "string" SymbolLiteral -> "symbol" IntegerLiteral -> "integer" + C.Comment -> "comment" C.FunctionCall -> "function_call" C.Function -> "function" C.MethodCall -> "method_call" @@ -73,6 +77,7 @@ styleName category = "category-" <> case category of C.Class -> "class_statement" C.Method -> "method" C.If -> "if_statement" + C.CommaOperator -> "comma_operator" Other string -> string -- | Pick the class name for a split patch. diff --git a/src/SES.hs b/src/SES.hs index 97ad55d1a..cb6ae2349 100644 --- a/src/SES.hs +++ b/src/SES.hs @@ -9,7 +9,7 @@ import Prologue type Compare term edit = term -> term -> Maybe edit -- | A function that computes the cost of an edit. -type Cost edit = edit -> Integer +type Cost edit = edit -> Int -- | Find the shortest edit script (diff) between two terms given a function to compute the cost. ses :: Applicative edit => Compare term (edit (Patch term)) -> Cost (edit (Patch term)) -> [term] -> [term] -> [edit (Patch term)] @@ -17,7 +17,7 @@ ses diffTerms cost as bs = fst <$> evalState diffState Map.empty where diffState = diffAt diffTerms cost (0, 0) as bs -- | Find the shortest edit script between two terms at a given vertex in the edit graph. -diffAt :: Applicative edit => Compare term (edit (Patch term)) -> Cost (edit (Patch term)) -> (Integer, Integer) -> [term] -> [term] -> State (Map.Map (Integer, Integer) [(edit (Patch term), Integer)]) [(edit (Patch term), Integer)] +diffAt :: Applicative edit => Compare term (edit (Patch term)) -> Cost (edit (Patch term)) -> (Int, Int) -> [term] -> [term] -> State (Map.Map (Int, Int) [(edit (Patch term), Int)]) [(edit (Patch term), Int)] diffAt diffTerms cost (i, j) as bs | (a : as) <- as, (b : bs) <- bs = do cachedDiffs <- get @@ -47,5 +47,5 @@ diffAt diffTerms cost (i, j) as bs -- | Prepend an edit script and the cumulative cost onto the edit script. -consWithCost :: Cost edit -> edit -> [(edit, Integer)] -> [(edit, Integer)] +consWithCost :: Cost edit -> edit -> [(edit, Int)] -> [(edit, Int)] consWithCost cost edit rest = (edit, cost edit + maybe 0 snd (fst <$> uncons rest)) : rest diff --git a/src/Syntax.hs b/src/Syntax.hs index 9ef6465ab..85763201b 100644 --- a/src/Syntax.hs +++ b/src/Syntax.hs @@ -33,7 +33,7 @@ data Syntax a f | MemberAccess { memberId :: f, property :: f } -- | A method call consisting of its target, the method name, and the parameters passed to the method. -- | e.g. in Javascript console.log('hello') represents a method call. - | MethodCall { targetId :: f, methodId :: f, methodParams :: f } + | MethodCall { targetId :: f, methodId :: f, methodParams :: [f] } -- | The list of arguments to a method call. -- | TODO: It might be worth removing this and using Fixed instead. | Args [f] diff --git a/src/Term.hs b/src/Term.hs index 0c71a52fb..ad7d49e5f 100644 --- a/src/Term.hs +++ b/src/Term.hs @@ -25,7 +25,7 @@ zipTerms t1 t2 = iter go (alignCofreeWith galign (const Nothing) both (These t1 where go (a :< s) = cofree . (a :<) <$> sequenceA s -- | Return the node count of a term. -termSize :: Term a annotation -> Integer +termSize :: (Prologue.Foldable f, Functor f) => Cofree f annotation -> Int termSize = cata size where size (_ :< syntax) = 1 + sum syntax diff --git a/src/TreeSitter.hs b/src/TreeSitter.hs index 2e794a1de..659599a0e 100644 --- a/src/TreeSitter.hs +++ b/src/TreeSitter.hs @@ -40,9 +40,7 @@ categoriesForLanguage language name = case (language, name) of (JavaScript, "generator_function") -> Function (JavaScript, "math_op") -> BinaryOperator -- bitwise operator, e.g. +, -, *, /. (JavaScript, "bool_op") -> BinaryOperator -- boolean operator, e.g. ||, &&. - (JavaScript, "bitwise_op") -> BinaryOperator -- bitwise operator, e.g. ^, &, etc. - (JavaScript, "rel_op") -> BinaryOperator -- relational operator, e.g. >, <, <=, >=, ==, !=. - (JavaScript, "comma_op") -> Operator -- comma operator, e.g. expr1, expr2. + (JavaScript, "comma_op") -> CommaOperator -- comma operator, e.g. expr1, expr2. (JavaScript, "delete_op") -> Operator -- delete operator, e.g. delete x[2]. (JavaScript, "type_op") -> Operator -- type operator, e.g. typeof Object. (JavaScript, "void_op") -> Operator -- void operator, e.g. void 2. @@ -95,6 +93,9 @@ defaultCategoryForNodeName name = case name of "throw_statement" -> Throw "try_statement" -> Try "method_definition" -> Method + "comment" -> Comment + "bitwise_op" -> BitwiseOperator + "rel_op" -> RelationalOperator _ -> Other name {-# INLINE defaultCategoryForNodeName #-} diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index fdb9da3d0..118e72898 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -1,6 +1,8 @@ {-# LANGUAGE DataKinds #-} module Data.RandomWalkSimilarity.Spec where +import Data.Functor.Both +import Data.Functor.Foldable (cata) import Data.RandomWalkSimilarity import Data.Record import Diff @@ -29,8 +31,7 @@ spec = parallel $ do \ (term, p, q, d) -> featureVectorDecorator (rhead . headF) (positively p) (positively q) (positively d) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively d)) . length . rhead) describe "rws" $ do - let compare a b = if ((==) `on` category . extract) a b then Just (replacing a b) else Nothing - let decorate = featureVectorDecorator (category . headF) 2 2 15 + let decorate = defaultFeatureVectorDecorator (category . headF) let toTerm' = decorate . toTerm prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $ \ (as, bs) -> let tas = toTerm' <$> (as :: [ArbitraryTerm Text (Record '[Category])]) @@ -41,4 +42,10 @@ spec = parallel $ do it "produces unbiased insertions within branches" $ let (a, b) = (decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "a") ])), decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "b") ]))) in - fmap stripDiff (rws compare [ b ] [ a, b ]) `shouldBe` fmap stripDiff [ inserting a, replacing b b ] + fmap stripDiff (rws compare [ b ] [ a, b ]) `shouldBe` fmap stripDiff [ inserting a, copying b ] + + where compare :: (HasField fields Category, Functor f, Eq (Cofree f Category)) => Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) + compare a b | (category <$> a) == (category <$> b) = Just (copying b) + | otherwise = if ((==) `on` category . extract) a b then Just (replacing a b) else Nothing + copying :: Functor f => Cofree f (Record fields) -> Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))) + copying = cata wrap . fmap pure diff --git a/test/Diff/Spec.hs b/test/Diff/Spec.hs index 91857b665..351444ce9 100644 --- a/test/Diff/Spec.hs +++ b/test/Diff/Spec.hs @@ -17,7 +17,7 @@ import Test.QuickCheck spec :: Spec spec = parallel $ do - let toTerm' = featureVectorDecorator (category . headF) 2 2 15 . toTerm + let toTerm' = defaultFeatureVectorDecorator (category . headF) . toTerm prop "equality is reflexive" $ \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in diff `shouldBe` diff diff --git a/test/DiffSummarySpec.hs b/test/DiffSummarySpec.hs index 93d69491c..94f21fae7 100644 --- a/test/DiffSummarySpec.hs +++ b/test/DiffSummarySpec.hs @@ -45,7 +45,7 @@ spec = parallel $ do diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotation = Nothing } ] prop "equal terms produce identity diffs" $ - \ a -> let term = featureVectorDecorator (category . headF) 2 2 15 (toTerm (a :: ArbitraryTerm Text (Record '[Category, Range]))) in + \ a -> let term = defaultFeatureVectorDecorator (category . headF) (toTerm (a :: ArbitraryTerm Text (Record '[Category, Range]))) in diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` [] describe "annotatedSummaries" $ do diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index 755b6001c..29ea5e7aa 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -18,7 +18,7 @@ import Test.Hspec.QuickCheck spec :: Spec spec = parallel $ do describe "interpret" $ do - let decorate = featureVectorDecorator (category . headF) 2 2 15 + let decorate = defaultFeatureVectorDecorator (category . headF) let compare = ((==) `on` category . extract) it "returns a replacement when comparing two unicode equivalent terms" $ let termA = cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text) diff --git a/test/diffs/asymmetrical-context.split.js b/test/diffs/asymmetrical-context.split.js index 31bc72255..10de89c0c 100644 --- a/test/diffs/asymmetrical-context.split.js +++ b/test/diffs/asymmetrical-context.split.js @@ -1,7 +1,7 @@ -
1 |
|