1
1
mirror of https://github.com/github/semantic.git synced 2024-12-29 18:06:14 +03:00

Introduce a dedicated term equivalency check for ses

This commit is contained in:
Timothy Clem 2017-08-25 12:39:43 -07:00
parent fd7cbe8393
commit 97bd03543a
6 changed files with 34 additions and 25 deletions

View File

@ -4,6 +4,7 @@ module Interpreter
, decoratingWith
, diffTermsWith
, comparableByConstructor
, equivalentTerms
) where
import Algorithm
@ -13,7 +14,7 @@ import Control.Monad.Free (cutoff, wrap)
import Control.Monad.Free.Freer hiding (cutoff, wrap)
import Data.Align.Generic
import Data.Functor.Both
import Data.Functor.Classes (Eq1)
import Data.Functor.Classes (Eq1(..))
import Data.Hashable (Hashable)
import Data.Maybe (isJust)
import Data.Record
@ -33,7 +34,7 @@ import Term
diffTerms :: HasField fields Category
=> Both (SyntaxTerm fields) -- ^ A pair of terms representing the old and new state, respectively.
-> SyntaxDiff fields
diffTerms = decoratingWith getLabel (diffTermsWith algorithmWithTerms comparableByCategory)
diffTerms = decoratingWith getLabel (diffTermsWith algorithmWithTerms comparableByCategory (equalTerms comparableByCategory))
-- | Diff two terms by decorating with feature vectors computed using the supplied labelling algebra, and stripping the feature vectors from the resulting diff.
decoratingWith :: (Hashable label, Traversable f)
@ -47,16 +48,17 @@ decoratingWith getLabel differ = stripDiff . differ . fmap (defaultFeatureVector
diffTermsWith :: forall f fields . (Traversable f, GAlign f, Eq1 f, HasField fields FeatureVector)
=> (Term f (Record fields) -> Term f (Record fields) -> Algorithm (Term f (Record fields)) (Diff f (Record fields)) (Diff f (Record fields))) -- ^ A function producing syntax-directed continuations of the algorithm.
-> ComparabilityRelation f fields -- ^ A relation on terms used to determine comparability and equality.
-> (Term f (Record fields) -> Term f (Record fields) -> Bool) -- ^ A predicate used to determine term equality.
-> Both (Term f (Record fields)) -- ^ A pair of terms.
-> Diff f (Record fields) -- ^ The resulting diff.
diffTermsWith refine comparable (Join (a, b)) = runFreer decompose (diff a b)
diffTermsWith refine comparable eqTerms (Join (a, b)) = runFreer decompose (diff a b)
where decompose :: AlgorithmF (Term f (Record fields)) (Diff f (Record fields)) result -> Algorithm (Term f (Record fields)) (Diff f (Record fields)) result
decompose step = case step of
Diff t1 t2 -> refine t1 t2
Linear t1 t2 -> case galignWith diffThese (unwrap t1) (unwrap t2) of
Just result -> wrap . (both (extract t1) (extract t2) :<) <$> sequenceA result
_ -> byReplacing t1 t2
RWS as bs -> traverse diffThese (rws (editDistanceUpTo defaultM) comparable as bs)
RWS as bs -> traverse diffThese (rws (editDistanceUpTo defaultM) comparable eqTerms as bs)
Delete a -> pure (deleting a)
Insert b -> pure (inserting b)
Replace a b -> pure (replacing a b)
@ -114,15 +116,20 @@ algorithmWithTerms t1 t2 = case (unwrap t1, unwrap t2) of
-- | Test whether two terms are comparable by their Category.
comparableByCategory :: HasField fields Category => ComparabilityRelation f fields
comparableByCategory _ (a :< _) (b :< _) = category a == category b
comparableByCategory (a :< _) (b :< _) = category a == category b
-- | Test whether two terms are comparable by their constructor.
comparableByConstructor :: (Declaration.Method :< fs, Apply1 GAlign fs) => ComparabilityRelation (Union fs) fields
comparableByConstructor canCompare (_ :< a) (_ :< b)
| Just (Declaration.Method _ identifierA _ _) <- prj a
, Just (Declaration.Method _ identifierB _ _) <- prj b
= canCompare identifierA identifierB
| otherwise = isJust (galign a b)
comparableByConstructor :: (GAlign f) => ComparabilityRelation f fields
comparableByConstructor (_ :< a) (_ :< b) = isJust (galign a b)
-- | Equivalency check for terms.
equivalentTerms :: (Declaration.Method :< fs, Apply1 Functor fs, Apply1 Foldable fs, Apply1 GAlign fs, Apply1 Eq1 fs) => Term (Union fs) a -> Term (Union fs) a -> Bool
equivalentTerms a b | Just (Declaration.Method _ identifierA _ _) <- prj (unwrap a)
, Just (Declaration.Method _ identifierB _ _) <- prj (unwrap b)
= liftEq equivalentTerms (unwrap identifierA) (unwrap identifierB)
| Just aligned <- galignWith (these (const False) (const False) equivalentTerms) (unwrap a) (unwrap b)
= and aligned
| otherwise = False
-- | How many nodes to consider for our constant-time approximation to tree edit distance.
defaultM :: Integer

View File

@ -10,6 +10,7 @@ module RWS (
, pqGramDecorator
, Gram(..)
, defaultD
, equalTerms
) where
import Control.Applicative (empty)
@ -19,7 +20,7 @@ import Control.Comonad.Trans.Cofree hiding (cofree, runCofree)
import Control.Monad.Free
import Control.Monad.State.Strict
import Data.Foldable
import Data.Function ((&), fix, on)
import Data.Function ((&), on)
import Data.Functor.Foldable
import Data.Hashable
import Data.List (sortOn)
@ -48,7 +49,7 @@ type Label f fields label = forall b. TermF f (Record fields) b -> label
-- | A relation on 'Term's, guaranteed constant-time in the size of the 'Term' by parametricity.
--
-- This is used both to determine whether two root terms can be compared in O(1), and, recursively, to determine whether two nodes are equal in O(n); thus, comparability is defined s.t. two terms are equal if they are recursively comparable subterm-wise.
type ComparabilityRelation f fields = forall a b. (a -> b -> Bool) -> TermF f (Record fields) a -> TermF f (Record fields) b -> Bool
type ComparabilityRelation f fields = forall a b. TermF f (Record fields) a -> TermF f (Record fields) b -> Bool
type FeatureVector = UArray Int Double
@ -65,14 +66,15 @@ data TermOrIndexOrNone term = Term term | Index {-# UNPACK #-} !Int | None
rws :: (HasField fields FeatureVector, Functor f, Eq1 f)
=> (Diff f fields -> Int)
-> ComparabilityRelation f fields
-> (Term f (Record fields) -> Term f (Record fields) -> Bool)
-> [Term f (Record fields)]
-> [Term f (Record fields)]
-> RWSEditScript f fields
rws _ _ as [] = This <$> as
rws _ _ [] bs = That <$> bs
rws _ canCompare [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a]
rws editDistance canCompare as bs =
let sesDiffs = ses (equalTerms canCompare) as bs
rws _ _ _ as [] = This <$> as
rws _ _ _ [] bs = That <$> bs
rws _ canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a]
rws editDistance canCompare equivalent as bs =
let sesDiffs = ses equivalent as bs
(featureAs, featureBs, mappedDiffs, allDiffs) = genFeaturizedTermsAndDiffs sesDiffs
(diffs, remaining) = findNearestNeighboursToDiff editDistance canCompare allDiffs featureAs featureBs
diffs' = deleteRemaining diffs remaining
@ -307,7 +309,7 @@ unitVector d hash = listArray (0, d - 1) ((* invMagnitude) <$> components)
-- | Test the comparability of two root 'Term's in O(1).
canCompareTerms :: ComparabilityRelation f fields -> Term f (Record fields) -> Term f (Record fields) -> Bool
canCompareTerms canCompare = fix (\ comparator -> canCompare comparator `on` runCofree)
canCompareTerms canCompare = canCompare `on` runCofree
-- | Recursively test the equality of two 'Term's in O(n).
equalTerms :: Eq1 f => ComparabilityRelation f fields -> Term f (Record fields) -> Term f (Record fields) -> Bool

View File

@ -98,7 +98,7 @@ diffBlobPair renderer blobs = case (renderer, effectiveLanguage) of
run parse diff renderer = distributeFor blobs parse >>= diffTermPair blobs diff >>= render renderer
diffRecursively :: (Declaration.Method :< fs, Apply1 Eq1 fs, Apply1 GAlign fs, Apply1 Show1 fs, Apply1 Foldable fs, Apply1 Functor fs, Apply1 Traversable fs, Apply1 Diffable fs) => Both (Term (Union fs) (Record fields)) -> Diff (Union fs) (Record fields)
diffRecursively = decoratingWith constructorNameAndConstantFields (diffTermsWith algorithmForTerms comparableByConstructor)
diffRecursively = decoratingWith constructorNameAndConstantFields (diffTermsWith algorithmForTerms comparableByConstructor equivalentTerms)
-- | A task to diff a pair of 'Term's, producing insertion/deletion 'Patch'es for non-existent 'Blob's.
diffTermPair :: Functor f => Both Blob -> Differ f a -> Both (Term f a) -> Task (Diff f a)

View File

@ -37,12 +37,12 @@ spec = parallel $ do
\ (as, bs) -> let tas = decorate <$> (unListableF <$> as :: [SyntaxTerm '[Category]])
tbs = decorate <$> (unListableF <$> bs :: [SyntaxTerm '[Category]])
root = cofree . ((Program :. Nil) :<) . Indexed
diff = wrap (pure (Program :. Nil) :< Indexed (stripDiff . diffThese <$> rws editDistance (const canCompare) tas tbs)) in
diff = wrap (pure (Program :. Nil) :< Indexed (stripDiff . diffThese <$> rws editDistance canCompare (equalTerms canCompare) tas tbs)) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (root (stripTerm <$> tas)), Just (root (stripTerm <$> tbs)))
it "produces unbiased insertions within branches" $
let (a, b) = (decorate (cofree ((StringLiteral :. Nil) :< Indexed [ cofree ((StringLiteral :. Nil) :< Leaf "a") ])), decorate (cofree ((StringLiteral :. Nil) :< Indexed [ cofree ((StringLiteral :. Nil) :< Leaf "b") ]))) in
fmap (bimap stripTerm stripTerm) (rws editDistance (const canCompare) [ b ] [ a, b ]) `shouldBe` fmap (bimap stripTerm stripTerm) [ That a, These b b ]
fmap (bimap stripTerm stripTerm) (rws editDistance canCompare (equalTerms canCompare) [ b ] [ a, b ]) `shouldBe` fmap (bimap stripTerm stripTerm) [ That a, These b b ]
where canCompare a b = headF a == headF b

View File

@ -62,7 +62,7 @@ diffFixtures =
patchOutput = "diff --git a/test/fixtures/ruby/method-declaration.A.rb b/test/fixtures/ruby/method-declaration.B.rb\nindex 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 100644\n--- a/test/fixtures/ruby/method-declaration.A.rb\n+++ b/test/fixtures/ruby/method-declaration.B.rb\n@@ -1,3 +1,4 @@\n-def foo\n+def bar(a)\n+ baz\n end\n\n"
jsonOutput = "{\"diff\":{\"after\":{\"category\":\"Program\",\"sourceRange\":[0,21],\"sourceSpan\":{\"start\":[1,1],\"end\":[4,1]}},\"children\":[{\"after\":{\"category\":\"Method\",\"identifier\":\"bar\",\"sourceRange\":[0,20],\"sourceSpan\":{\"start\":[1,1],\"end\":[3,4]}},\"children\":[{\"replace\":[{\"category\":\"Identifier\",\"children\":[],\"sourceRange\":[4,7],\"sourceSpan\":{\"start\":[1,5],\"end\":[1,8]}},{\"category\":\"Identifier\",\"children\":[],\"sourceRange\":[4,7],\"sourceSpan\":{\"start\":[1,5],\"end\":[1,8]}}]},{\"insert\":{\"category\":\"Params\",\"children\":[{\"category\":\"Identifier\",\"children\":[],\"sourceRange\":[8,9],\"sourceSpan\":{\"start\":[1,9],\"end\":[1,10]}}],\"sourceRange\":[7,13],\"sourceSpan\":{\"start\":[1,8],\"end\":[2,3]}}},{\"insert\":{\"category\":\"Identifier\",\"children\":[],\"sourceRange\":[13,16],\"sourceSpan\":{\"start\":[2,3],\"end\":[2,6]}}}],\"before\":{\"category\":\"Method\",\"identifier\":\"foo\",\"sourceRange\":[0,11],\"sourceSpan\":{\"start\":[1,1],\"end\":[2,4]}}}],\"before\":{\"category\":\"Program\",\"sourceRange\":[0,12],\"sourceSpan\":{\"start\":[1,1],\"end\":[3,1]}}},\"oids\":[\"0000000000000000000000000000000000000000\",\"0000000000000000000000000000000000000000\"],\"paths\":[\"test/fixtures/ruby/method-declaration.A.rb\",\"test/fixtures/ruby/method-declaration.B.rb\"]}\n"
jsonOutput = "{\"diff\":{\"after\":{\"sourceRange\":[0,21],\"sourceSpan\":{\"start\":[1,1],\"end\":[4,1]}},\"children\":[{\"after\":{\"sourceRange\":[0,20],\"sourceSpan\":{\"start\":[1,1],\"end\":[3,4]}},\"children\":[{\"after\":{\"sourceRange\":[0,3],\"sourceSpan\":{\"start\":[1,1],\"end\":[1,4]}},\"children\":[],\"before\":{\"sourceRange\":[0,3],\"sourceSpan\":{\"start\":[1,1],\"end\":[1,4]}}},{\"replace\":[{\"children\":[],\"sourceRange\":[4,7],\"sourceSpan\":{\"start\":[1,5],\"end\":[1,8]}},{\"children\":[],\"sourceRange\":[4,7],\"sourceSpan\":{\"start\":[1,5],\"end\":[1,8]}}]},{\"insert\":{\"children\":[],\"sourceRange\":[8,9],\"sourceSpan\":{\"start\":[1,9],\"end\":[1,10]}}},{\"after\":{\"sourceRange\":[13,16],\"sourceSpan\":{\"start\":[2,3],\"end\":[2,6]}},\"children\":[{\"insert\":{\"children\":[],\"sourceRange\":[13,16],\"sourceSpan\":{\"start\":[2,3],\"end\":[2,6]}}}],\"before\":{\"sourceRange\":[8,11],\"sourceSpan\":{\"start\":[2,1],\"end\":[2,4]}}}],\"before\":{\"sourceRange\":[0,11],\"sourceSpan\":{\"start\":[1,1],\"end\":[2,4]}}}],\"before\":{\"sourceRange\":[0,12],\"sourceSpan\":{\"start\":[1,1],\"end\":[3,1]}}},\"oids\":[\"0000000000000000000000000000000000000000\",\"0000000000000000000000000000000000000000\"],\"paths\":[\"test/fixtures/ruby/method-declaration.A.rb\",\"test/fixtures/ruby/method-declaration.B.rb\"]}\n"
sExpressionOutput = "(Program\n (Method\n (Empty)\n { (Identifier)\n ->(Identifier) }\n {+(Identifier)+}\n (\n {+(Identifier)+})))\n"
tocOutput = "{\"changes\":{\"test/fixtures/ruby/method-declaration.A.rb -> test/fixtures/ruby/method-declaration.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"}]},\"errors\":{}}\n"

View File

@ -152,12 +152,12 @@ spec = parallel $ do
it "produces JSON output" $ do
blobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.B.rb")
output <- runTask (diffBlobPair ToCDiffRenderer blobs)
toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}\n" :: ByteString)
toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"}]},\"errors\":{}}\n" :: ByteString)
it "produces JSON output if there are parse errors" $ do
blobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.X.rb")
output <- runTask (diffBlobPair ToCDiffRenderer blobs)
toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.X.rb\":[{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"test/fixtures/toc/ruby/methods.X.rb:1:1-3:1: error: expected Program, but got ParseError\\nsymbol (src/Language/Ruby/Syntax.hs:85:41-85:55)\\n\",\"language\":\"Ruby\"}]}}\n" :: ByteString)
toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.X.rb\":[{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"test/fixtures/toc/ruby/methods.X.rb:1:1-3:1: error: expected Program, but got ParseError\\nsymbol (src/Language/Ruby/Syntax.hs:86:41-86:55)\\n\",\"language\":\"Ruby\"}]}}\n" :: ByteString)
it "summarizes Markdown headings" $ do
blobs <- blobsForPaths (both "markdown/headings.A.md" "markdown/headings.B.md")