mirror of
https://github.com/github/semantic.git
synced 2024-12-29 18:06:14 +03:00
Introduce a dedicated term equivalency check for ses
This commit is contained in:
parent
fd7cbe8393
commit
97bd03543a
@ -4,6 +4,7 @@ module Interpreter
|
||||
, decoratingWith
|
||||
, diffTermsWith
|
||||
, comparableByConstructor
|
||||
, equivalentTerms
|
||||
) where
|
||||
|
||||
import Algorithm
|
||||
@ -13,7 +14,7 @@ import Control.Monad.Free (cutoff, wrap)
|
||||
import Control.Monad.Free.Freer hiding (cutoff, wrap)
|
||||
import Data.Align.Generic
|
||||
import Data.Functor.Both
|
||||
import Data.Functor.Classes (Eq1)
|
||||
import Data.Functor.Classes (Eq1(..))
|
||||
import Data.Hashable (Hashable)
|
||||
import Data.Maybe (isJust)
|
||||
import Data.Record
|
||||
@ -33,7 +34,7 @@ import Term
|
||||
diffTerms :: HasField fields Category
|
||||
=> Both (SyntaxTerm fields) -- ^ A pair of terms representing the old and new state, respectively.
|
||||
-> SyntaxDiff fields
|
||||
diffTerms = decoratingWith getLabel (diffTermsWith algorithmWithTerms comparableByCategory)
|
||||
diffTerms = decoratingWith getLabel (diffTermsWith algorithmWithTerms comparableByCategory (equalTerms comparableByCategory))
|
||||
|
||||
-- | Diff two terms by decorating with feature vectors computed using the supplied labelling algebra, and stripping the feature vectors from the resulting diff.
|
||||
decoratingWith :: (Hashable label, Traversable f)
|
||||
@ -47,16 +48,17 @@ decoratingWith getLabel differ = stripDiff . differ . fmap (defaultFeatureVector
|
||||
diffTermsWith :: forall f fields . (Traversable f, GAlign f, Eq1 f, HasField fields FeatureVector)
|
||||
=> (Term f (Record fields) -> Term f (Record fields) -> Algorithm (Term f (Record fields)) (Diff f (Record fields)) (Diff f (Record fields))) -- ^ A function producing syntax-directed continuations of the algorithm.
|
||||
-> ComparabilityRelation f fields -- ^ A relation on terms used to determine comparability and equality.
|
||||
-> (Term f (Record fields) -> Term f (Record fields) -> Bool) -- ^ A predicate used to determine term equality.
|
||||
-> Both (Term f (Record fields)) -- ^ A pair of terms.
|
||||
-> Diff f (Record fields) -- ^ The resulting diff.
|
||||
diffTermsWith refine comparable (Join (a, b)) = runFreer decompose (diff a b)
|
||||
diffTermsWith refine comparable eqTerms (Join (a, b)) = runFreer decompose (diff a b)
|
||||
where decompose :: AlgorithmF (Term f (Record fields)) (Diff f (Record fields)) result -> Algorithm (Term f (Record fields)) (Diff f (Record fields)) result
|
||||
decompose step = case step of
|
||||
Diff t1 t2 -> refine t1 t2
|
||||
Linear t1 t2 -> case galignWith diffThese (unwrap t1) (unwrap t2) of
|
||||
Just result -> wrap . (both (extract t1) (extract t2) :<) <$> sequenceA result
|
||||
_ -> byReplacing t1 t2
|
||||
RWS as bs -> traverse diffThese (rws (editDistanceUpTo defaultM) comparable as bs)
|
||||
RWS as bs -> traverse diffThese (rws (editDistanceUpTo defaultM) comparable eqTerms as bs)
|
||||
Delete a -> pure (deleting a)
|
||||
Insert b -> pure (inserting b)
|
||||
Replace a b -> pure (replacing a b)
|
||||
@ -114,15 +116,20 @@ algorithmWithTerms t1 t2 = case (unwrap t1, unwrap t2) of
|
||||
|
||||
-- | Test whether two terms are comparable by their Category.
|
||||
comparableByCategory :: HasField fields Category => ComparabilityRelation f fields
|
||||
comparableByCategory _ (a :< _) (b :< _) = category a == category b
|
||||
comparableByCategory (a :< _) (b :< _) = category a == category b
|
||||
|
||||
-- | Test whether two terms are comparable by their constructor.
|
||||
comparableByConstructor :: (Declaration.Method :< fs, Apply1 GAlign fs) => ComparabilityRelation (Union fs) fields
|
||||
comparableByConstructor canCompare (_ :< a) (_ :< b)
|
||||
| Just (Declaration.Method _ identifierA _ _) <- prj a
|
||||
, Just (Declaration.Method _ identifierB _ _) <- prj b
|
||||
= canCompare identifierA identifierB
|
||||
| otherwise = isJust (galign a b)
|
||||
comparableByConstructor :: (GAlign f) => ComparabilityRelation f fields
|
||||
comparableByConstructor (_ :< a) (_ :< b) = isJust (galign a b)
|
||||
|
||||
-- | Equivalency check for terms.
|
||||
equivalentTerms :: (Declaration.Method :< fs, Apply1 Functor fs, Apply1 Foldable fs, Apply1 GAlign fs, Apply1 Eq1 fs) => Term (Union fs) a -> Term (Union fs) a -> Bool
|
||||
equivalentTerms a b | Just (Declaration.Method _ identifierA _ _) <- prj (unwrap a)
|
||||
, Just (Declaration.Method _ identifierB _ _) <- prj (unwrap b)
|
||||
= liftEq equivalentTerms (unwrap identifierA) (unwrap identifierB)
|
||||
| Just aligned <- galignWith (these (const False) (const False) equivalentTerms) (unwrap a) (unwrap b)
|
||||
= and aligned
|
||||
| otherwise = False
|
||||
|
||||
-- | How many nodes to consider for our constant-time approximation to tree edit distance.
|
||||
defaultM :: Integer
|
||||
|
18
src/RWS.hs
18
src/RWS.hs
@ -10,6 +10,7 @@ module RWS (
|
||||
, pqGramDecorator
|
||||
, Gram(..)
|
||||
, defaultD
|
||||
, equalTerms
|
||||
) where
|
||||
|
||||
import Control.Applicative (empty)
|
||||
@ -19,7 +20,7 @@ import Control.Comonad.Trans.Cofree hiding (cofree, runCofree)
|
||||
import Control.Monad.Free
|
||||
import Control.Monad.State.Strict
|
||||
import Data.Foldable
|
||||
import Data.Function ((&), fix, on)
|
||||
import Data.Function ((&), on)
|
||||
import Data.Functor.Foldable
|
||||
import Data.Hashable
|
||||
import Data.List (sortOn)
|
||||
@ -48,7 +49,7 @@ type Label f fields label = forall b. TermF f (Record fields) b -> label
|
||||
-- | A relation on 'Term's, guaranteed constant-time in the size of the 'Term' by parametricity.
|
||||
--
|
||||
-- This is used both to determine whether two root terms can be compared in O(1), and, recursively, to determine whether two nodes are equal in O(n); thus, comparability is defined s.t. two terms are equal if they are recursively comparable subterm-wise.
|
||||
type ComparabilityRelation f fields = forall a b. (a -> b -> Bool) -> TermF f (Record fields) a -> TermF f (Record fields) b -> Bool
|
||||
type ComparabilityRelation f fields = forall a b. TermF f (Record fields) a -> TermF f (Record fields) b -> Bool
|
||||
|
||||
type FeatureVector = UArray Int Double
|
||||
|
||||
@ -65,14 +66,15 @@ data TermOrIndexOrNone term = Term term | Index {-# UNPACK #-} !Int | None
|
||||
rws :: (HasField fields FeatureVector, Functor f, Eq1 f)
|
||||
=> (Diff f fields -> Int)
|
||||
-> ComparabilityRelation f fields
|
||||
-> (Term f (Record fields) -> Term f (Record fields) -> Bool)
|
||||
-> [Term f (Record fields)]
|
||||
-> [Term f (Record fields)]
|
||||
-> RWSEditScript f fields
|
||||
rws _ _ as [] = This <$> as
|
||||
rws _ _ [] bs = That <$> bs
|
||||
rws _ canCompare [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a]
|
||||
rws editDistance canCompare as bs =
|
||||
let sesDiffs = ses (equalTerms canCompare) as bs
|
||||
rws _ _ _ as [] = This <$> as
|
||||
rws _ _ _ [] bs = That <$> bs
|
||||
rws _ canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a]
|
||||
rws editDistance canCompare equivalent as bs =
|
||||
let sesDiffs = ses equivalent as bs
|
||||
(featureAs, featureBs, mappedDiffs, allDiffs) = genFeaturizedTermsAndDiffs sesDiffs
|
||||
(diffs, remaining) = findNearestNeighboursToDiff editDistance canCompare allDiffs featureAs featureBs
|
||||
diffs' = deleteRemaining diffs remaining
|
||||
@ -307,7 +309,7 @@ unitVector d hash = listArray (0, d - 1) ((* invMagnitude) <$> components)
|
||||
|
||||
-- | Test the comparability of two root 'Term's in O(1).
|
||||
canCompareTerms :: ComparabilityRelation f fields -> Term f (Record fields) -> Term f (Record fields) -> Bool
|
||||
canCompareTerms canCompare = fix (\ comparator -> canCompare comparator `on` runCofree)
|
||||
canCompareTerms canCompare = canCompare `on` runCofree
|
||||
|
||||
-- | Recursively test the equality of two 'Term's in O(n).
|
||||
equalTerms :: Eq1 f => ComparabilityRelation f fields -> Term f (Record fields) -> Term f (Record fields) -> Bool
|
||||
|
@ -98,7 +98,7 @@ diffBlobPair renderer blobs = case (renderer, effectiveLanguage) of
|
||||
run parse diff renderer = distributeFor blobs parse >>= diffTermPair blobs diff >>= render renderer
|
||||
|
||||
diffRecursively :: (Declaration.Method :< fs, Apply1 Eq1 fs, Apply1 GAlign fs, Apply1 Show1 fs, Apply1 Foldable fs, Apply1 Functor fs, Apply1 Traversable fs, Apply1 Diffable fs) => Both (Term (Union fs) (Record fields)) -> Diff (Union fs) (Record fields)
|
||||
diffRecursively = decoratingWith constructorNameAndConstantFields (diffTermsWith algorithmForTerms comparableByConstructor)
|
||||
diffRecursively = decoratingWith constructorNameAndConstantFields (diffTermsWith algorithmForTerms comparableByConstructor equivalentTerms)
|
||||
|
||||
-- | A task to diff a pair of 'Term's, producing insertion/deletion 'Patch'es for non-existent 'Blob's.
|
||||
diffTermPair :: Functor f => Both Blob -> Differ f a -> Both (Term f a) -> Task (Diff f a)
|
||||
|
@ -37,12 +37,12 @@ spec = parallel $ do
|
||||
\ (as, bs) -> let tas = decorate <$> (unListableF <$> as :: [SyntaxTerm '[Category]])
|
||||
tbs = decorate <$> (unListableF <$> bs :: [SyntaxTerm '[Category]])
|
||||
root = cofree . ((Program :. Nil) :<) . Indexed
|
||||
diff = wrap (pure (Program :. Nil) :< Indexed (stripDiff . diffThese <$> rws editDistance (const canCompare) tas tbs)) in
|
||||
diff = wrap (pure (Program :. Nil) :< Indexed (stripDiff . diffThese <$> rws editDistance canCompare (equalTerms canCompare) tas tbs)) in
|
||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (root (stripTerm <$> tas)), Just (root (stripTerm <$> tbs)))
|
||||
|
||||
it "produces unbiased insertions within branches" $
|
||||
let (a, b) = (decorate (cofree ((StringLiteral :. Nil) :< Indexed [ cofree ((StringLiteral :. Nil) :< Leaf "a") ])), decorate (cofree ((StringLiteral :. Nil) :< Indexed [ cofree ((StringLiteral :. Nil) :< Leaf "b") ]))) in
|
||||
fmap (bimap stripTerm stripTerm) (rws editDistance (const canCompare) [ b ] [ a, b ]) `shouldBe` fmap (bimap stripTerm stripTerm) [ That a, These b b ]
|
||||
fmap (bimap stripTerm stripTerm) (rws editDistance canCompare (equalTerms canCompare) [ b ] [ a, b ]) `shouldBe` fmap (bimap stripTerm stripTerm) [ That a, These b b ]
|
||||
|
||||
where canCompare a b = headF a == headF b
|
||||
|
||||
|
@ -62,7 +62,7 @@ diffFixtures =
|
||||
|
||||
patchOutput = "diff --git a/test/fixtures/ruby/method-declaration.A.rb b/test/fixtures/ruby/method-declaration.B.rb\nindex 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 100644\n--- a/test/fixtures/ruby/method-declaration.A.rb\n+++ b/test/fixtures/ruby/method-declaration.B.rb\n@@ -1,3 +1,4 @@\n-def foo\n+def bar(a)\n+ baz\n end\n\n"
|
||||
|
||||
jsonOutput = "{\"diff\":{\"after\":{\"category\":\"Program\",\"sourceRange\":[0,21],\"sourceSpan\":{\"start\":[1,1],\"end\":[4,1]}},\"children\":[{\"after\":{\"category\":\"Method\",\"identifier\":\"bar\",\"sourceRange\":[0,20],\"sourceSpan\":{\"start\":[1,1],\"end\":[3,4]}},\"children\":[{\"replace\":[{\"category\":\"Identifier\",\"children\":[],\"sourceRange\":[4,7],\"sourceSpan\":{\"start\":[1,5],\"end\":[1,8]}},{\"category\":\"Identifier\",\"children\":[],\"sourceRange\":[4,7],\"sourceSpan\":{\"start\":[1,5],\"end\":[1,8]}}]},{\"insert\":{\"category\":\"Params\",\"children\":[{\"category\":\"Identifier\",\"children\":[],\"sourceRange\":[8,9],\"sourceSpan\":{\"start\":[1,9],\"end\":[1,10]}}],\"sourceRange\":[7,13],\"sourceSpan\":{\"start\":[1,8],\"end\":[2,3]}}},{\"insert\":{\"category\":\"Identifier\",\"children\":[],\"sourceRange\":[13,16],\"sourceSpan\":{\"start\":[2,3],\"end\":[2,6]}}}],\"before\":{\"category\":\"Method\",\"identifier\":\"foo\",\"sourceRange\":[0,11],\"sourceSpan\":{\"start\":[1,1],\"end\":[2,4]}}}],\"before\":{\"category\":\"Program\",\"sourceRange\":[0,12],\"sourceSpan\":{\"start\":[1,1],\"end\":[3,1]}}},\"oids\":[\"0000000000000000000000000000000000000000\",\"0000000000000000000000000000000000000000\"],\"paths\":[\"test/fixtures/ruby/method-declaration.A.rb\",\"test/fixtures/ruby/method-declaration.B.rb\"]}\n"
|
||||
jsonOutput = "{\"diff\":{\"after\":{\"sourceRange\":[0,21],\"sourceSpan\":{\"start\":[1,1],\"end\":[4,1]}},\"children\":[{\"after\":{\"sourceRange\":[0,20],\"sourceSpan\":{\"start\":[1,1],\"end\":[3,4]}},\"children\":[{\"after\":{\"sourceRange\":[0,3],\"sourceSpan\":{\"start\":[1,1],\"end\":[1,4]}},\"children\":[],\"before\":{\"sourceRange\":[0,3],\"sourceSpan\":{\"start\":[1,1],\"end\":[1,4]}}},{\"replace\":[{\"children\":[],\"sourceRange\":[4,7],\"sourceSpan\":{\"start\":[1,5],\"end\":[1,8]}},{\"children\":[],\"sourceRange\":[4,7],\"sourceSpan\":{\"start\":[1,5],\"end\":[1,8]}}]},{\"insert\":{\"children\":[],\"sourceRange\":[8,9],\"sourceSpan\":{\"start\":[1,9],\"end\":[1,10]}}},{\"after\":{\"sourceRange\":[13,16],\"sourceSpan\":{\"start\":[2,3],\"end\":[2,6]}},\"children\":[{\"insert\":{\"children\":[],\"sourceRange\":[13,16],\"sourceSpan\":{\"start\":[2,3],\"end\":[2,6]}}}],\"before\":{\"sourceRange\":[8,11],\"sourceSpan\":{\"start\":[2,1],\"end\":[2,4]}}}],\"before\":{\"sourceRange\":[0,11],\"sourceSpan\":{\"start\":[1,1],\"end\":[2,4]}}}],\"before\":{\"sourceRange\":[0,12],\"sourceSpan\":{\"start\":[1,1],\"end\":[3,1]}}},\"oids\":[\"0000000000000000000000000000000000000000\",\"0000000000000000000000000000000000000000\"],\"paths\":[\"test/fixtures/ruby/method-declaration.A.rb\",\"test/fixtures/ruby/method-declaration.B.rb\"]}\n"
|
||||
sExpressionOutput = "(Program\n (Method\n (Empty)\n { (Identifier)\n ->(Identifier) }\n {+(Identifier)+}\n (\n {+(Identifier)+})))\n"
|
||||
tocOutput = "{\"changes\":{\"test/fixtures/ruby/method-declaration.A.rb -> test/fixtures/ruby/method-declaration.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"}]},\"errors\":{}}\n"
|
||||
|
||||
|
@ -152,12 +152,12 @@ spec = parallel $ do
|
||||
it "produces JSON output" $ do
|
||||
blobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.B.rb")
|
||||
output <- runTask (diffBlobPair ToCDiffRenderer blobs)
|
||||
toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}\n" :: ByteString)
|
||||
toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"}]},\"errors\":{}}\n" :: ByteString)
|
||||
|
||||
it "produces JSON output if there are parse errors" $ do
|
||||
blobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.X.rb")
|
||||
output <- runTask (diffBlobPair ToCDiffRenderer blobs)
|
||||
toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.X.rb\":[{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"test/fixtures/toc/ruby/methods.X.rb:1:1-3:1: error: expected Program, but got ParseError\\nsymbol (src/Language/Ruby/Syntax.hs:85:41-85:55)\\n\",\"language\":\"Ruby\"}]}}\n" :: ByteString)
|
||||
toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.X.rb\":[{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"test/fixtures/toc/ruby/methods.X.rb:1:1-3:1: error: expected Program, but got ParseError\\nsymbol (src/Language/Ruby/Syntax.hs:86:41-86:55)\\n\",\"language\":\"Ruby\"}]}}\n" :: ByteString)
|
||||
|
||||
it "summarizes Markdown headings" $ do
|
||||
blobs <- blobsForPaths (both "markdown/headings.A.md" "markdown/headings.B.md")
|
||||
|
Loading…
Reference in New Issue
Block a user