1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 23:11:50 +03:00

Merge pull request #689 from github/unbiased-branch-diffs

Unbiased branch diffs
This commit is contained in:
Josh Vera 2016-08-15 15:21:06 -04:00 committed by GitHub
commit b06cb4695e
6 changed files with 30 additions and 23 deletions

View File

@ -35,22 +35,28 @@ rws compare as bs
| null as, null bs = []
| null as = inserting <$> bs
| null bs = deleting <$> as
| otherwise = fmap snd . uncurry deleteRemaining . (`runState` (negate 1, fas)) $ traverse findNearestNeighbourTo fbs
| otherwise = fmap snd . uncurry deleteRemaining . (`runState` (negate 1, fas, fbs)) $ traverse findNearestNeighbourTo fbs
where fas = zipWith featurize [0..] as
fbs = zipWith featurize [0..] bs
kdas = KdTree.build (Vector.toList . feature) fas
kdbs = KdTree.build (Vector.toList . feature) fbs
featurize index term = UnmappedTerm index (getField (extract term)) term
findNearestNeighbourTo kv@(UnmappedTerm _ _ v) = do
(previous, unmapped) <- get
let UnmappedTerm i _ _ = KdTree.nearest kdas kv
fromMaybe (pure (negate 1, inserting v)) $ do
found <- find ((== i) . termIndex) unmapped
findNearestNeighbourTo kv@(UnmappedTerm j _ b) = do
(previous, unmappedA, unmappedB) <- get
fromMaybe (insertion previous unmappedA unmappedB kv) $ do
foundA@(UnmappedTerm i _ a) <- nearestUnmapped unmappedA kdas kv
foundB@(UnmappedTerm j' _ _) <- nearestUnmapped unmappedB kdbs foundA
guard (j == j')
guard (i >= previous)
compared <- compare (term found) v
compared <- compare a b
pure $! do
put (i, List.delete found unmapped)
put (i, List.delete foundA unmappedA, List.delete foundB unmappedB)
pure (i, compared)
deleteRemaining diffs (_, unmapped) = foldl' (flip (List.insertBy (comparing fst))) diffs ((termIndex &&& deleting . term) <$> unmapped)
nearestUnmapped unmapped tree key = find ((== termIndex (KdTree.nearest tree key)) . termIndex) unmapped
insertion previous unmappedA unmappedB kv@(UnmappedTerm _ _ b) = do
put (previous, unmappedA, List.delete kv unmappedB)
pure (negate 1, inserting b)
deleteRemaining diffs (_, unmappedA, _) = foldl' (flip (List.insertBy (comparing fst))) diffs ((termIndex &&& deleting . term) <$> unmappedA)
-- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index.
data UnmappedTerm a = UnmappedTerm { termIndex :: {-# UNPACK #-} !Int, feature :: !(Vector.Vector Double), term :: !a }
@ -96,9 +102,11 @@ featureVectorDecorator getLabel p q d
cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor))
. pqGramDecorator getLabel p q
-- | Strips the head annotation off a term annotated with non-empty records.
stripTerm :: Functor f => Cofree f (Record (h ': t)) -> Cofree f (Record t)
stripTerm = fmap rtail
-- | Strips the head annotation off a diff annotated with non-empty records.
stripDiff :: (Functor f, Functor g) => Free (CofreeF f (g (Record (h ': t)))) (Patch (Cofree f (Record (h ': t)))) -> Free (CofreeF f (g (Record t))) (Patch (Cofree f (Record t)))
stripDiff = iter (\ (h :< f) -> wrap (fmap rtail h :< f)) . fmap (pure . fmap stripTerm)

View File

@ -45,30 +45,30 @@ diffComparableTerms construct comparable cost = recur
-- | Construct an algorithm to diff a pair of terms.
algorithmWithTerms :: (TermF leaf (Both a) diff -> diff) -> Term leaf a -> Term leaf a -> Algorithm (Term leaf a) diff diff
algorithmWithTerms construct t1 t2 = case (unwrap t1, unwrap t2) of
(Indexed a, Indexed b) -> byIndex Indexed a b
(Indexed a, Indexed b) -> branch Indexed a b
(S.FunctionCall identifierA argsA, S.FunctionCall identifierB argsB) -> do
identifier <- recursively identifierA identifierB
byIndex (S.FunctionCall identifier) argsA argsB
branch (S.FunctionCall identifier) argsA argsB
(S.Switch exprA casesA, S.Switch exprB casesB) -> do
expr <- recursively exprA exprB
byIndex (S.Switch expr) casesA casesB
(S.Object a, S.Object b) -> byIndex S.Object a b
branch (S.Switch expr) casesA casesB
(S.Object a, S.Object b) -> branch S.Object a b
(Commented commentsA a, Commented commentsB b) -> do
wrapped <- sequenceA (recursively <$> a <*> b)
byIndex (`Commented` wrapped) commentsA commentsB
(Array a, Array b) -> byIndex Array a b
branch (`Commented` wrapped) commentsA commentsB
(Array a, Array b) -> branch Array a b
(S.Class identifierA paramsA expressionsA, S.Class identifierB paramsB expressionsB) -> do
identifier <- recursively identifierA identifierB
params <- sequenceA (recursively <$> paramsA <*> paramsB)
byIndex (S.Class identifier params) expressionsA expressionsB
branch (S.Class identifier params) expressionsA expressionsB
(S.Method identifierA paramsA expressionsA, S.Method identifierB paramsB expressionsB) -> do
identifier <- recursively identifierA identifierB
params <- Algorithm.byIndex paramsA paramsB
expressions <- Algorithm.byIndex expressionsA expressionsB
params <- bySimilarity paramsA paramsB
expressions <- bySimilarity expressionsA expressionsB
annotate $! S.Method identifier params expressions
_ -> recursively t1 t2
where annotate = pure . construct . (both (extract t1) (extract t2) :<)
byIndex constructor a b = Algorithm.byIndex a b >>= annotate . constructor
branch constructor a b = bySimilarity a b >>= annotate . constructor
-- | Run an algorithm, given functions characterizing the evaluation.
runAlgorithm :: (Functor f, GAlign f, Eq a, Eq (Record fields), Eq (f (Cofree f (Record fields))), Prologue.Foldable f, Traversable f, HasField fields (Vector.Vector Double))

View File

@ -36,10 +36,9 @@ spec = parallel $ do
\ (as, bs) -> let tas = toTerm' <$> (as :: [ArbitraryTerm Text (Record '[Category])])
tbs = toTerm' <$> (bs :: [ArbitraryTerm Text (Record '[Category])])
root = cofree . ((Program .: RNil) :<) . Indexed
diff = free (Free (pure (Program .: RNil) :< Indexed (stripDiff <$> rws compare tas tbs))) in
diff = wrap (pure (Program .: RNil) :< Indexed (stripDiff <$> rws compare tas tbs)) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (root (stripTerm <$> tas)), Just (root (stripTerm <$> tbs)))
it "produces unbiased insertions within branches" $
pendingWith "Currently fails due to https://github.com/github/semantic-diff/issues/683"
-- let (a, b) = (decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "a") ])), decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "b") ]))) in
-- fmap stripDiff (rws compare [ b ] [ a, b ]) `shouldBe` fmap stripDiff [ inserting a, replacing b b ]
let (a, b) = (decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "a") ])), decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "b") ]))) in
fmap stripDiff (rws compare [ b ] [ a, b ]) `shouldBe` fmap stripDiff [ inserting a, replacing b b ]