mirror of
https://github.com/github/semantic.git
synced 2024-12-23 23:11:50 +03:00
Merge pull request #689 from github/unbiased-branch-diffs
Unbiased branch diffs
This commit is contained in:
commit
b06cb4695e
@ -35,22 +35,28 @@ rws compare as bs
|
||||
| null as, null bs = []
|
||||
| null as = inserting <$> bs
|
||||
| null bs = deleting <$> as
|
||||
| otherwise = fmap snd . uncurry deleteRemaining . (`runState` (negate 1, fas)) $ traverse findNearestNeighbourTo fbs
|
||||
| otherwise = fmap snd . uncurry deleteRemaining . (`runState` (negate 1, fas, fbs)) $ traverse findNearestNeighbourTo fbs
|
||||
where fas = zipWith featurize [0..] as
|
||||
fbs = zipWith featurize [0..] bs
|
||||
kdas = KdTree.build (Vector.toList . feature) fas
|
||||
kdbs = KdTree.build (Vector.toList . feature) fbs
|
||||
featurize index term = UnmappedTerm index (getField (extract term)) term
|
||||
findNearestNeighbourTo kv@(UnmappedTerm _ _ v) = do
|
||||
(previous, unmapped) <- get
|
||||
let UnmappedTerm i _ _ = KdTree.nearest kdas kv
|
||||
fromMaybe (pure (negate 1, inserting v)) $ do
|
||||
found <- find ((== i) . termIndex) unmapped
|
||||
findNearestNeighbourTo kv@(UnmappedTerm j _ b) = do
|
||||
(previous, unmappedA, unmappedB) <- get
|
||||
fromMaybe (insertion previous unmappedA unmappedB kv) $ do
|
||||
foundA@(UnmappedTerm i _ a) <- nearestUnmapped unmappedA kdas kv
|
||||
foundB@(UnmappedTerm j' _ _) <- nearestUnmapped unmappedB kdbs foundA
|
||||
guard (j == j')
|
||||
guard (i >= previous)
|
||||
compared <- compare (term found) v
|
||||
compared <- compare a b
|
||||
pure $! do
|
||||
put (i, List.delete found unmapped)
|
||||
put (i, List.delete foundA unmappedA, List.delete foundB unmappedB)
|
||||
pure (i, compared)
|
||||
deleteRemaining diffs (_, unmapped) = foldl' (flip (List.insertBy (comparing fst))) diffs ((termIndex &&& deleting . term) <$> unmapped)
|
||||
nearestUnmapped unmapped tree key = find ((== termIndex (KdTree.nearest tree key)) . termIndex) unmapped
|
||||
insertion previous unmappedA unmappedB kv@(UnmappedTerm _ _ b) = do
|
||||
put (previous, unmappedA, List.delete kv unmappedB)
|
||||
pure (negate 1, inserting b)
|
||||
deleteRemaining diffs (_, unmappedA, _) = foldl' (flip (List.insertBy (comparing fst))) diffs ((termIndex &&& deleting . term) <$> unmappedA)
|
||||
|
||||
-- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index.
|
||||
data UnmappedTerm a = UnmappedTerm { termIndex :: {-# UNPACK #-} !Int, feature :: !(Vector.Vector Double), term :: !a }
|
||||
@ -96,9 +102,11 @@ featureVectorDecorator getLabel p q d
|
||||
cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor))
|
||||
. pqGramDecorator getLabel p q
|
||||
|
||||
-- | Strips the head annotation off a term annotated with non-empty records.
|
||||
stripTerm :: Functor f => Cofree f (Record (h ': t)) -> Cofree f (Record t)
|
||||
stripTerm = fmap rtail
|
||||
|
||||
-- | Strips the head annotation off a diff annotated with non-empty records.
|
||||
stripDiff :: (Functor f, Functor g) => Free (CofreeF f (g (Record (h ': t)))) (Patch (Cofree f (Record (h ': t)))) -> Free (CofreeF f (g (Record t))) (Patch (Cofree f (Record t)))
|
||||
stripDiff = iter (\ (h :< f) -> wrap (fmap rtail h :< f)) . fmap (pure . fmap stripTerm)
|
||||
|
||||
|
@ -45,30 +45,30 @@ diffComparableTerms construct comparable cost = recur
|
||||
-- | Construct an algorithm to diff a pair of terms.
|
||||
algorithmWithTerms :: (TermF leaf (Both a) diff -> diff) -> Term leaf a -> Term leaf a -> Algorithm (Term leaf a) diff diff
|
||||
algorithmWithTerms construct t1 t2 = case (unwrap t1, unwrap t2) of
|
||||
(Indexed a, Indexed b) -> byIndex Indexed a b
|
||||
(Indexed a, Indexed b) -> branch Indexed a b
|
||||
(S.FunctionCall identifierA argsA, S.FunctionCall identifierB argsB) -> do
|
||||
identifier <- recursively identifierA identifierB
|
||||
byIndex (S.FunctionCall identifier) argsA argsB
|
||||
branch (S.FunctionCall identifier) argsA argsB
|
||||
(S.Switch exprA casesA, S.Switch exprB casesB) -> do
|
||||
expr <- recursively exprA exprB
|
||||
byIndex (S.Switch expr) casesA casesB
|
||||
(S.Object a, S.Object b) -> byIndex S.Object a b
|
||||
branch (S.Switch expr) casesA casesB
|
||||
(S.Object a, S.Object b) -> branch S.Object a b
|
||||
(Commented commentsA a, Commented commentsB b) -> do
|
||||
wrapped <- sequenceA (recursively <$> a <*> b)
|
||||
byIndex (`Commented` wrapped) commentsA commentsB
|
||||
(Array a, Array b) -> byIndex Array a b
|
||||
branch (`Commented` wrapped) commentsA commentsB
|
||||
(Array a, Array b) -> branch Array a b
|
||||
(S.Class identifierA paramsA expressionsA, S.Class identifierB paramsB expressionsB) -> do
|
||||
identifier <- recursively identifierA identifierB
|
||||
params <- sequenceA (recursively <$> paramsA <*> paramsB)
|
||||
byIndex (S.Class identifier params) expressionsA expressionsB
|
||||
branch (S.Class identifier params) expressionsA expressionsB
|
||||
(S.Method identifierA paramsA expressionsA, S.Method identifierB paramsB expressionsB) -> do
|
||||
identifier <- recursively identifierA identifierB
|
||||
params <- Algorithm.byIndex paramsA paramsB
|
||||
expressions <- Algorithm.byIndex expressionsA expressionsB
|
||||
params <- bySimilarity paramsA paramsB
|
||||
expressions <- bySimilarity expressionsA expressionsB
|
||||
annotate $! S.Method identifier params expressions
|
||||
_ -> recursively t1 t2
|
||||
where annotate = pure . construct . (both (extract t1) (extract t2) :<)
|
||||
byIndex constructor a b = Algorithm.byIndex a b >>= annotate . constructor
|
||||
branch constructor a b = bySimilarity a b >>= annotate . constructor
|
||||
|
||||
-- | Run an algorithm, given functions characterizing the evaluation.
|
||||
runAlgorithm :: (Functor f, GAlign f, Eq a, Eq (Record fields), Eq (f (Cofree f (Record fields))), Prologue.Foldable f, Traversable f, HasField fields (Vector.Vector Double))
|
||||
|
@ -36,10 +36,9 @@ spec = parallel $ do
|
||||
\ (as, bs) -> let tas = toTerm' <$> (as :: [ArbitraryTerm Text (Record '[Category])])
|
||||
tbs = toTerm' <$> (bs :: [ArbitraryTerm Text (Record '[Category])])
|
||||
root = cofree . ((Program .: RNil) :<) . Indexed
|
||||
diff = free (Free (pure (Program .: RNil) :< Indexed (stripDiff <$> rws compare tas tbs))) in
|
||||
diff = wrap (pure (Program .: RNil) :< Indexed (stripDiff <$> rws compare tas tbs)) in
|
||||
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (root (stripTerm <$> tas)), Just (root (stripTerm <$> tbs)))
|
||||
|
||||
it "produces unbiased insertions within branches" $
|
||||
pendingWith "Currently fails due to https://github.com/github/semantic-diff/issues/683"
|
||||
-- let (a, b) = (decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "a") ])), decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "b") ]))) in
|
||||
-- fmap stripDiff (rws compare [ b ] [ a, b ]) `shouldBe` fmap stripDiff [ inserting a, replacing b b ]
|
||||
let (a, b) = (decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "a") ])), decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "b") ]))) in
|
||||
fmap stripDiff (rws compare [ b ] [ a, b ]) `shouldBe` fmap stripDiff [ inserting a, replacing b b ]
|
||||
|
Loading…
Reference in New Issue
Block a user