1
1
mirror of https://github.com/github/semantic.git synced 2024-12-29 01:42:43 +03:00

🔥 diffAt, Cost, &c.

This commit is contained in:
Rob Rix 2017-03-24 14:12:14 -04:00
parent fedacdc7d5
commit 66706c31de
2 changed files with 7 additions and 50 deletions

View File

@ -33,7 +33,7 @@ import Diff
import Info
import Patch
import Prologue as P
import qualified SES
import SES
import System.Random.Mersenne.Pure64
import Term (Term, TermF)
@ -47,7 +47,7 @@ type Label f fields label = forall b. TermF f (Record fields) b -> label
-- This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
rws :: forall f fields.
(GAlign f, Traversable f, Eq1 f, HasField fields Category, HasField fields (Maybe FeatureVector))
=> SES.Cost (Term f (Record fields)) -- ^ A function computes a constant-time approximation to the edit distance between two terms.
=> (These (Term f (Record fields)) (Term f (Record fields)) -> Int) -- ^ A function computes a constant-time approximation to the edit distance between two terms.
-> (Term f (Record fields) -> Term f (Record fields) -> Bool) -- ^ A relation determining whether two terms can be compared.
-> [Term f (Record fields)] -- ^ The list of old terms.
-> [Term f (Record fields)] -- ^ The list of new terms.
@ -69,7 +69,7 @@ rws editDistance canCompare as bs
where
minimumTermIndex = pred . maybe 0 getMin . getOption . foldMap (Option . Just . Min . termIndex)
sesDiffs = SES.ses (gliftEq (==) `on` fmap category) cost as bs
sesDiffs = ses (gliftEq (==) `on` fmap category) as bs
(featurizedAs, featurizedBs, _, _, countersAndDiffs, allDiffs) =
foldl' (\(as, bs, counterA, counterB, diffs, allDiffs) diff -> case diff of
@ -151,8 +151,6 @@ rws editDistance canCompare as bs
diffs
((termIndex &&& This . term) <$> unmappedA)
cost = these (const 1) (const 1) (const (const 0))
kdas = KdTree.build (elems . feature) featurizedAs
kdbs = KdTree.build (elems . feature) featurizedBs

View File

@ -1,52 +1,11 @@
{-# LANGUAGE Strict #-}
module SES where
module SES
( Comparable
, Myers.ses
) where
import Data.Array.MArray
import Data.Array.ST
import Data.These
import Prologue
import qualified SES.Myers as Myers
-- | Edit constructor for two terms, if comparable. Otherwise returns Nothing.
type Comparable term = term -> term -> Bool
-- | A function that computes the cost of an edit.
type Cost term = These term term -> Int
-- | Find the shortest edit script (diff) between two terms given a function to compute the cost.
ses :: Comparable term -> Cost term -> [term] -> [term] -> [These term term]
ses canCompare _ as bs = Myers.ses canCompare as bs
-- | Find the shortest edit script between two terms at a given vertex in the edit graph.
diffAt :: STArray s (Int, Int) (Maybe [(These term term, Int)]) -> Comparable term -> Cost term -> (Int, Int) -> [term] -> [term] -> ST s [(These term term, Int)]
diffAt array canCompare cost (i, j) as bs
| (a : as') <- as, (b : bs') <- bs = do
maybeDiff <- readArray array (i, j)
case maybeDiff of
Just diffs -> pure diffs
Nothing -> do
down <- recur (i, succ j) as' bs
right <- recur (succ i, j) as bs'
nomination <- best <$> if canCompare a b
then do
diagonal <- recur (succ i, succ j) as' bs'
pure [ delete a down, insert b right, consWithCost cost (These a b) diagonal ]
else pure [ delete a down, insert b right ]
writeArray array (i, j) (Just nomination)
pure nomination
| null as = pure $ foldr insert [] bs
| null bs = pure $ foldr delete [] as
| otherwise = pure []
where
delete = consWithCost cost . This
insert = consWithCost cost . That
costOf [] = 0
costOf ((_, c) : _) = c
best = minimumBy (comparing costOf)
recur = diffAt array canCompare cost
-- | Prepend an edit script and the cumulative cost onto the edit script.
consWithCost :: Cost term -> These term term -> [(These term term, Int)] -> [(These term term, Int)]
consWithCost cost edit rest = (edit, cost edit + maybe 0 snd (fst <$> uncons rest)) : rest