mirror of
https://github.com/github/semantic.git
synced 2024-12-29 01:42:43 +03:00
🔥 diffAt, Cost, &c.
This commit is contained in:
parent
fedacdc7d5
commit
66706c31de
@ -33,7 +33,7 @@ import Diff
|
||||
import Info
|
||||
import Patch
|
||||
import Prologue as P
|
||||
import qualified SES
|
||||
import SES
|
||||
import System.Random.Mersenne.Pure64
|
||||
import Term (Term, TermF)
|
||||
|
||||
@ -47,7 +47,7 @@ type Label f fields label = forall b. TermF f (Record fields) b -> label
|
||||
-- This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
|
||||
rws :: forall f fields.
|
||||
(GAlign f, Traversable f, Eq1 f, HasField fields Category, HasField fields (Maybe FeatureVector))
|
||||
=> SES.Cost (Term f (Record fields)) -- ^ A function computes a constant-time approximation to the edit distance between two terms.
|
||||
=> (These (Term f (Record fields)) (Term f (Record fields)) -> Int) -- ^ A function computes a constant-time approximation to the edit distance between two terms.
|
||||
-> (Term f (Record fields) -> Term f (Record fields) -> Bool) -- ^ A relation determining whether two terms can be compared.
|
||||
-> [Term f (Record fields)] -- ^ The list of old terms.
|
||||
-> [Term f (Record fields)] -- ^ The list of new terms.
|
||||
@ -69,7 +69,7 @@ rws editDistance canCompare as bs
|
||||
|
||||
where
|
||||
minimumTermIndex = pred . maybe 0 getMin . getOption . foldMap (Option . Just . Min . termIndex)
|
||||
sesDiffs = SES.ses (gliftEq (==) `on` fmap category) cost as bs
|
||||
sesDiffs = ses (gliftEq (==) `on` fmap category) as bs
|
||||
|
||||
(featurizedAs, featurizedBs, _, _, countersAndDiffs, allDiffs) =
|
||||
foldl' (\(as, bs, counterA, counterB, diffs, allDiffs) diff -> case diff of
|
||||
@ -151,8 +151,6 @@ rws editDistance canCompare as bs
|
||||
diffs
|
||||
((termIndex &&& This . term) <$> unmappedA)
|
||||
|
||||
cost = these (const 1) (const 1) (const (const 0))
|
||||
|
||||
kdas = KdTree.build (elems . feature) featurizedAs
|
||||
kdbs = KdTree.build (elems . feature) featurizedBs
|
||||
|
||||
|
49
src/SES.hs
49
src/SES.hs
@ -1,52 +1,11 @@
|
||||
{-# LANGUAGE Strict #-}
|
||||
module SES where
|
||||
module SES
|
||||
( Comparable
|
||||
, Myers.ses
|
||||
) where
|
||||
|
||||
import Data.Array.MArray
|
||||
import Data.Array.ST
|
||||
import Data.These
|
||||
import Prologue
|
||||
import qualified SES.Myers as Myers
|
||||
|
||||
|
||||
-- | Edit constructor for two terms, if comparable. Otherwise returns Nothing.
|
||||
type Comparable term = term -> term -> Bool
|
||||
|
||||
-- | A function that computes the cost of an edit.
|
||||
type Cost term = These term term -> Int
|
||||
|
||||
-- | Find the shortest edit script (diff) between two terms given a function to compute the cost.
|
||||
ses :: Comparable term -> Cost term -> [term] -> [term] -> [These term term]
|
||||
ses canCompare _ as bs = Myers.ses canCompare as bs
|
||||
|
||||
|
||||
-- | Find the shortest edit script between two terms at a given vertex in the edit graph.
|
||||
diffAt :: STArray s (Int, Int) (Maybe [(These term term, Int)]) -> Comparable term -> Cost term -> (Int, Int) -> [term] -> [term] -> ST s [(These term term, Int)]
|
||||
diffAt array canCompare cost (i, j) as bs
|
||||
| (a : as') <- as, (b : bs') <- bs = do
|
||||
maybeDiff <- readArray array (i, j)
|
||||
case maybeDiff of
|
||||
Just diffs -> pure diffs
|
||||
Nothing -> do
|
||||
down <- recur (i, succ j) as' bs
|
||||
right <- recur (succ i, j) as bs'
|
||||
nomination <- best <$> if canCompare a b
|
||||
then do
|
||||
diagonal <- recur (succ i, succ j) as' bs'
|
||||
pure [ delete a down, insert b right, consWithCost cost (These a b) diagonal ]
|
||||
else pure [ delete a down, insert b right ]
|
||||
writeArray array (i, j) (Just nomination)
|
||||
pure nomination
|
||||
| null as = pure $ foldr insert [] bs
|
||||
| null bs = pure $ foldr delete [] as
|
||||
| otherwise = pure []
|
||||
where
|
||||
delete = consWithCost cost . This
|
||||
insert = consWithCost cost . That
|
||||
costOf [] = 0
|
||||
costOf ((_, c) : _) = c
|
||||
best = minimumBy (comparing costOf)
|
||||
recur = diffAt array canCompare cost
|
||||
|
||||
-- | Prepend an edit script and the cumulative cost onto the edit script.
|
||||
consWithCost :: Cost term -> These term term -> [(These term term, Int)] -> [(These term term, Int)]
|
||||
consWithCost cost edit rest = (edit, cost edit + maybe 0 snd (fst <$> uncons rest)) : rest
|
||||
|
Loading…
Reference in New Issue
Block a user