1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 06:11:49 +03:00

SES produces edit scripts in These.

This commit is contained in:
Rob Rix 2017-02-23 14:35:40 -05:00
parent 8237a6aafd
commit 98e9da9249
2 changed files with 24 additions and 29 deletions

View File

@ -71,17 +71,18 @@ rws compare canCompare as bs
where
minimumTermIndex = pred . maybe 0 getMin . getOption . foldMap (Option . Just . Min . termIndex)
sesDiffs = SES.ses replaceIfEqual cost as bs
sesDiffs = SES.ses (gliftEq (==) `on` fmap category) cost as bs
(featurizedAs, featurizedBs, _, _, countersAndDiffs, allDiffs) =
foldl' (\(as, bs, counterA, counterB, diffs, allDiffs) diff -> case runFree diff of
Pure (Delete term) ->
foldl' (\(as, bs, counterA, counterB, diffs, allDiffs) diff -> case diff of
This term ->
(as <> pure (featurize counterA term), bs, succ counterA, counterB, diffs, allDiffs <> pure None)
Pure (Insert term) ->
That term ->
(as, bs <> pure (featurize counterB term), counterA, succ counterB, diffs, allDiffs <> pure (Term (featurize counterB term)))
_ ->
(as, bs, succ counterA, succ counterB, diffs <> pure (These counterA counterB, diff), allDiffs <> pure (Index counterA))
These a b ->
(as, bs, succ counterA, succ counterB, diffs <> pure (These counterA counterB, getDiff a b), allDiffs <> pure (Index counterA))
) ([], [], 0, 0, [], []) sesDiffs
getDiff a b = let Just diff = hylo wrap runCofree <$> zipTerms (eraseFeatureVector a) (eraseFeatureVector b) in diff
findNearestNeighbourToDiff :: TermOrIndexOrNone (UnmappedTerm f fields)
-> State (Int, UnmappedTerms f fields, UnmappedTerms f fields)
@ -153,13 +154,7 @@ rws compare canCompare as bs
diffs
((termIndex &&& compare . This . term) <$> unmappedA)
-- Possibly replace terms in a diff.
replaceIfEqual :: Term f (Record fields) -> Term f (Record fields) -> Maybe (Diff f (Record fields))
replaceIfEqual a b
| gliftEq (==) (category <$> a) (category <$> b) = hylo wrap runCofree <$> zipTerms (eraseFeatureVector a) (eraseFeatureVector b)
| otherwise = Nothing
cost = iter (const 0) . (1 <$)
cost = these (const 1) (const 1) (const (const 0))
kdas = KdTree.build (elems . feature) featurizedAs
kdbs = KdTree.build (elems . feature) featurizedBs

View File

@ -2,24 +2,24 @@
module SES where
import qualified Data.Map as Map
import Patch
import Data.These
import Prologue
-- | Edit constructor for two terms, if comparable. Otherwise returns Nothing.
type Compare term edit = term -> term -> Maybe edit
type Comparable term = term -> term -> Bool
-- | A function that computes the cost of an edit.
type Cost edit = edit -> Int
type Cost term = These term term -> Int
-- | Find the shortest edit script (diff) between two terms given a function to compute the cost.
ses :: Applicative edit => Compare term (edit (Patch term)) -> Cost (edit (Patch term)) -> [term] -> [term] -> [edit (Patch term)]
ses diffTerms cost as bs = fst <$> evalState diffState Map.empty where
diffState = diffAt diffTerms cost (0, 0) as bs
ses :: Comparable term -> Cost term -> [term] -> [term] -> [These term term]
ses canCompare cost as bs = fst <$> evalState diffState Map.empty where
diffState = diffAt canCompare cost (0, 0) as bs
-- | Find the shortest edit script between two terms at a given vertex in the edit graph.
diffAt :: Applicative edit => Compare term (edit (Patch term)) -> Cost (edit (Patch term)) -> (Int, Int) -> [term] -> [term] -> State (Map.Map (Int, Int) [(edit (Patch term), Int)]) [(edit (Patch term), Int)]
diffAt diffTerms cost (i, j) as bs
diffAt :: Comparable term -> Cost term -> (Int, Int) -> [term] -> [term] -> State (Map.Map (Int, Int) [(These term term, Int)]) [(These term term, Int)]
diffAt canCompare cost (i, j) as bs
| (a : as) <- as, (b : bs) <- bs = do
cachedDiffs <- get
case Map.lookup (i, j) cachedDiffs of
@ -27,11 +27,11 @@ diffAt diffTerms cost (i, j) as bs
Nothing -> do
down <- recur (i, succ j) as (b : bs)
right <- recur (succ i, j) (a : as) bs
nomination <- best <$> case diffTerms a b of
Just diff -> do
nomination <- best <$> if canCompare a b
then do
diagonal <- recur (succ i, succ j) as bs
pure [ delete a down, insert b right, consWithCost cost diff diagonal ]
Nothing -> pure [ delete a down, insert b right ]
pure [ delete a down, insert b right, consWithCost cost (These a b) diagonal ]
else pure [ delete a down, insert b right ]
cachedDiffs' <- get
put $ Map.insert (i, j) nomination cachedDiffs'
pure nomination
@ -39,13 +39,13 @@ diffAt diffTerms cost (i, j) as bs
| null bs = pure $ foldr delete [] as
| otherwise = pure []
where
delete = consWithCost cost . deleting
insert = consWithCost cost . inserting
delete = consWithCost cost . This
insert = consWithCost cost . That
costOf [] = 0
costOf ((_, c) : _) = c
best = minimumBy (comparing costOf)
recur = diffAt diffTerms cost
recur = diffAt canCompare cost
-- | Prepend an edit script and the cumulative cost onto the edit script.
consWithCost :: Cost edit -> edit -> [(edit, Int)] -> [(edit, Int)]
consWithCost :: Cost term -> These term term -> [(These term term, Int)] -> [(These term term, Int)]
consWithCost cost edit rest = (edit, cost edit + maybe 0 snd (fst <$> uncons rest)) : rest