mirror of
https://github.com/github/semantic.git
synced 2024-11-29 02:44:36 +03:00
📝 all the things.
This commit is contained in:
parent
70b825ecbb
commit
7f13ef23a2
@ -13,6 +13,7 @@ import GHC.Stack
|
||||
import Prologue hiding (for, State, error)
|
||||
import Text.Show (showListWith)
|
||||
|
||||
-- | Operations in Myers’ algorithm.
|
||||
data MyersF a b result where
|
||||
SES :: MyersF a b (EditScript a b)
|
||||
LCS :: MyersF a b [(a, b)]
|
||||
@ -28,39 +29,48 @@ data MyersF a b result where
|
||||
|
||||
Slide :: Endpoint a b -> MyersF a b (Endpoint a b)
|
||||
|
||||
-- | An edit script, i.e. a sequence of changes/copies of elements.
|
||||
type EditScript a b = [These a b]
|
||||
|
||||
-- | Steps in the execution of Myers’ algorithm, i.e. the sum of MyersF and State.
|
||||
data StepF a b result where
|
||||
M :: HasCallStack => MyersF a b c -> StepF a b c
|
||||
S :: State (MyersState a b) c -> StepF a b c
|
||||
|
||||
type Myers a b = Freer (StepF a b)
|
||||
|
||||
-- | Notionally the cartesian product of two sequences, represented as a simple wrapper around those arrays holding those sequences’ elements for O(1) lookups.
|
||||
data EditGraph a b = EditGraph { as :: !(Array.Array Int a), bs :: !(Array.Array Int b) }
|
||||
deriving (Eq, Show)
|
||||
|
||||
-- | Construct an edit graph from Foldable sequences.
|
||||
makeEditGraph :: (Foldable t, Foldable u) => t a -> u b -> EditGraph a b
|
||||
makeEditGraph as bs = EditGraph (Array.listArray (0, pred (length as)) (toList as)) (Array.listArray (0, pred (length bs)) (toList bs))
|
||||
|
||||
-- | An edit distance, i.e. a cardinal number of changes.
|
||||
newtype Distance = Distance { unDistance :: Int }
|
||||
deriving (Eq, Show)
|
||||
|
||||
-- | A diagonal in the edit graph of lists of lengths n and m, numbered from -m to n.
|
||||
newtype Diagonal = Diagonal { unDiagonal :: Int }
|
||||
deriving (Eq, Show)
|
||||
|
||||
-- | The endpoint of a path through the edit graph, represented as the x/y indices and the script of edits made to get to that point.
|
||||
data Endpoint a b = Endpoint { x :: !Int, y :: !Int, script :: !(EditScript a b) }
|
||||
deriving (Eq, Show)
|
||||
|
||||
|
||||
-- API
|
||||
|
||||
-- | Compute the shortest edit script using Myers’ algorithm.
|
||||
ses :: (HasCallStack, Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript a b
|
||||
ses eq as bs = runMyers eq (makeEditGraph as bs) (M SES `Then` return)
|
||||
|
||||
|
||||
-- Evaluation
|
||||
|
||||
runMyers :: forall a b c. HasCallStack => (a -> b -> Bool) -> EditGraph a b ->Myers a b c -> c
|
||||
-- | Fully evaluate an operation in Myers’ algorithm given a comparator function and an edit graph.
|
||||
runMyers :: forall a b c. HasCallStack => (a -> b -> Bool) -> EditGraph a b -> Myers a b c -> c
|
||||
runMyers eq graph step = evalState (go step) (emptyStateForGraph graph)
|
||||
where go :: forall c. Myers a b c -> StateT (MyersState a b) Identity c
|
||||
go = iterFreerA algebra
|
||||
@ -70,7 +80,8 @@ runMyers eq graph step = evalState (go step) (emptyStateForGraph graph)
|
||||
S Get -> get >>= cont
|
||||
S (Put s) -> put s >>= cont
|
||||
|
||||
runMyersSteps :: HasCallStack => (a -> b -> Bool) -> EditGraph a b ->Myers a b c -> [(MyersState a b, Myers a b c)]
|
||||
-- | Fully evaluate an operation in Myers’ algorithm given a comparator function and an edit graph, returning a list of states and next steps.
|
||||
runMyersSteps :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> Myers a b c -> [(MyersState a b, Myers a b c)]
|
||||
runMyersSteps eq graph = go (emptyStateForGraph graph)
|
||||
where go state step = let ?callStack = popCallStack callStack in prefix state step $ case runMyersStep eq graph state step of
|
||||
Left result -> [ (state, return result) ]
|
||||
@ -79,7 +90,8 @@ runMyersSteps eq graph = go (emptyStateForGraph graph)
|
||||
Then (M _) _ -> ((state, step) :)
|
||||
_ -> identity
|
||||
|
||||
runMyersStep :: HasCallStack => (a -> b -> Bool) -> EditGraph a b ->MyersState a b -> Myers a b c -> Either c (MyersState a b, Myers a b c)
|
||||
-- | Evaluate one step in Myers’ algorithm given a comparator function and an edit graph, returning Either the final result, or the next state and step.
|
||||
runMyersStep :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> MyersState a b -> Myers a b c -> Either c (MyersState a b, Myers a b c)
|
||||
runMyersStep eq graph state step = let ?callStack = popCallStack callStack in case step of
|
||||
Return a -> Left a
|
||||
Then step cont -> case step of
|
||||
@ -89,7 +101,10 @@ runMyersStep eq graph state step = let ?callStack = popCallStack callStack in ca
|
||||
S (Put state') -> Right (state', cont ())
|
||||
|
||||
|
||||
decompose :: HasCallStack => (a -> b -> Bool) -> EditGraph a b ->MyersF a b c -> Myers a b c
|
||||
-- | Decompose an operation in Myers’ algorithm into its continuation.
|
||||
--
|
||||
-- Dispatches to the per-operation run… functions which implement the meat of the algorithm.
|
||||
decompose :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> MyersF a b c -> Myers a b c
|
||||
decompose eq graph myers = let ?callStack = popCallStack callStack in case myers of
|
||||
SES -> runSES graph
|
||||
LCS -> runLCS graph
|
||||
@ -107,6 +122,7 @@ decompose eq graph myers = let ?callStack = popCallStack callStack in case myers
|
||||
{-# INLINE decompose #-}
|
||||
|
||||
|
||||
-- | Compute the shortest edit script (diff) of an edit graph.
|
||||
runSES :: HasCallStack => EditGraph a b -> Myers a b (EditScript a b)
|
||||
runSES (EditGraph as bs)
|
||||
| null bs = return (This <$> toList as)
|
||||
@ -117,6 +133,7 @@ runSES (EditGraph as bs)
|
||||
Just (script, _) -> return (reverse script)
|
||||
_ -> fail "no shortest edit script found in edit graph (this is a bug in SES.Myers)."
|
||||
|
||||
-- | Compute the longest common subsequence of an edit graph.
|
||||
runLCS :: HasCallStack => EditGraph a b -> Myers a b [(a, b)]
|
||||
runLCS (EditGraph as bs)
|
||||
| null as || null bs = return []
|
||||
@ -124,15 +141,18 @@ runLCS (EditGraph as bs)
|
||||
result <- M SES `Then` return
|
||||
return (catMaybes (these (const Nothing) (const Nothing) ((Just .) . (,)) <$> result))
|
||||
|
||||
-- | Compute the edit distance of an edit graph.
|
||||
runEditDistance :: HasCallStack => EditGraph a b -> Myers a b Int
|
||||
runEditDistance _ = let ?callStack = popCallStack callStack in length . filter (these (const True) (const True) (const (const False))) <$> (M SES `Then` return)
|
||||
|
||||
|
||||
-- | Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches.
|
||||
runSearchUpToD :: HasCallStack => EditGraph a b -> Distance -> Myers a b (Maybe (EditScript a b, Distance))
|
||||
runSearchUpToD (EditGraph as bs) (Distance d) = let ?callStack = popCallStack callStack in
|
||||
for [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] (searchAlongK (Distance d) . Diagonal)
|
||||
where (n, m) = (length as, length bs)
|
||||
|
||||
-- | Search an edit graph for the shortest edit script along a specific diagonal.
|
||||
runSearchAlongK :: HasCallStack => EditGraph a b -> Distance -> Diagonal -> Myers a b (Maybe (EditScript a b, Distance))
|
||||
runSearchAlongK (EditGraph as bs) d k = let ?callStack = popCallStack callStack in do
|
||||
Endpoint x y script <- moveFromAdjacent d k
|
||||
@ -141,14 +161,18 @@ runSearchAlongK (EditGraph as bs) d k = let ?callStack = popCallStack callStack
|
||||
else
|
||||
continue
|
||||
|
||||
-- | Move onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and slide down any diagonal edges eagerly.
|
||||
runMoveFromAdjacent :: HasCallStack => EditGraph a b -> Distance -> Diagonal -> Myers a b (Endpoint a b)
|
||||
runMoveFromAdjacent (EditGraph as bs) (Distance d) (Diagonal k) = let ?callStack = popCallStack callStack in do
|
||||
let (n, m) = (length as, length bs)
|
||||
from <- if d == 0 || k < negate m || k > n then
|
||||
-- The top-left corner, or otherwise out-of-bounds.
|
||||
return (Endpoint 0 0 [])
|
||||
else if k == negate d || k == negate m then
|
||||
-- The lower/left extent of the search region or edit graph, whichever is smaller.
|
||||
getK (Diagonal (succ k)) >>= moveDownFrom
|
||||
else if k /= d && k /= n then do
|
||||
-- Somewhere in the interior of the search region and edit graph.
|
||||
prev <- getK (Diagonal (pred k))
|
||||
next <- getK (Diagonal (succ k))
|
||||
if x prev < x next then
|
||||
@ -156,27 +180,33 @@ runMoveFromAdjacent (EditGraph as bs) (Distance d) (Diagonal k) = let ?callStack
|
||||
else
|
||||
moveRightFrom prev
|
||||
else
|
||||
-- The upper/right extent of the search region or edit graph, whichever is smaller.
|
||||
getK (Diagonal (pred k)) >>= moveRightFrom
|
||||
endpoint <- slide from
|
||||
setK (Diagonal k) endpoint
|
||||
return endpoint
|
||||
|
||||
-- | Move downward from a given vertex, inserting the element for the corresponding row.
|
||||
runMoveDownFrom :: HasCallStack => EditGraph a b -> Endpoint a b -> Myers a b (Endpoint a b)
|
||||
runMoveDownFrom (EditGraph _ bs) (Endpoint x y script) = return (Endpoint x (succ y) (if y < length bs then That (bs ! y) : script else script))
|
||||
|
||||
-- | Move rightward from a given vertex, deleting the element for the corresponding column.
|
||||
runMoveRightFrom :: HasCallStack => EditGraph a b -> Endpoint a b -> Myers a b (Endpoint a b)
|
||||
runMoveRightFrom (EditGraph as _) (Endpoint x y script) = return (Endpoint (succ x) y (if x < length as then This (as ! x) : script else script))
|
||||
|
||||
-- | Return the maximum extent reached and path taken along a given diagonal.
|
||||
runGetK :: HasCallStack => EditGraph a b -> Diagonal -> Myers a b (Endpoint a b)
|
||||
runGetK graph k = let ?callStack = popCallStack callStack in do
|
||||
(i, v) <- checkK graph k
|
||||
let (x, script) = v ! i in return (Endpoint x (x - unDiagonal k) script)
|
||||
|
||||
-- | Update the maximum extent reached and path taken along a given diagonal.
|
||||
runSetK :: HasCallStack => EditGraph a b -> Diagonal -> Endpoint a b -> Myers a b ()
|
||||
runSetK graph k (Endpoint x _ script) = let ?callStack = popCallStack callStack in do
|
||||
(i, v) <- checkK graph k
|
||||
put (MyersState (v Array.// [(i, (x, script))]))
|
||||
|
||||
-- | Slide down any diagonal edges from a given vertex.
|
||||
runSlide :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> Endpoint a b -> Myers a b (Endpoint a b)
|
||||
runSlide eq (EditGraph as bs) (Endpoint x y script)
|
||||
| x >= 0, x < length as
|
||||
@ -189,69 +219,88 @@ runSlide eq (EditGraph as bs) (Endpoint x y script)
|
||||
|
||||
-- Smart constructors
|
||||
|
||||
-- | Compute the longest common subsequence.
|
||||
lcs :: HasCallStack => Myers a b [(a, b)]
|
||||
lcs = M LCS `Then` return
|
||||
|
||||
-- | Compute the edit distance.
|
||||
editDistance :: HasCallStack => Myers a b Int
|
||||
editDistance = M EditDistance `Then` return
|
||||
|
||||
-- | Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches.
|
||||
searchUpToD :: HasCallStack => Distance -> Myers a b (Maybe (EditScript a b, Distance))
|
||||
searchUpToD distance = M (SearchUpToD distance) `Then` return
|
||||
|
||||
-- | Search an edit graph for the shortest edit script along a specific diagonal.
|
||||
searchAlongK :: HasCallStack => Distance -> Diagonal -> Myers a b (Maybe (EditScript a b, Distance))
|
||||
searchAlongK d k = M (SearchAlongK d k) `Then` return
|
||||
|
||||
-- | Move onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and slide down any diagonal edges eagerly.
|
||||
moveFromAdjacent :: HasCallStack => Distance -> Diagonal -> Myers a b (Endpoint a b)
|
||||
moveFromAdjacent d k = M (MoveFromAdjacent d k) `Then` return
|
||||
|
||||
-- | Move downward from a given vertex, inserting the element for the corresponding row.
|
||||
moveDownFrom :: HasCallStack => Endpoint a b -> Myers a b (Endpoint a b)
|
||||
moveDownFrom e = M (MoveDownFrom e) `Then` return
|
||||
|
||||
-- | Move rightward from a given vertex, deleting the element for the corresponding column.
|
||||
moveRightFrom :: HasCallStack => Endpoint a b -> Myers a b (Endpoint a b)
|
||||
moveRightFrom e = M (MoveRightFrom e) `Then` return
|
||||
|
||||
-- | Return the maximum extent reached and path taken along a given diagonal.
|
||||
getK :: HasCallStack => Diagonal -> Myers a b (Endpoint a b)
|
||||
getK diagonal = M (GetK diagonal) `Then` return
|
||||
|
||||
-- | Update the maximum extent reached and path taken along a given diagonal.
|
||||
setK :: HasCallStack => Diagonal -> Endpoint a b -> Myers a b ()
|
||||
setK diagonal x = M (SetK diagonal x) `Then` return
|
||||
|
||||
-- | Slide down any diagonal edges from a given vertex.
|
||||
slide :: HasCallStack => Endpoint a b -> Myers a b (Endpoint a b)
|
||||
slide from = M (Slide from) `Then` return
|
||||
|
||||
|
||||
-- Implementation details
|
||||
|
||||
-- | The state stored by Myers’ algorithm; an array of m + n + 1 values indicating the maximum x-index reached and path taken along each diagonal.
|
||||
newtype MyersState a b = MyersState { unMyersState :: Array.Array Int (Int, EditScript a b) }
|
||||
deriving (Eq, Show)
|
||||
|
||||
-- | State effect used in Myers.
|
||||
data State s a where
|
||||
Get :: State s s
|
||||
Put :: s -> State s ()
|
||||
|
||||
-- | Compute the empty state of length m + n + 1 for a given edit graph.
|
||||
emptyStateForGraph :: EditGraph a b -> MyersState a b
|
||||
emptyStateForGraph (EditGraph as bs) = let (n, m) = (length as, length bs) in
|
||||
MyersState (Array.listArray (0, m + n) (repeat (0, [])))
|
||||
|
||||
-- | Evaluate some function for each value in a list until one returns a value or the list is exhausted.
|
||||
for :: [a] -> (a -> Myers c d (Maybe b)) -> Myers c d (Maybe b)
|
||||
for all run = foldr (\ a b -> (<|>) <$> run a <*> b) (return Nothing) all
|
||||
|
||||
-- | Continue evaluation of a for loop without returning a value. To exit the loop without continuing, return a value in 'Just' instead.
|
||||
continue :: Myers b c (Maybe a)
|
||||
continue = return Nothing
|
||||
|
||||
-- | Compute the actual index into the state array from a (possibly negative) diagonal number.
|
||||
index :: Array.Array Int a -> Int -> Int
|
||||
index v k = if k >= 0 then k else length v + k
|
||||
|
||||
|
||||
-- | Throw a failure. Used to indicate an error in the implementation of Myers’ algorithm.
|
||||
fail :: (HasCallStack, Monad m) => String -> m a
|
||||
fail s = let ?callStack = fromCallSiteList (filter ((/= "M") . fst) (getCallStack callStack)) in
|
||||
throw (MyersException s callStack)
|
||||
|
||||
-- | Bounds-checked indexing of arrays, preserving the call stack.
|
||||
(!) :: HasCallStack => Array.Array Int a -> Int -> a
|
||||
v ! i | i < length v = v Array.! i
|
||||
| otherwise = let ?callStack = fromCallSiteList (filter ((/= "M") . fst) (getCallStack callStack)) in
|
||||
throw (MyersException ("index " <> show i <> " out of bounds") callStack)
|
||||
|
||||
-- | Check that a given diagonal is in-bounds for the edit graph, returning the actual index to use and the state array.
|
||||
checkK :: HasCallStack => EditGraph a b -> Diagonal -> Myers a b (Int, Array.Array Int (Int, EditScript a b))
|
||||
checkK (EditGraph as bs) (Diagonal k) = let ?callStack = popCallStack callStack in do
|
||||
v <- gets unMyersState
|
||||
@ -264,9 +313,11 @@ checkK (EditGraph as bs) (Diagonal k) = let ?callStack = popCallStack callStack
|
||||
return (i, v)
|
||||
|
||||
|
||||
-- | Lifted showing of arrays.
|
||||
liftShowsVector :: (Int -> a -> ShowS) -> ([a] -> ShowS) -> Int -> Array.Array Int a -> ShowS
|
||||
liftShowsVector sp sl d = liftShowsPrec sp sl d . toList
|
||||
|
||||
-- | Lifted showing of operations in Myers’ algorithm.
|
||||
liftShowsMyersF :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> MyersF a b c -> ShowS
|
||||
liftShowsMyersF sp1 sp2 d m = case m of
|
||||
SES -> showString "SES"
|
||||
@ -281,40 +332,49 @@ liftShowsMyersF sp1 sp2 d m = case m of
|
||||
SetK diagonal v -> showsBinaryWith showsPrec (liftShowsEndpoint sp1 sp2) "SetK" d diagonal v
|
||||
Slide endpoint -> showsUnaryWith (liftShowsEndpoint sp1 sp2) "Slide" d endpoint
|
||||
|
||||
-- | Lifted showing of ternary constructors.
|
||||
showsTernaryWith :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> (Int -> c -> ShowS) -> String -> Int -> a -> b -> c -> ShowS
|
||||
showsTernaryWith sp1 sp2 sp3 name d x y z = showParen (d > 10) $
|
||||
showString name . showChar ' ' . sp1 11 x . showChar ' ' . sp2 11 y . showChar ' ' . sp3 11 z
|
||||
|
||||
-- | Lifted showing of quaternary constructors.
|
||||
showsQuaternaryWith :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> (Int -> c -> ShowS) -> (Int -> d -> ShowS) -> String -> Int -> a -> b -> c -> d -> ShowS
|
||||
showsQuaternaryWith sp1 sp2 sp3 sp4 name d x y z w = showParen (d > 10) $
|
||||
showString name . showChar ' ' . sp1 11 x . showChar ' ' . sp2 11 y . showChar ' ' . sp3 11 z . showChar ' ' . sp4 11 w
|
||||
|
||||
-- | Lifted showing of quinary constructors.
|
||||
showsQuinaryWith :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> (Int -> c -> ShowS) -> (Int -> d -> ShowS) -> (Int -> e -> ShowS) -> String -> Int -> a -> b -> c -> d -> e -> ShowS
|
||||
showsQuinaryWith sp1 sp2 sp3 sp4 sp5 name d x y z w v = showParen (d > 10) $
|
||||
showString name . showChar ' ' . sp1 11 x . showChar ' ' . sp2 11 y . showChar ' ' . sp3 11 z . showChar ' ' . sp4 11 w . showChar ' ' . sp5 11 v
|
||||
|
||||
-- | Lifted showing of State.
|
||||
liftShowsState :: (Int -> a -> ShowS) -> Int -> State a b -> ShowS
|
||||
liftShowsState sp d state = case state of
|
||||
Get -> showString "Get"
|
||||
Put s -> showsUnaryWith sp "Put" d s
|
||||
|
||||
-- | Lift value/list showing functions into a showing function for steps in Myers’ algorithm.
|
||||
liftShowsStepF :: (Int -> a -> ShowS) -> ([a] -> ShowS) -> (Int -> b -> ShowS) -> ([b] -> ShowS) -> Int -> StepF a b c -> ShowS
|
||||
liftShowsStepF sp1 sl1 sp2 sl2 d step = case step of
|
||||
M m -> showsUnaryWith (liftShowsMyersF sp1 sp2) "M" d m
|
||||
S s -> showsUnaryWith (liftShowsState (liftShowsPrec2 sp1 sl1 sp2 sl2)) "S" d s
|
||||
|
||||
-- | Lifted showing of These.
|
||||
liftShowsThese :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> These a b -> ShowS
|
||||
liftShowsThese sa sb d t = case t of
|
||||
This a -> showsUnaryWith sa "This" d a
|
||||
That b -> showsUnaryWith sb "That" d b
|
||||
These a b -> showsBinaryWith sa sb "These" d a b
|
||||
|
||||
-- | Lifted showing of edit scripts.
|
||||
liftShowsEditScript :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> EditScript a b -> ShowS
|
||||
liftShowsEditScript sa sb _ = showListWith (liftShowsThese sa sb 0)
|
||||
|
||||
-- | Lifted showing of edit graph endpoints.
|
||||
liftShowsEndpoint :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> Endpoint a b -> ShowS
|
||||
liftShowsEndpoint sp1 sp2 d (Endpoint x y script) = showsTernaryWith showsPrec showsPrec (liftShowsEditScript sp1 sp2) "Endpoint" d x y script
|
||||
|
||||
-- | Exceptions in Myers’ algorithm, along with a description and call stack.
|
||||
data MyersException = MyersException String CallStack
|
||||
deriving (Typeable)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user