1
1
mirror of https://github.com/github/semantic.git synced 2024-11-29 02:44:36 +03:00

📝 all the things.

This commit is contained in:
Rob Rix 2017-03-23 15:34:52 -04:00
parent 70b825ecbb
commit 7f13ef23a2

View File

@ -13,6 +13,7 @@ import GHC.Stack
import Prologue hiding (for, State, error)
import Text.Show (showListWith)
-- | Operations in Myers algorithm.
data MyersF a b result where
SES :: MyersF a b (EditScript a b)
LCS :: MyersF a b [(a, b)]
@ -28,39 +29,48 @@ data MyersF a b result where
Slide :: Endpoint a b -> MyersF a b (Endpoint a b)
-- | An edit script, i.e. a sequence of changes/copies of elements.
type EditScript a b = [These a b]
-- | Steps in the execution of Myers algorithm, i.e. the sum of MyersF and State.
data StepF a b result where
M :: HasCallStack => MyersF a b c -> StepF a b c
S :: State (MyersState a b) c -> StepF a b c
type Myers a b = Freer (StepF a b)
-- | Notionally the cartesian product of two sequences, represented as a simple wrapper around those arrays holding those sequences elements for O(1) lookups.
data EditGraph a b = EditGraph { as :: !(Array.Array Int a), bs :: !(Array.Array Int b) }
deriving (Eq, Show)
-- | Construct an edit graph from Foldable sequences.
makeEditGraph :: (Foldable t, Foldable u) => t a -> u b -> EditGraph a b
makeEditGraph as bs = EditGraph (Array.listArray (0, pred (length as)) (toList as)) (Array.listArray (0, pred (length bs)) (toList bs))
-- | An edit distance, i.e. a cardinal number of changes.
newtype Distance = Distance { unDistance :: Int }
deriving (Eq, Show)
-- | A diagonal in the edit graph of lists of lengths n and m, numbered from -m to n.
newtype Diagonal = Diagonal { unDiagonal :: Int }
deriving (Eq, Show)
-- | The endpoint of a path through the edit graph, represented as the x/y indices and the script of edits made to get to that point.
data Endpoint a b = Endpoint { x :: !Int, y :: !Int, script :: !(EditScript a b) }
deriving (Eq, Show)
-- API
-- | Compute the shortest edit script using Myers algorithm.
ses :: (HasCallStack, Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript a b
ses eq as bs = runMyers eq (makeEditGraph as bs) (M SES `Then` return)
-- Evaluation
runMyers :: forall a b c. HasCallStack => (a -> b -> Bool) -> EditGraph a b ->Myers a b c -> c
-- | Fully evaluate an operation in Myers algorithm given a comparator function and an edit graph.
runMyers :: forall a b c. HasCallStack => (a -> b -> Bool) -> EditGraph a b -> Myers a b c -> c
runMyers eq graph step = evalState (go step) (emptyStateForGraph graph)
where go :: forall c. Myers a b c -> StateT (MyersState a b) Identity c
go = iterFreerA algebra
@ -70,7 +80,8 @@ runMyers eq graph step = evalState (go step) (emptyStateForGraph graph)
S Get -> get >>= cont
S (Put s) -> put s >>= cont
runMyersSteps :: HasCallStack => (a -> b -> Bool) -> EditGraph a b ->Myers a b c -> [(MyersState a b, Myers a b c)]
-- | Fully evaluate an operation in Myers algorithm given a comparator function and an edit graph, returning a list of states and next steps.
runMyersSteps :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> Myers a b c -> [(MyersState a b, Myers a b c)]
runMyersSteps eq graph = go (emptyStateForGraph graph)
where go state step = let ?callStack = popCallStack callStack in prefix state step $ case runMyersStep eq graph state step of
Left result -> [ (state, return result) ]
@ -79,7 +90,8 @@ runMyersSteps eq graph = go (emptyStateForGraph graph)
Then (M _) _ -> ((state, step) :)
_ -> identity
runMyersStep :: HasCallStack => (a -> b -> Bool) -> EditGraph a b ->MyersState a b -> Myers a b c -> Either c (MyersState a b, Myers a b c)
-- | Evaluate one step in Myers algorithm given a comparator function and an edit graph, returning Either the final result, or the next state and step.
runMyersStep :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> MyersState a b -> Myers a b c -> Either c (MyersState a b, Myers a b c)
runMyersStep eq graph state step = let ?callStack = popCallStack callStack in case step of
Return a -> Left a
Then step cont -> case step of
@ -89,7 +101,10 @@ runMyersStep eq graph state step = let ?callStack = popCallStack callStack in ca
S (Put state') -> Right (state', cont ())
decompose :: HasCallStack => (a -> b -> Bool) -> EditGraph a b ->MyersF a b c -> Myers a b c
-- | Decompose an operation in Myers algorithm into its continuation.
--
-- Dispatches to the per-operation run… functions which implement the meat of the algorithm.
decompose :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> MyersF a b c -> Myers a b c
decompose eq graph myers = let ?callStack = popCallStack callStack in case myers of
SES -> runSES graph
LCS -> runLCS graph
@ -107,6 +122,7 @@ decompose eq graph myers = let ?callStack = popCallStack callStack in case myers
{-# INLINE decompose #-}
-- | Compute the shortest edit script (diff) of an edit graph.
runSES :: HasCallStack => EditGraph a b -> Myers a b (EditScript a b)
runSES (EditGraph as bs)
| null bs = return (This <$> toList as)
@ -117,6 +133,7 @@ runSES (EditGraph as bs)
Just (script, _) -> return (reverse script)
_ -> fail "no shortest edit script found in edit graph (this is a bug in SES.Myers)."
-- | Compute the longest common subsequence of an edit graph.
runLCS :: HasCallStack => EditGraph a b -> Myers a b [(a, b)]
runLCS (EditGraph as bs)
| null as || null bs = return []
@ -124,15 +141,18 @@ runLCS (EditGraph as bs)
result <- M SES `Then` return
return (catMaybes (these (const Nothing) (const Nothing) ((Just .) . (,)) <$> result))
-- | Compute the edit distance of an edit graph.
runEditDistance :: HasCallStack => EditGraph a b -> Myers a b Int
runEditDistance _ = let ?callStack = popCallStack callStack in length . filter (these (const True) (const True) (const (const False))) <$> (M SES `Then` return)
-- | Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches.
runSearchUpToD :: HasCallStack => EditGraph a b -> Distance -> Myers a b (Maybe (EditScript a b, Distance))
runSearchUpToD (EditGraph as bs) (Distance d) = let ?callStack = popCallStack callStack in
for [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] (searchAlongK (Distance d) . Diagonal)
where (n, m) = (length as, length bs)
-- | Search an edit graph for the shortest edit script along a specific diagonal.
runSearchAlongK :: HasCallStack => EditGraph a b -> Distance -> Diagonal -> Myers a b (Maybe (EditScript a b, Distance))
runSearchAlongK (EditGraph as bs) d k = let ?callStack = popCallStack callStack in do
Endpoint x y script <- moveFromAdjacent d k
@ -141,14 +161,18 @@ runSearchAlongK (EditGraph as bs) d k = let ?callStack = popCallStack callStack
else
continue
-- | Move onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and slide down any diagonal edges eagerly.
runMoveFromAdjacent :: HasCallStack => EditGraph a b -> Distance -> Diagonal -> Myers a b (Endpoint a b)
runMoveFromAdjacent (EditGraph as bs) (Distance d) (Diagonal k) = let ?callStack = popCallStack callStack in do
let (n, m) = (length as, length bs)
from <- if d == 0 || k < negate m || k > n then
-- The top-left corner, or otherwise out-of-bounds.
return (Endpoint 0 0 [])
else if k == negate d || k == negate m then
-- The lower/left extent of the search region or edit graph, whichever is smaller.
getK (Diagonal (succ k)) >>= moveDownFrom
else if k /= d && k /= n then do
-- Somewhere in the interior of the search region and edit graph.
prev <- getK (Diagonal (pred k))
next <- getK (Diagonal (succ k))
if x prev < x next then
@ -156,27 +180,33 @@ runMoveFromAdjacent (EditGraph as bs) (Distance d) (Diagonal k) = let ?callStack
else
moveRightFrom prev
else
-- The upper/right extent of the search region or edit graph, whichever is smaller.
getK (Diagonal (pred k)) >>= moveRightFrom
endpoint <- slide from
setK (Diagonal k) endpoint
return endpoint
-- | Move downward from a given vertex, inserting the element for the corresponding row.
runMoveDownFrom :: HasCallStack => EditGraph a b -> Endpoint a b -> Myers a b (Endpoint a b)
runMoveDownFrom (EditGraph _ bs) (Endpoint x y script) = return (Endpoint x (succ y) (if y < length bs then That (bs ! y) : script else script))
-- | Move rightward from a given vertex, deleting the element for the corresponding column.
runMoveRightFrom :: HasCallStack => EditGraph a b -> Endpoint a b -> Myers a b (Endpoint a b)
runMoveRightFrom (EditGraph as _) (Endpoint x y script) = return (Endpoint (succ x) y (if x < length as then This (as ! x) : script else script))
-- | Return the maximum extent reached and path taken along a given diagonal.
runGetK :: HasCallStack => EditGraph a b -> Diagonal -> Myers a b (Endpoint a b)
runGetK graph k = let ?callStack = popCallStack callStack in do
(i, v) <- checkK graph k
let (x, script) = v ! i in return (Endpoint x (x - unDiagonal k) script)
-- | Update the maximum extent reached and path taken along a given diagonal.
runSetK :: HasCallStack => EditGraph a b -> Diagonal -> Endpoint a b -> Myers a b ()
runSetK graph k (Endpoint x _ script) = let ?callStack = popCallStack callStack in do
(i, v) <- checkK graph k
put (MyersState (v Array.// [(i, (x, script))]))
-- | Slide down any diagonal edges from a given vertex.
runSlide :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> Endpoint a b -> Myers a b (Endpoint a b)
runSlide eq (EditGraph as bs) (Endpoint x y script)
| x >= 0, x < length as
@ -189,69 +219,88 @@ runSlide eq (EditGraph as bs) (Endpoint x y script)
-- Smart constructors
-- | Compute the longest common subsequence.
lcs :: HasCallStack => Myers a b [(a, b)]
lcs = M LCS `Then` return
-- | Compute the edit distance.
editDistance :: HasCallStack => Myers a b Int
editDistance = M EditDistance `Then` return
-- | Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches.
searchUpToD :: HasCallStack => Distance -> Myers a b (Maybe (EditScript a b, Distance))
searchUpToD distance = M (SearchUpToD distance) `Then` return
-- | Search an edit graph for the shortest edit script along a specific diagonal.
searchAlongK :: HasCallStack => Distance -> Diagonal -> Myers a b (Maybe (EditScript a b, Distance))
searchAlongK d k = M (SearchAlongK d k) `Then` return
-- | Move onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and slide down any diagonal edges eagerly.
moveFromAdjacent :: HasCallStack => Distance -> Diagonal -> Myers a b (Endpoint a b)
moveFromAdjacent d k = M (MoveFromAdjacent d k) `Then` return
-- | Move downward from a given vertex, inserting the element for the corresponding row.
moveDownFrom :: HasCallStack => Endpoint a b -> Myers a b (Endpoint a b)
moveDownFrom e = M (MoveDownFrom e) `Then` return
-- | Move rightward from a given vertex, deleting the element for the corresponding column.
moveRightFrom :: HasCallStack => Endpoint a b -> Myers a b (Endpoint a b)
moveRightFrom e = M (MoveRightFrom e) `Then` return
-- | Return the maximum extent reached and path taken along a given diagonal.
getK :: HasCallStack => Diagonal -> Myers a b (Endpoint a b)
getK diagonal = M (GetK diagonal) `Then` return
-- | Update the maximum extent reached and path taken along a given diagonal.
setK :: HasCallStack => Diagonal -> Endpoint a b -> Myers a b ()
setK diagonal x = M (SetK diagonal x) `Then` return
-- | Slide down any diagonal edges from a given vertex.
slide :: HasCallStack => Endpoint a b -> Myers a b (Endpoint a b)
slide from = M (Slide from) `Then` return
-- Implementation details
-- | The state stored by Myers algorithm; an array of m + n + 1 values indicating the maximum x-index reached and path taken along each diagonal.
newtype MyersState a b = MyersState { unMyersState :: Array.Array Int (Int, EditScript a b) }
deriving (Eq, Show)
-- | State effect used in Myers.
data State s a where
Get :: State s s
Put :: s -> State s ()
-- | Compute the empty state of length m + n + 1 for a given edit graph.
emptyStateForGraph :: EditGraph a b -> MyersState a b
emptyStateForGraph (EditGraph as bs) = let (n, m) = (length as, length bs) in
MyersState (Array.listArray (0, m + n) (repeat (0, [])))
-- | Evaluate some function for each value in a list until one returns a value or the list is exhausted.
for :: [a] -> (a -> Myers c d (Maybe b)) -> Myers c d (Maybe b)
for all run = foldr (\ a b -> (<|>) <$> run a <*> b) (return Nothing) all
-- | Continue evaluation of a for loop without returning a value. To exit the loop without continuing, return a value in 'Just' instead.
continue :: Myers b c (Maybe a)
continue = return Nothing
-- | Compute the actual index into the state array from a (possibly negative) diagonal number.
index :: Array.Array Int a -> Int -> Int
index v k = if k >= 0 then k else length v + k
-- | Throw a failure. Used to indicate an error in the implementation of Myers algorithm.
fail :: (HasCallStack, Monad m) => String -> m a
fail s = let ?callStack = fromCallSiteList (filter ((/= "M") . fst) (getCallStack callStack)) in
throw (MyersException s callStack)
-- | Bounds-checked indexing of arrays, preserving the call stack.
(!) :: HasCallStack => Array.Array Int a -> Int -> a
v ! i | i < length v = v Array.! i
| otherwise = let ?callStack = fromCallSiteList (filter ((/= "M") . fst) (getCallStack callStack)) in
throw (MyersException ("index " <> show i <> " out of bounds") callStack)
-- | Check that a given diagonal is in-bounds for the edit graph, returning the actual index to use and the state array.
checkK :: HasCallStack => EditGraph a b -> Diagonal -> Myers a b (Int, Array.Array Int (Int, EditScript a b))
checkK (EditGraph as bs) (Diagonal k) = let ?callStack = popCallStack callStack in do
v <- gets unMyersState
@ -264,9 +313,11 @@ checkK (EditGraph as bs) (Diagonal k) = let ?callStack = popCallStack callStack
return (i, v)
-- | Lifted showing of arrays.
liftShowsVector :: (Int -> a -> ShowS) -> ([a] -> ShowS) -> Int -> Array.Array Int a -> ShowS
liftShowsVector sp sl d = liftShowsPrec sp sl d . toList
-- | Lifted showing of operations in Myers algorithm.
liftShowsMyersF :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> MyersF a b c -> ShowS
liftShowsMyersF sp1 sp2 d m = case m of
SES -> showString "SES"
@ -281,40 +332,49 @@ liftShowsMyersF sp1 sp2 d m = case m of
SetK diagonal v -> showsBinaryWith showsPrec (liftShowsEndpoint sp1 sp2) "SetK" d diagonal v
Slide endpoint -> showsUnaryWith (liftShowsEndpoint sp1 sp2) "Slide" d endpoint
-- | Lifted showing of ternary constructors.
showsTernaryWith :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> (Int -> c -> ShowS) -> String -> Int -> a -> b -> c -> ShowS
showsTernaryWith sp1 sp2 sp3 name d x y z = showParen (d > 10) $
showString name . showChar ' ' . sp1 11 x . showChar ' ' . sp2 11 y . showChar ' ' . sp3 11 z
-- | Lifted showing of quaternary constructors.
showsQuaternaryWith :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> (Int -> c -> ShowS) -> (Int -> d -> ShowS) -> String -> Int -> a -> b -> c -> d -> ShowS
showsQuaternaryWith sp1 sp2 sp3 sp4 name d x y z w = showParen (d > 10) $
showString name . showChar ' ' . sp1 11 x . showChar ' ' . sp2 11 y . showChar ' ' . sp3 11 z . showChar ' ' . sp4 11 w
-- | Lifted showing of quinary constructors.
showsQuinaryWith :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> (Int -> c -> ShowS) -> (Int -> d -> ShowS) -> (Int -> e -> ShowS) -> String -> Int -> a -> b -> c -> d -> e -> ShowS
showsQuinaryWith sp1 sp2 sp3 sp4 sp5 name d x y z w v = showParen (d > 10) $
showString name . showChar ' ' . sp1 11 x . showChar ' ' . sp2 11 y . showChar ' ' . sp3 11 z . showChar ' ' . sp4 11 w . showChar ' ' . sp5 11 v
-- | Lifted showing of State.
liftShowsState :: (Int -> a -> ShowS) -> Int -> State a b -> ShowS
liftShowsState sp d state = case state of
Get -> showString "Get"
Put s -> showsUnaryWith sp "Put" d s
-- | Lift value/list showing functions into a showing function for steps in Myers algorithm.
liftShowsStepF :: (Int -> a -> ShowS) -> ([a] -> ShowS) -> (Int -> b -> ShowS) -> ([b] -> ShowS) -> Int -> StepF a b c -> ShowS
liftShowsStepF sp1 sl1 sp2 sl2 d step = case step of
M m -> showsUnaryWith (liftShowsMyersF sp1 sp2) "M" d m
S s -> showsUnaryWith (liftShowsState (liftShowsPrec2 sp1 sl1 sp2 sl2)) "S" d s
-- | Lifted showing of These.
liftShowsThese :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> These a b -> ShowS
liftShowsThese sa sb d t = case t of
This a -> showsUnaryWith sa "This" d a
That b -> showsUnaryWith sb "That" d b
These a b -> showsBinaryWith sa sb "These" d a b
-- | Lifted showing of edit scripts.
liftShowsEditScript :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> EditScript a b -> ShowS
liftShowsEditScript sa sb _ = showListWith (liftShowsThese sa sb 0)
-- | Lifted showing of edit graph endpoints.
liftShowsEndpoint :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> Endpoint a b -> ShowS
liftShowsEndpoint sp1 sp2 d (Endpoint x y script) = showsTernaryWith showsPrec showsPrec (liftShowsEditScript sp1 sp2) "Endpoint" d x y script
-- | Exceptions in Myers algorithm, along with a description and call stack.
data MyersException = MyersException String CallStack
deriving (Typeable)