diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 74560747b..31ee314ef 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -13,6 +13,7 @@ import GHC.Stack import Prologue hiding (for, State, error) import Text.Show (showListWith) +-- | Operations in Myers’ algorithm. data MyersF a b result where SES :: MyersF a b (EditScript a b) LCS :: MyersF a b [(a, b)] @@ -28,39 +29,48 @@ data MyersF a b result where Slide :: Endpoint a b -> MyersF a b (Endpoint a b) +-- | An edit script, i.e. a sequence of changes/copies of elements. type EditScript a b = [These a b] +-- | Steps in the execution of Myers’ algorithm, i.e. the sum of MyersF and State. data StepF a b result where M :: HasCallStack => MyersF a b c -> StepF a b c S :: State (MyersState a b) c -> StepF a b c type Myers a b = Freer (StepF a b) +-- | Notionally the cartesian product of two sequences, represented as a simple wrapper around those arrays holding those sequences’ elements for O(1) lookups. data EditGraph a b = EditGraph { as :: !(Array.Array Int a), bs :: !(Array.Array Int b) } deriving (Eq, Show) +-- | Construct an edit graph from Foldable sequences. makeEditGraph :: (Foldable t, Foldable u) => t a -> u b -> EditGraph a b makeEditGraph as bs = EditGraph (Array.listArray (0, pred (length as)) (toList as)) (Array.listArray (0, pred (length bs)) (toList bs)) +-- | An edit distance, i.e. a cardinal number of changes. newtype Distance = Distance { unDistance :: Int } deriving (Eq, Show) +-- | A diagonal in the edit graph of lists of lengths n and m, numbered from -m to n. newtype Diagonal = Diagonal { unDiagonal :: Int } deriving (Eq, Show) +-- | The endpoint of a path through the edit graph, represented as the x/y indices and the script of edits made to get to that point. data Endpoint a b = Endpoint { x :: !Int, y :: !Int, script :: !(EditScript a b) } deriving (Eq, Show) -- API +-- | Compute the shortest edit script using Myers’ algorithm. ses :: (HasCallStack, Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript a b ses eq as bs = runMyers eq (makeEditGraph as bs) (M SES `Then` return) -- Evaluation -runMyers :: forall a b c. HasCallStack => (a -> b -> Bool) -> EditGraph a b ->Myers a b c -> c +-- | Fully evaluate an operation in Myers’ algorithm given a comparator function and an edit graph. +runMyers :: forall a b c. HasCallStack => (a -> b -> Bool) -> EditGraph a b -> Myers a b c -> c runMyers eq graph step = evalState (go step) (emptyStateForGraph graph) where go :: forall c. Myers a b c -> StateT (MyersState a b) Identity c go = iterFreerA algebra @@ -70,7 +80,8 @@ runMyers eq graph step = evalState (go step) (emptyStateForGraph graph) S Get -> get >>= cont S (Put s) -> put s >>= cont -runMyersSteps :: HasCallStack => (a -> b -> Bool) -> EditGraph a b ->Myers a b c -> [(MyersState a b, Myers a b c)] +-- | Fully evaluate an operation in Myers’ algorithm given a comparator function and an edit graph, returning a list of states and next steps. +runMyersSteps :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> Myers a b c -> [(MyersState a b, Myers a b c)] runMyersSteps eq graph = go (emptyStateForGraph graph) where go state step = let ?callStack = popCallStack callStack in prefix state step $ case runMyersStep eq graph state step of Left result -> [ (state, return result) ] @@ -79,7 +90,8 @@ runMyersSteps eq graph = go (emptyStateForGraph graph) Then (M _) _ -> ((state, step) :) _ -> identity -runMyersStep :: HasCallStack => (a -> b -> Bool) -> EditGraph a b ->MyersState a b -> Myers a b c -> Either c (MyersState a b, Myers a b c) +-- | Evaluate one step in Myers’ algorithm given a comparator function and an edit graph, returning Either the final result, or the next state and step. +runMyersStep :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> MyersState a b -> Myers a b c -> Either c (MyersState a b, Myers a b c) runMyersStep eq graph state step = let ?callStack = popCallStack callStack in case step of Return a -> Left a Then step cont -> case step of @@ -89,7 +101,10 @@ runMyersStep eq graph state step = let ?callStack = popCallStack callStack in ca S (Put state') -> Right (state', cont ()) -decompose :: HasCallStack => (a -> b -> Bool) -> EditGraph a b ->MyersF a b c -> Myers a b c +-- | Decompose an operation in Myers’ algorithm into its continuation. +-- +-- Dispatches to the per-operation run… functions which implement the meat of the algorithm. +decompose :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> MyersF a b c -> Myers a b c decompose eq graph myers = let ?callStack = popCallStack callStack in case myers of SES -> runSES graph LCS -> runLCS graph @@ -107,6 +122,7 @@ decompose eq graph myers = let ?callStack = popCallStack callStack in case myers {-# INLINE decompose #-} +-- | Compute the shortest edit script (diff) of an edit graph. runSES :: HasCallStack => EditGraph a b -> Myers a b (EditScript a b) runSES (EditGraph as bs) | null bs = return (This <$> toList as) @@ -117,6 +133,7 @@ runSES (EditGraph as bs) Just (script, _) -> return (reverse script) _ -> fail "no shortest edit script found in edit graph (this is a bug in SES.Myers)." +-- | Compute the longest common subsequence of an edit graph. runLCS :: HasCallStack => EditGraph a b -> Myers a b [(a, b)] runLCS (EditGraph as bs) | null as || null bs = return [] @@ -124,15 +141,18 @@ runLCS (EditGraph as bs) result <- M SES `Then` return return (catMaybes (these (const Nothing) (const Nothing) ((Just .) . (,)) <$> result)) +-- | Compute the edit distance of an edit graph. runEditDistance :: HasCallStack => EditGraph a b -> Myers a b Int runEditDistance _ = let ?callStack = popCallStack callStack in length . filter (these (const True) (const True) (const (const False))) <$> (M SES `Then` return) +-- | Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. runSearchUpToD :: HasCallStack => EditGraph a b -> Distance -> Myers a b (Maybe (EditScript a b, Distance)) runSearchUpToD (EditGraph as bs) (Distance d) = let ?callStack = popCallStack callStack in for [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] (searchAlongK (Distance d) . Diagonal) where (n, m) = (length as, length bs) +-- | Search an edit graph for the shortest edit script along a specific diagonal. runSearchAlongK :: HasCallStack => EditGraph a b -> Distance -> Diagonal -> Myers a b (Maybe (EditScript a b, Distance)) runSearchAlongK (EditGraph as bs) d k = let ?callStack = popCallStack callStack in do Endpoint x y script <- moveFromAdjacent d k @@ -141,14 +161,18 @@ runSearchAlongK (EditGraph as bs) d k = let ?callStack = popCallStack callStack else continue +-- | Move onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and slide down any diagonal edges eagerly. runMoveFromAdjacent :: HasCallStack => EditGraph a b -> Distance -> Diagonal -> Myers a b (Endpoint a b) runMoveFromAdjacent (EditGraph as bs) (Distance d) (Diagonal k) = let ?callStack = popCallStack callStack in do let (n, m) = (length as, length bs) from <- if d == 0 || k < negate m || k > n then + -- The top-left corner, or otherwise out-of-bounds. return (Endpoint 0 0 []) else if k == negate d || k == negate m then + -- The lower/left extent of the search region or edit graph, whichever is smaller. getK (Diagonal (succ k)) >>= moveDownFrom else if k /= d && k /= n then do + -- Somewhere in the interior of the search region and edit graph. prev <- getK (Diagonal (pred k)) next <- getK (Diagonal (succ k)) if x prev < x next then @@ -156,27 +180,33 @@ runMoveFromAdjacent (EditGraph as bs) (Distance d) (Diagonal k) = let ?callStack else moveRightFrom prev else + -- The upper/right extent of the search region or edit graph, whichever is smaller. getK (Diagonal (pred k)) >>= moveRightFrom endpoint <- slide from setK (Diagonal k) endpoint return endpoint +-- | Move downward from a given vertex, inserting the element for the corresponding row. runMoveDownFrom :: HasCallStack => EditGraph a b -> Endpoint a b -> Myers a b (Endpoint a b) runMoveDownFrom (EditGraph _ bs) (Endpoint x y script) = return (Endpoint x (succ y) (if y < length bs then That (bs ! y) : script else script)) +-- | Move rightward from a given vertex, deleting the element for the corresponding column. runMoveRightFrom :: HasCallStack => EditGraph a b -> Endpoint a b -> Myers a b (Endpoint a b) runMoveRightFrom (EditGraph as _) (Endpoint x y script) = return (Endpoint (succ x) y (if x < length as then This (as ! x) : script else script)) +-- | Return the maximum extent reached and path taken along a given diagonal. runGetK :: HasCallStack => EditGraph a b -> Diagonal -> Myers a b (Endpoint a b) runGetK graph k = let ?callStack = popCallStack callStack in do (i, v) <- checkK graph k let (x, script) = v ! i in return (Endpoint x (x - unDiagonal k) script) +-- | Update the maximum extent reached and path taken along a given diagonal. runSetK :: HasCallStack => EditGraph a b -> Diagonal -> Endpoint a b -> Myers a b () runSetK graph k (Endpoint x _ script) = let ?callStack = popCallStack callStack in do (i, v) <- checkK graph k put (MyersState (v Array.// [(i, (x, script))])) +-- | Slide down any diagonal edges from a given vertex. runSlide :: HasCallStack => (a -> b -> Bool) -> EditGraph a b -> Endpoint a b -> Myers a b (Endpoint a b) runSlide eq (EditGraph as bs) (Endpoint x y script) | x >= 0, x < length as @@ -189,69 +219,88 @@ runSlide eq (EditGraph as bs) (Endpoint x y script) -- Smart constructors +-- | Compute the longest common subsequence. lcs :: HasCallStack => Myers a b [(a, b)] lcs = M LCS `Then` return +-- | Compute the edit distance. editDistance :: HasCallStack => Myers a b Int editDistance = M EditDistance `Then` return +-- | Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD :: HasCallStack => Distance -> Myers a b (Maybe (EditScript a b, Distance)) searchUpToD distance = M (SearchUpToD distance) `Then` return +-- | Search an edit graph for the shortest edit script along a specific diagonal. searchAlongK :: HasCallStack => Distance -> Diagonal -> Myers a b (Maybe (EditScript a b, Distance)) searchAlongK d k = M (SearchAlongK d k) `Then` return +-- | Move onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and slide down any diagonal edges eagerly. moveFromAdjacent :: HasCallStack => Distance -> Diagonal -> Myers a b (Endpoint a b) moveFromAdjacent d k = M (MoveFromAdjacent d k) `Then` return +-- | Move downward from a given vertex, inserting the element for the corresponding row. moveDownFrom :: HasCallStack => Endpoint a b -> Myers a b (Endpoint a b) moveDownFrom e = M (MoveDownFrom e) `Then` return +-- | Move rightward from a given vertex, deleting the element for the corresponding column. moveRightFrom :: HasCallStack => Endpoint a b -> Myers a b (Endpoint a b) moveRightFrom e = M (MoveRightFrom e) `Then` return +-- | Return the maximum extent reached and path taken along a given diagonal. getK :: HasCallStack => Diagonal -> Myers a b (Endpoint a b) getK diagonal = M (GetK diagonal) `Then` return +-- | Update the maximum extent reached and path taken along a given diagonal. setK :: HasCallStack => Diagonal -> Endpoint a b -> Myers a b () setK diagonal x = M (SetK diagonal x) `Then` return +-- | Slide down any diagonal edges from a given vertex. slide :: HasCallStack => Endpoint a b -> Myers a b (Endpoint a b) slide from = M (Slide from) `Then` return -- Implementation details +-- | The state stored by Myers’ algorithm; an array of m + n + 1 values indicating the maximum x-index reached and path taken along each diagonal. newtype MyersState a b = MyersState { unMyersState :: Array.Array Int (Int, EditScript a b) } deriving (Eq, Show) +-- | State effect used in Myers. data State s a where Get :: State s s Put :: s -> State s () +-- | Compute the empty state of length m + n + 1 for a given edit graph. emptyStateForGraph :: EditGraph a b -> MyersState a b emptyStateForGraph (EditGraph as bs) = let (n, m) = (length as, length bs) in MyersState (Array.listArray (0, m + n) (repeat (0, []))) +-- | Evaluate some function for each value in a list until one returns a value or the list is exhausted. for :: [a] -> (a -> Myers c d (Maybe b)) -> Myers c d (Maybe b) for all run = foldr (\ a b -> (<|>) <$> run a <*> b) (return Nothing) all +-- | Continue evaluation of a for loop without returning a value. To exit the loop without continuing, return a value in 'Just' instead. continue :: Myers b c (Maybe a) continue = return Nothing +-- | Compute the actual index into the state array from a (possibly negative) diagonal number. index :: Array.Array Int a -> Int -> Int index v k = if k >= 0 then k else length v + k +-- | Throw a failure. Used to indicate an error in the implementation of Myers’ algorithm. fail :: (HasCallStack, Monad m) => String -> m a fail s = let ?callStack = fromCallSiteList (filter ((/= "M") . fst) (getCallStack callStack)) in throw (MyersException s callStack) +-- | Bounds-checked indexing of arrays, preserving the call stack. (!) :: HasCallStack => Array.Array Int a -> Int -> a v ! i | i < length v = v Array.! i | otherwise = let ?callStack = fromCallSiteList (filter ((/= "M") . fst) (getCallStack callStack)) in throw (MyersException ("index " <> show i <> " out of bounds") callStack) +-- | Check that a given diagonal is in-bounds for the edit graph, returning the actual index to use and the state array. checkK :: HasCallStack => EditGraph a b -> Diagonal -> Myers a b (Int, Array.Array Int (Int, EditScript a b)) checkK (EditGraph as bs) (Diagonal k) = let ?callStack = popCallStack callStack in do v <- gets unMyersState @@ -264,9 +313,11 @@ checkK (EditGraph as bs) (Diagonal k) = let ?callStack = popCallStack callStack return (i, v) +-- | Lifted showing of arrays. liftShowsVector :: (Int -> a -> ShowS) -> ([a] -> ShowS) -> Int -> Array.Array Int a -> ShowS liftShowsVector sp sl d = liftShowsPrec sp sl d . toList +-- | Lifted showing of operations in Myers’ algorithm. liftShowsMyersF :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> MyersF a b c -> ShowS liftShowsMyersF sp1 sp2 d m = case m of SES -> showString "SES" @@ -281,40 +332,49 @@ liftShowsMyersF sp1 sp2 d m = case m of SetK diagonal v -> showsBinaryWith showsPrec (liftShowsEndpoint sp1 sp2) "SetK" d diagonal v Slide endpoint -> showsUnaryWith (liftShowsEndpoint sp1 sp2) "Slide" d endpoint +-- | Lifted showing of ternary constructors. showsTernaryWith :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> (Int -> c -> ShowS) -> String -> Int -> a -> b -> c -> ShowS showsTernaryWith sp1 sp2 sp3 name d x y z = showParen (d > 10) $ showString name . showChar ' ' . sp1 11 x . showChar ' ' . sp2 11 y . showChar ' ' . sp3 11 z +-- | Lifted showing of quaternary constructors. showsQuaternaryWith :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> (Int -> c -> ShowS) -> (Int -> d -> ShowS) -> String -> Int -> a -> b -> c -> d -> ShowS showsQuaternaryWith sp1 sp2 sp3 sp4 name d x y z w = showParen (d > 10) $ showString name . showChar ' ' . sp1 11 x . showChar ' ' . sp2 11 y . showChar ' ' . sp3 11 z . showChar ' ' . sp4 11 w +-- | Lifted showing of quinary constructors. showsQuinaryWith :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> (Int -> c -> ShowS) -> (Int -> d -> ShowS) -> (Int -> e -> ShowS) -> String -> Int -> a -> b -> c -> d -> e -> ShowS showsQuinaryWith sp1 sp2 sp3 sp4 sp5 name d x y z w v = showParen (d > 10) $ showString name . showChar ' ' . sp1 11 x . showChar ' ' . sp2 11 y . showChar ' ' . sp3 11 z . showChar ' ' . sp4 11 w . showChar ' ' . sp5 11 v +-- | Lifted showing of State. liftShowsState :: (Int -> a -> ShowS) -> Int -> State a b -> ShowS liftShowsState sp d state = case state of Get -> showString "Get" Put s -> showsUnaryWith sp "Put" d s +-- | Lift value/list showing functions into a showing function for steps in Myers’ algorithm. liftShowsStepF :: (Int -> a -> ShowS) -> ([a] -> ShowS) -> (Int -> b -> ShowS) -> ([b] -> ShowS) -> Int -> StepF a b c -> ShowS liftShowsStepF sp1 sl1 sp2 sl2 d step = case step of M m -> showsUnaryWith (liftShowsMyersF sp1 sp2) "M" d m S s -> showsUnaryWith (liftShowsState (liftShowsPrec2 sp1 sl1 sp2 sl2)) "S" d s +-- | Lifted showing of These. liftShowsThese :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> These a b -> ShowS liftShowsThese sa sb d t = case t of This a -> showsUnaryWith sa "This" d a That b -> showsUnaryWith sb "That" d b These a b -> showsBinaryWith sa sb "These" d a b +-- | Lifted showing of edit scripts. liftShowsEditScript :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> EditScript a b -> ShowS liftShowsEditScript sa sb _ = showListWith (liftShowsThese sa sb 0) +-- | Lifted showing of edit graph endpoints. liftShowsEndpoint :: (Int -> a -> ShowS) -> (Int -> b -> ShowS) -> Int -> Endpoint a b -> ShowS liftShowsEndpoint sp1 sp2 d (Endpoint x y script) = showsTernaryWith showsPrec showsPrec (liftShowsEditScript sp1 sp2) "Endpoint" d x y script +-- | Exceptions in Myers’ algorithm, along with a description and call stack. data MyersException = MyersException String CallStack deriving (Typeable)