From 2a23d86e32a34773c179fcd5425585dc848ced21 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 09:47:39 -0400 Subject: [PATCH 01/46] searchAlongK produces strict results. --- src/SES/Myers.hs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index c5d02570f..6827a5ef1 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -67,12 +67,12 @@ runSES eq (EditGraph as bs) searchUpToD (Distance d) = do v <- get let extents = searchAlongK v . Diagonal <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] - put (Map.fromList extents) - pure . fmap (snd . snd) $! find isComplete extents - where isComplete (k, (x, _)) = x >= n && (x - k) >= m + put (Map.fromList ((k' &&& (x' &&& script')) <$> extents)) + pure . fmap script' $! find isComplete extents + where isComplete (Extent k x _) = x >= n && (x - k) >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. - searchAlongK v (Diagonal k) = (,) k . (x &&& script) . slideFrom $! + searchAlongK v (Diagonal k) = toExtent k . slideFrom $! if d == 0 || k < negate m || k > n then -- The top-left corner, or otherwise out-of-bounds. Endpoint 0 0 [] @@ -98,6 +98,8 @@ runSES eq (EditGraph as bs) -- | Move rightward from a given vertex, deleting the element for the corresponding column. moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) + toExtent k (Endpoint x _ script) = Extent k x script + -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) | x >= 0, x < n @@ -108,6 +110,8 @@ runSES eq (EditGraph as bs) | otherwise = (Endpoint x y script) +data Extent a b = Extent { k' :: {-# UNPACK #-} !Int, x' :: {-# UNPACK #-} !Int, script' :: !(EditScript a b) } + -- Implementation details -- | The state stored by Myers’ algorithm; an array of m + n + 1 values indicating the maximum x-index reached and path taken along each diagonal. From 9308e7f64acdff8ee79e4ed8107c2b03c04bb5ed Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 10:07:59 -0400 Subject: [PATCH 02/46] Search edit envelopes tail-recursively. --- src/SES/Myers.hs | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 6827a5ef1..c93bdf9d0 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -1,9 +1,8 @@ {-# LANGUAGE GADTs, ImplicitParams, MultiParamTypeClasses, ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-incomplete-patterns #-} module SES.Myers ( EditScript -, Myers , EditGraph(..) -, Distance(..) , Diagonal(..) , Endpoint(..) , ses @@ -22,8 +21,6 @@ import Prologue hiding (error) -- | An edit script, i.e. a sequence of changes/copies of elements. type EditScript a b = [These a b] -type Myers a b = State (MyersState a b) - -- | Notionally the cartesian product of two sequences, represented as a simple wrapper around those arrays holding those sequences’ elements for O(1) lookups. data EditGraph a b = EditGraph { as :: !(Array.Array Int a), bs :: !(Array.Array Int b) } deriving (Eq, Show) @@ -32,10 +29,6 @@ data EditGraph a b = EditGraph { as :: !(Array.Array Int a), bs :: !(Array.Array makeEditGraph :: (Foldable t, Foldable u) => t a -> u b -> EditGraph a b makeEditGraph as bs = EditGraph (Array.listArray (0, pred (length as)) (toList as)) (Array.listArray (0, pred (length bs)) (toList bs)) --- | An edit distance, i.e. a cardinal number of changes. -newtype Distance = Distance { unDistance :: Int } - deriving (Eq, Show) - -- | A diagonal in the edit graph of lists of lengths n and m, numbered from -m to n. newtype Diagonal = Diagonal { unDiagonal :: Int } deriving (Eq, Ix, Ord, Show) @@ -48,27 +41,25 @@ data Endpoint a b = Endpoint { x :: {-# UNPACK #-} !Int, y :: {-# UNPACK #-} !In -- | Compute the shortest edit script using Myers’ algorithm. ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript a b -ses eq as bs = let graph = makeEditGraph as bs in evalState (runSES eq graph) (emptyStateForGraph graph) +ses eq as bs = let graph = makeEditGraph as bs in runSES eq graph -- Evaluation -- | Compute the shortest edit script (diff) of an edit graph. -runSES :: (a -> b -> Bool) -> EditGraph a b -> Myers a b (EditScript a b) +runSES :: (a -> b -> Bool) -> EditGraph a b -> EditScript a b runSES eq (EditGraph as bs) - | null bs = return (This <$> toList as) - | null as = return (That <$> toList bs) - | otherwise = do - Just script <- asum <$> for [0..(n + m)] (searchUpToD . Distance) - return (reverse script) + | null bs = This <$> toList as + | null as = That <$> toList bs + | otherwise = reverse (searchUpToD [0..(n + m)] (Map.singleton 0 (0, []))) where (n, m) = (length as, length bs) -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. - searchUpToD (Distance d) = do - v <- get - let extents = searchAlongK v . Diagonal <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] - put (Map.fromList ((k' &&& (x' &&& script')) <$> extents)) - pure . fmap script' $! find isComplete extents + searchUpToD (d:ds) v = + let extents = searchAlongK v . Diagonal <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in + case find isComplete extents of + Just (Extent _ _ script) -> script + _ -> searchUpToD ds (Map.fromList ((k' &&& (x' &&& script')) <$> extents)) where isComplete (Extent k x _) = x >= n && (x - k) >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. @@ -117,11 +108,6 @@ data Extent a b = Extent { k' :: {-# UNPACK #-} !Int, x' :: {-# UNPACK #-} !Int, -- | The state stored by Myers’ algorithm; an array of m + n + 1 values indicating the maximum x-index reached and path taken along each diagonal. type MyersState a b = Map.IntMap (Int, EditScript a b) --- | Compute the empty state of length m + n + 1 for a given edit graph. -emptyStateForGraph :: EditGraph a b -> MyersState a b -emptyStateForGraph _ = - Map.singleton 0 (0, []) - -- | Lifted showing of arrays. liftShowsVector :: Show i => (Int -> a -> ShowS) -> ([a] -> ShowS) -> Int -> Array.Array i a -> ShowS From 105cd7b1ec559f1c53e79ab1869d925eff1cdfeb Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:07:53 -0400 Subject: [PATCH 03/46] Replace Extent with Endpoint. --- src/SES/Myers.hs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index c93bdf9d0..0e89eedd4 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -58,12 +58,12 @@ runSES eq (EditGraph as bs) searchUpToD (d:ds) v = let extents = searchAlongK v . Diagonal <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in case find isComplete extents of - Just (Extent _ _ script) -> script - _ -> searchUpToD ds (Map.fromList ((k' &&& (x' &&& script')) <$> extents)) - where isComplete (Extent k x _) = x >= n && (x - k) >= m + Just (Endpoint _ _ script) -> script + _ -> searchUpToD ds (Map.fromList ((\ (Endpoint x y script) -> (x - y, (x, script))) <$> extents)) + where isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. - searchAlongK v (Diagonal k) = toExtent k . slideFrom $! + searchAlongK v (Diagonal k) = slideFrom $! if d == 0 || k < negate m || k > n then -- The top-left corner, or otherwise out-of-bounds. Endpoint 0 0 [] @@ -89,8 +89,6 @@ runSES eq (EditGraph as bs) -- | Move rightward from a given vertex, deleting the element for the corresponding column. moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) - toExtent k (Endpoint x _ script) = Extent k x script - -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) | x >= 0, x < n @@ -101,8 +99,6 @@ runSES eq (EditGraph as bs) | otherwise = (Endpoint x y script) -data Extent a b = Extent { k' :: {-# UNPACK #-} !Int, x' :: {-# UNPACK #-} !Int, script' :: !(EditScript a b) } - -- Implementation details -- | The state stored by Myers’ algorithm; an array of m + n + 1 values indicating the maximum x-index reached and path taken along each diagonal. From b1fff9bf52a9a72509be649f336fcd6acaad7407 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:08:17 -0400 Subject: [PATCH 04/46] Rename extents to endpoints. --- src/SES/Myers.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 0e89eedd4..abe10f01b 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -56,10 +56,10 @@ runSES eq (EditGraph as bs) -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD (d:ds) v = - let extents = searchAlongK v . Diagonal <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in - case find isComplete extents of + let endpoints = searchAlongK v . Diagonal <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in + case find isComplete endpoints of Just (Endpoint _ _ script) -> script - _ -> searchUpToD ds (Map.fromList ((\ (Endpoint x y script) -> (x - y, (x, script))) <$> extents)) + _ -> searchUpToD ds (Map.fromList ((\ (Endpoint x y script) -> (x - y, (x, script))) <$> endpoints)) where isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. From 635c3fc00de6e65884cdd7742e83f1b76679b9f6 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:14:28 -0400 Subject: [PATCH 05/46] No need to allocate a list of envelopes. --- src/SES/Myers.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index abe10f01b..94c73e757 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -51,15 +51,15 @@ runSES :: (a -> b -> Bool) -> EditGraph a b -> EditScript a b runSES eq (EditGraph as bs) | null bs = This <$> toList as | null as = That <$> toList bs - | otherwise = reverse (searchUpToD [0..(n + m)] (Map.singleton 0 (0, []))) + | otherwise = reverse (searchUpToD 0 (Map.singleton 0 (0, []))) where (n, m) = (length as, length bs) -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. - searchUpToD (d:ds) v = + searchUpToD d v = let endpoints = searchAlongK v . Diagonal <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script - _ -> searchUpToD ds (Map.fromList ((\ (Endpoint x y script) -> (x - y, (x, script))) <$> endpoints)) + _ -> searchUpToD (succ d) (Map.fromList ((\ (Endpoint x y script) -> (x - y, (x, script))) <$> endpoints)) where isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. From 7eb9c6c834375343870c8ad630019f21985e576e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:15:48 -0400 Subject: [PATCH 06/46] Exhaustiveness has been restored. --- src/SES/Myers.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 94c73e757..1180c4e57 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -1,5 +1,4 @@ {-# LANGUAGE GADTs, ImplicitParams, MultiParamTypeClasses, ScopedTypeVariables #-} -{-# OPTIONS_GHC -Wno-incomplete-patterns #-} module SES.Myers ( EditScript , EditGraph(..) From 7aaff0b6348878d24c9aaf300c9d7ee65fd35a6e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:17:06 -0400 Subject: [PATCH 07/46] :fire: Diagonal. --- src/SES/Myers.hs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 1180c4e57..2a1b39ee6 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -2,7 +2,6 @@ module SES.Myers ( EditScript , EditGraph(..) -, Diagonal(..) , Endpoint(..) , ses , MyersState @@ -28,10 +27,6 @@ data EditGraph a b = EditGraph { as :: !(Array.Array Int a), bs :: !(Array.Array makeEditGraph :: (Foldable t, Foldable u) => t a -> u b -> EditGraph a b makeEditGraph as bs = EditGraph (Array.listArray (0, pred (length as)) (toList as)) (Array.listArray (0, pred (length bs)) (toList bs)) --- | A diagonal in the edit graph of lists of lengths n and m, numbered from -m to n. -newtype Diagonal = Diagonal { unDiagonal :: Int } - deriving (Eq, Ix, Ord, Show) - data Endpoint a b = Endpoint { x :: {-# UNPACK #-} !Int, y :: {-# UNPACK #-} !Int, script :: !(EditScript a b) } deriving (Eq, Show) @@ -55,14 +50,14 @@ runSES eq (EditGraph as bs) -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD d v = - let endpoints = searchAlongK v . Diagonal <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in + let endpoints = searchAlongK v <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script _ -> searchUpToD (succ d) (Map.fromList ((\ (Endpoint x y script) -> (x - y, (x, script))) <$> endpoints)) where isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. - searchAlongK v (Diagonal k) = slideFrom $! + searchAlongK v k = slideFrom $! if d == 0 || k < negate m || k > n then -- The top-left corner, or otherwise out-of-bounds. Endpoint 0 0 [] From 3cbc2b3505bb3ea2720934f93e7d13125a673725 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:18:32 -0400 Subject: [PATCH 08/46] Close over the state. --- src/SES/Myers.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 2a1b39ee6..853e42856 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -50,14 +50,14 @@ runSES eq (EditGraph as bs) -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD d v = - let endpoints = searchAlongK v <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in + let endpoints = searchAlongK <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script _ -> searchUpToD (succ d) (Map.fromList ((\ (Endpoint x y script) -> (x - y, (x, script))) <$> endpoints)) where isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. - searchAlongK v k = slideFrom $! + searchAlongK k = slideFrom $! if d == 0 || k < negate m || k > n then -- The top-left corner, or otherwise out-of-bounds. Endpoint 0 0 [] From c4992ff950674be0dadbd6525ccdcf263af6309b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:20:44 -0400 Subject: [PATCH 09/46] :fire: runSES. --- src/SES/Myers.hs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 853e42856..f38992621 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -35,18 +35,12 @@ data Endpoint a b = Endpoint { x :: {-# UNPACK #-} !Int, y :: {-# UNPACK #-} !In -- | Compute the shortest edit script using Myers’ algorithm. ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript a b -ses eq as bs = let graph = makeEditGraph as bs in runSES eq graph - - --- Evaluation - --- | Compute the shortest edit script (diff) of an edit graph. -runSES :: (a -> b -> Bool) -> EditGraph a b -> EditScript a b -runSES eq (EditGraph as bs) +ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs | otherwise = reverse (searchUpToD 0 (Map.singleton 0 (0, []))) - where (n, m) = (length as, length bs) + where EditGraph as bs = makeEditGraph as' bs' + (n, m) = (length as, length bs) -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD d v = From 854b2b1c622201110121591f16a00ce190133b3e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:24:12 -0400 Subject: [PATCH 10/46] :fire: EditGraph. --- src/SES/Myers.hs | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index f38992621..8fb0554bd 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -1,7 +1,6 @@ {-# LANGUAGE GADTs, ImplicitParams, MultiParamTypeClasses, ScopedTypeVariables #-} module SES.Myers ( EditScript -, EditGraph(..) , Endpoint(..) , ses , MyersState @@ -11,7 +10,6 @@ import Data.Array ((!)) import qualified Data.Array as Array import qualified Data.IntMap.Lazy as Map import Data.Ix -import Data.Functor.Classes import Data.These import GHC.Show hiding (show) import Prologue hiding (error) @@ -19,14 +17,6 @@ import Prologue hiding (error) -- | An edit script, i.e. a sequence of changes/copies of elements. type EditScript a b = [These a b] --- | Notionally the cartesian product of two sequences, represented as a simple wrapper around those arrays holding those sequences’ elements for O(1) lookups. -data EditGraph a b = EditGraph { as :: !(Array.Array Int a), bs :: !(Array.Array Int b) } - deriving (Eq, Show) - --- | Construct an edit graph from Foldable sequences. -makeEditGraph :: (Foldable t, Foldable u) => t a -> u b -> EditGraph a b -makeEditGraph as bs = EditGraph (Array.listArray (0, pred (length as)) (toList as)) (Array.listArray (0, pred (length bs)) (toList bs)) - data Endpoint a b = Endpoint { x :: {-# UNPACK #-} !Int, y :: {-# UNPACK #-} !Int, script :: !(EditScript a b) } deriving (Eq, Show) @@ -39,8 +29,8 @@ ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs | otherwise = reverse (searchUpToD 0 (Map.singleton 0 (0, []))) - where EditGraph as bs = makeEditGraph as' bs' - (n, m) = (length as, length bs) + where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) + (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD d v = @@ -91,14 +81,3 @@ ses eq as' bs' -- | The state stored by Myers’ algorithm; an array of m + n + 1 values indicating the maximum x-index reached and path taken along each diagonal. type MyersState a b = Map.IntMap (Int, EditScript a b) - - --- | Lifted showing of arrays. -liftShowsVector :: Show i => (Int -> a -> ShowS) -> ([a] -> ShowS) -> Int -> Array.Array i a -> ShowS -liftShowsVector sp sl d = liftShowsPrec sp sl d . toList - - --- Instances - -instance Show2 EditGraph where - liftShowsPrec2 sp1 sl1 sp2 sl2 d (EditGraph as bs) = showsBinaryWith (liftShowsVector sp1 sl1) (liftShowsVector sp2 sl2) "EditGraph" d as bs From 25d66ede5f1bc7102749df4bfc85ba8aa75f3819 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:25:57 -0400 Subject: [PATCH 11/46] :fire: MyersState. --- src/SES/Myers.hs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 8fb0554bd..1172b5328 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -3,7 +3,6 @@ module SES.Myers ( EditScript , Endpoint(..) , ses -, MyersState ) where import Data.Array ((!)) @@ -21,8 +20,6 @@ data Endpoint a b = Endpoint { x :: {-# UNPACK #-} !Int, y :: {-# UNPACK #-} !In deriving (Eq, Show) --- API - -- | Compute the shortest edit script using Myers’ algorithm. ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript a b ses eq as' bs' @@ -75,9 +72,3 @@ ses eq as' bs' , b <- bs ! y , a `eq` b = slideFrom (Endpoint (succ x) (succ y) (These a b : script)) | otherwise = (Endpoint x y script) - - --- Implementation details - --- | The state stored by Myers’ algorithm; an array of m + n + 1 values indicating the maximum x-index reached and path taken along each diagonal. -type MyersState a b = Map.IntMap (Int, EditScript a b) From 7ff6e1252c58a962bb24ec840a3479bb7e03d8b6 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:26:25 -0400 Subject: [PATCH 12/46] =?UTF-8?q?Don=E2=80=99t=20export=20Endpoint.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SES/Myers.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 1172b5328..5774ed346 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -1,7 +1,6 @@ {-# LANGUAGE GADTs, ImplicitParams, MultiParamTypeClasses, ScopedTypeVariables #-} module SES.Myers ( EditScript -, Endpoint(..) , ses ) where @@ -16,7 +15,7 @@ import Prologue hiding (error) -- | An edit script, i.e. a sequence of changes/copies of elements. type EditScript a b = [These a b] -data Endpoint a b = Endpoint { x :: {-# UNPACK #-} !Int, y :: {-# UNPACK #-} !Int, script :: !(EditScript a b) } +data Endpoint a b = Endpoint { x :: {-# UNPACK #-} !Int, _y :: {-# UNPACK #-} !Int, _script :: !(EditScript a b) } deriving (Eq, Show) From 158b6387a38819086ef0063d8c5808ef7cf48aea Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:26:43 -0400 Subject: [PATCH 13/46] :fire: redundant parens. --- src/SES/Myers.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 5774ed346..2f9087690 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -70,4 +70,4 @@ ses eq as' bs' , a <- as ! x , b <- bs ! y , a `eq` b = slideFrom (Endpoint (succ x) (succ y) (These a b : script)) - | otherwise = (Endpoint x y script) + | otherwise = Endpoint x y script From e505b532a942fc294c41a67fd9b5c999a0b915d7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:28:57 -0400 Subject: [PATCH 14/46] Use the inRange method to bounds check x/y. --- src/SES/Myers.hs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 2f9087690..83517d16c 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -65,9 +65,7 @@ ses eq as' bs' -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) - | x >= 0, x < n - , y >= 0, y < m - , a <- as ! x - , b <- bs ! y + | inRange (Array.bounds as) x, a <- as ! x + , inRange (Array.bounds bs) y, b <- bs ! y , a `eq` b = slideFrom (Endpoint (succ x) (succ y) (These a b : script)) | otherwise = Endpoint x y script From 0d22eabf5bf9ddd0a904586062e8b9a0a34cf376 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:31:38 -0400 Subject: [PATCH 15/46] Store Endpoints in the state. This uses a word of extra space but saves some time moving things around. --- src/SES/Myers.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 83517d16c..a87ff96f2 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -24,7 +24,7 @@ ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs - | otherwise = reverse (searchUpToD 0 (Map.singleton 0 (0, []))) + | otherwise = reverse (searchUpToD 0 (Map.singleton 0 (Endpoint 0 0 []))) where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) (n, m) = (length as', length bs') @@ -33,7 +33,7 @@ ses eq as' bs' let endpoints = searchAlongK <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script - _ -> searchUpToD (succ d) (Map.fromList ((\ (Endpoint x y script) -> (x - y, (x, script))) <$> endpoints)) + _ -> searchUpToD (succ d) (Map.fromList ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) where isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. @@ -53,7 +53,7 @@ ses eq as' bs' else -- The upper/right extent of the search region or edit graph, whichever is smaller. moveRightFrom prev - where getK k = let (x, script) = v Map.! k in Endpoint x (x - k) script + where getK k = v Map.! k prev = getK (pred k) next = getK (succ k) From 30b9e85ed31fa7eb104ea78f8d968788d96693e0 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:36:26 -0400 Subject: [PATCH 16/46] Use minimal Arrays for the state. --- src/SES/Myers.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index a87ff96f2..e9be5bb91 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -6,7 +6,6 @@ module SES.Myers import Data.Array ((!)) import qualified Data.Array as Array -import qualified Data.IntMap.Lazy as Map import Data.Ix import Data.These import GHC.Show hiding (show) @@ -24,7 +23,7 @@ ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs - | otherwise = reverse (searchUpToD 0 (Map.singleton 0 (Endpoint 0 0 []))) + | otherwise = reverse (searchUpToD 0 (Array.array (0, 0) [(0, Endpoint 0 0 [])])) where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) (n, m) = (length as', length bs') @@ -33,7 +32,7 @@ ses eq as' bs' let endpoints = searchAlongK <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script - _ -> searchUpToD (succ d) (Map.fromList ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) + _ -> searchUpToD (succ d) (Array.array (negate d, d) ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) where isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. @@ -53,7 +52,7 @@ ses eq as' bs' else -- The upper/right extent of the search region or edit graph, whichever is smaller. moveRightFrom prev - where getK k = v Map.! k + where getK k = v ! k prev = getK (pred k) next = getK (succ k) From 51e05c39d38d3b1f2ffa6ba66d410eead4fc8cc8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:52:48 -0400 Subject: [PATCH 17/46] Align moveDownFrom/moveRightFrom. --- src/SES/Myers.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index e9be5bb91..cd2f9a042 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -57,10 +57,10 @@ ses eq as' bs' next = getK (succ k) -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y < m then That (bs ! y) : script else script) + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y < m then That (bs ! y) : script else script) -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) From f395d5c144c24e32fb5b3a02d1477e9b4638667b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:54:39 -0400 Subject: [PATCH 18/46] Rename prev/next to left/up. --- src/SES/Myers.hs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index cd2f9a042..40ced8af6 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -42,19 +42,19 @@ ses eq as' bs' Endpoint 0 0 [] else if k == negate d || k == negate m then -- The lower/left extent of the search region or edit graph, whichever is smaller. - moveDownFrom next + moveDownFrom up else if k /= d && k /= n then -- Somewhere in the interior of the search region and edit graph. - if x prev < x next then - moveDownFrom next + if x left < x up then + moveDownFrom up else - moveRightFrom prev + moveRightFrom left else -- The upper/right extent of the search region or edit graph, whichever is smaller. - moveRightFrom prev + moveRightFrom left where getK k = v ! k - prev = getK (pred k) - next = getK (succ k) + left = getK (pred k) + up = getK (succ k) -- | Move downward from a given vertex, inserting the element for the corresponding row. moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y < m then That (bs ! y) : script else script) From 6a4148fd6fe6a8c8ee072130df2de9163a8248bc Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 11:57:50 -0400 Subject: [PATCH 19/46] s/negate/-/ --- src/SES/Myers.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 40ced8af6..33125dd0d 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -29,18 +29,18 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD d v = - let endpoints = searchAlongK <$> [ k | k <- [negate d, negate d + 2 .. d], inRange (negate m, n) k ] in + let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (negate m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script - _ -> searchUpToD (succ d) (Array.array (negate d, d) ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) + _ -> searchUpToD (succ d) (Array.array (-d, d) ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) where isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. searchAlongK k = slideFrom $! - if d == 0 || k < negate m || k > n then + if d == 0 || k < -m || k > n then -- The top-left corner, or otherwise out-of-bounds. Endpoint 0 0 [] - else if k == negate d || k == negate m then + else if k == -d || k == -m then -- The lower/left extent of the search region or edit graph, whichever is smaller. moveDownFrom up else if k /= d && k /= n then From 05ec4dda6389fad40cec72694fa8a9e04985d1dd Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 12:02:34 -0400 Subject: [PATCH 20/46] k is already bounded. --- src/SES/Myers.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 33125dd0d..b4a5507ca 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -37,8 +37,8 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. searchAlongK k = slideFrom $! - if d == 0 || k < -m || k > n then - -- The top-left corner, or otherwise out-of-bounds. + if d == 0 then + -- The top-left corner. Endpoint 0 0 [] else if k == -d || k == -m then -- The lower/left extent of the search region or edit graph, whichever is smaller. From e532f7d2fb4deddf72961eee4495dcb2ff6a3d5f Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 13:13:40 -0400 Subject: [PATCH 21/46] Simplify the search logic. --- src/SES/Myers.hs | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index b4a5507ca..3ad601db0 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -23,13 +23,13 @@ ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs - | otherwise = reverse (searchUpToD 0 (Array.array (0, 0) [(0, Endpoint 0 0 [])])) + | otherwise = reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD d v = - let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (negate m, n) k ] in + let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (-m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script _ -> searchUpToD (succ d) (Array.array (-d, d) ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) @@ -37,30 +37,18 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. searchAlongK k = slideFrom $! - if d == 0 then - -- The top-left corner. - Endpoint 0 0 [] - else if k == -d || k == -m then - -- The lower/left extent of the search region or edit graph, whichever is smaller. + if k == -d || k == -m || k /= d && k /= n && x left < x up then moveDownFrom up - else if k /= d && k /= n then - -- Somewhere in the interior of the search region and edit graph. - if x left < x up then - moveDownFrom up - else - moveRightFrom left else - -- The upper/right extent of the search region or edit graph, whichever is smaller. moveRightFrom left - where getK k = v ! k - left = getK (pred k) - up = getK (succ k) + where left = v ! pred k + up = v ! succ k -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y < m then That (bs ! y) : script else script) + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if inRange (Array.bounds bs) y then That (bs ! y) : script else script) -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if inRange (Array.bounds as) x then This (as ! x) : script else script) -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) From c2daf3af07d75e5ca67a19f8f76a1c4df4f4ddf8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 13:17:44 -0400 Subject: [PATCH 22/46] Avoid refetching the bounds. --- src/SES/Myers.hs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 3ad601db0..300d428a3 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -25,6 +25,7 @@ ses eq as' bs' | null as = That <$> toList bs | otherwise = reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) + (aBounds, bBounds) = (Array.bounds as, Array.bounds bs) (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. @@ -45,14 +46,14 @@ ses eq as' bs' up = v ! succ k -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if inRange (Array.bounds bs) y then That (bs ! y) : script else script) + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if inRange bBounds y then That (bs ! y) : script else script) -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if inRange (Array.bounds as) x then This (as ! x) : script else script) + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if inRange aBounds x then This (as ! x) : script else script) -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) - | inRange (Array.bounds as) x, a <- as ! x - , inRange (Array.bounds bs) y, b <- bs ! y + | inRange aBounds x, a <- as ! x + , inRange bBounds y, b <- bs ! y , a `eq` b = slideFrom (Endpoint (succ x) (succ y) (These a b : script)) | otherwise = Endpoint x y script From d71f97d1b58e1b929d3135bdb45b428049660b82 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 13:20:25 -0400 Subject: [PATCH 23/46] Save the initial bounds computation. --- src/SES/Myers.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 300d428a3..ea2fda4f1 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -24,8 +24,8 @@ ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs | otherwise = reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) - where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) - (aBounds, bBounds) = (Array.bounds as, Array.bounds bs) + where (as, bs) = (Array.listArray aBounds (toList as'), Array.listArray bBounds (toList bs')) + (aBounds, bBounds) = ((0, pred n), (0, pred m)) (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. From 8a2c0bf6c2c1abcecb227df5cb389f9d3330288f Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 13:45:16 -0400 Subject: [PATCH 24/46] Less strict evaluation of the edit script. --- src/SES/Myers.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index ea2fda4f1..e55f2245f 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -14,7 +14,7 @@ import Prologue hiding (error) -- | An edit script, i.e. a sequence of changes/copies of elements. type EditScript a b = [These a b] -data Endpoint a b = Endpoint { x :: {-# UNPACK #-} !Int, _y :: {-# UNPACK #-} !Int, _script :: !(EditScript a b) } +data Endpoint a b = Endpoint { x :: {-# UNPACK #-} !Int, _y :: {-# UNPACK #-} !Int, _script :: EditScript a b } deriving (Eq, Show) From 6a627255c9dcbe052e08621845a7b980da0f9252 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 13:53:15 -0400 Subject: [PATCH 25/46] Revert "Save the initial bounds computation." This reverts commit e062aa06676723be75c47d376fcf66457d9d39aa. --- src/SES/Myers.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index e55f2245f..dfa8c43f3 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -24,8 +24,8 @@ ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs | otherwise = reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) - where (as, bs) = (Array.listArray aBounds (toList as'), Array.listArray bBounds (toList bs')) - (aBounds, bBounds) = ((0, pred n), (0, pred m)) + where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) + (aBounds, bBounds) = (Array.bounds as, Array.bounds bs) (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. From c45b5a32b07f2225fd3eaa9de454626448cbb7bd Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 13:53:27 -0400 Subject: [PATCH 26/46] Revert "Avoid refetching the bounds." This reverts commit de7739b5cf19f93a1ae697350e3332c9a1ea3034. --- src/SES/Myers.hs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index dfa8c43f3..c4f6349b5 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -25,7 +25,6 @@ ses eq as' bs' | null as = That <$> toList bs | otherwise = reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) - (aBounds, bBounds) = (Array.bounds as, Array.bounds bs) (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. @@ -46,14 +45,14 @@ ses eq as' bs' up = v ! succ k -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if inRange bBounds y then That (bs ! y) : script else script) + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if inRange (Array.bounds bs) y then That (bs ! y) : script else script) -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if inRange aBounds x then This (as ! x) : script else script) + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if inRange (Array.bounds as) x then This (as ! x) : script else script) -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) - | inRange aBounds x, a <- as ! x - , inRange bBounds y, b <- bs ! y + | inRange (Array.bounds as) x, a <- as ! x + , inRange (Array.bounds bs) y, b <- bs ! y , a `eq` b = slideFrom (Endpoint (succ x) (succ y) (These a b : script)) | otherwise = Endpoint x y script From c736d2527c50b1dea0cf8608a744f06e2fb3d5af Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 13:54:59 -0400 Subject: [PATCH 27/46] Revert "Simplify the search logic." This reverts commit 008658c2bfb06da29d56278b4b48f5b76d0dd211. --- src/SES/Myers.hs | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index c4f6349b5..0e5ae2238 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -23,13 +23,13 @@ ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs - | otherwise = reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) + | otherwise = reverse (searchUpToD 0 (Array.array (0, 0) [(0, Endpoint 0 0 [])])) where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD d v = - let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (-m, n) k ] in + let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (negate m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script _ -> searchUpToD (succ d) (Array.array (-d, d) ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) @@ -37,18 +37,30 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. searchAlongK k = slideFrom $! - if k == -d || k == -m || k /= d && k /= n && x left < x up then + if d == 0 then + -- The top-left corner. + Endpoint 0 0 [] + else if k == -d || k == -m then + -- The lower/left extent of the search region or edit graph, whichever is smaller. moveDownFrom up + else if k /= d && k /= n then + -- Somewhere in the interior of the search region and edit graph. + if x left < x up then + moveDownFrom up + else + moveRightFrom left else + -- The upper/right extent of the search region or edit graph, whichever is smaller. moveRightFrom left - where left = v ! pred k - up = v ! succ k + where getK k = v ! k + left = getK (pred k) + up = getK (succ k) -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if inRange (Array.bounds bs) y then That (bs ! y) : script else script) + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y < m then That (bs ! y) : script else script) -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if inRange (Array.bounds as) x then This (as ! x) : script else script) + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) From f0147ae5137699d70ffe82850b1a5b69606dfdca Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 13:58:19 -0400 Subject: [PATCH 28/46] Prioritize the upper/right extent. --- src/SES/Myers.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 0e5ae2238..1bfbe6808 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -43,15 +43,15 @@ ses eq as' bs' else if k == -d || k == -m then -- The lower/left extent of the search region or edit graph, whichever is smaller. moveDownFrom up - else if k /= d && k /= n then + else if k == d || k == n then + -- The upper/right extent of the search region or edit graph, whichever is smaller. + moveRightFrom left + else -- Somewhere in the interior of the search region and edit graph. if x left < x up then moveDownFrom up else moveRightFrom left - else - -- The upper/right extent of the search region or edit graph, whichever is smaller. - moveRightFrom left where getK k = v ! k left = getK (pred k) up = getK (succ k) From 231a746d58db0c945ec94b2b1abae9f7b515ad1b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 14:00:13 -0400 Subject: [PATCH 29/46] Use unary - instead of negate. --- src/SES/Myers.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 1bfbe6808..35f0b4b2f 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -29,7 +29,7 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD d v = - let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (negate m, n) k ] in + let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (-m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script _ -> searchUpToD (succ d) (Array.array (-d, d) ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) From 0bf24a86dbe23603eb826107da8135236fd6cb81 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 14:01:33 -0400 Subject: [PATCH 30/46] Force the evaluation of the state array. --- src/SES/Myers.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 35f0b4b2f..4879fcb49 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -28,7 +28,7 @@ ses eq as' bs' (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. - searchUpToD d v = + searchUpToD d v = v `seq` let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (-m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script From c4f45522af7cbe7b760576c57c1ecf9674c7845b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 14:02:06 -0400 Subject: [PATCH 31/46] :fire: getK. --- src/SES/Myers.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 4879fcb49..fe6a2e832 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -52,9 +52,8 @@ ses eq as' bs' moveDownFrom up else moveRightFrom left - where getK k = v ! k - left = getK (pred k) - up = getK (succ k) + where left = v ! pred k + up = v ! succ k -- | Move downward from a given vertex, inserting the element for the corresponding row. moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y < m then That (bs ! y) : script else script) From 922569fbe6f3c0ed2050983d51b948912b0ec4b6 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 14:46:57 -0400 Subject: [PATCH 32/46] Simplify the search along k logic again. --- src/SES/Myers.hs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index fe6a2e832..8f90a00a6 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -23,40 +23,32 @@ ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs - | otherwise = reverse (searchUpToD 0 (Array.array (0, 0) [(0, Endpoint 0 0 [])])) + | otherwise = reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD d v = v `seq` - let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (-m, n) k ] in + let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (lower, upper) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script _ -> searchUpToD (succ d) (Array.array (-d, d) ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) - where isComplete (Endpoint x y _) = x >= n && y >= m + where (lower, upper) = (-(min m d), min n d) + isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. searchAlongK k = slideFrom $! - if d == 0 then - -- The top-left corner. - Endpoint 0 0 [] - else if k == -d || k == -m then + if k == lower || k /= upper && x left < x up then -- The lower/left extent of the search region or edit graph, whichever is smaller. moveDownFrom up - else if k == d || k == n then + else -- The upper/right extent of the search region or edit graph, whichever is smaller. moveRightFrom left - else - -- Somewhere in the interior of the search region and edit graph. - if x left < x up then - moveDownFrom up - else - moveRightFrom left where left = v ! pred k up = v ! succ k -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y < m then That (bs ! y) : script else script) + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y >= 0 && y < m then That (bs ! y) : script else script) -- | Move rightward from a given vertex, deleting the element for the corresponding column. moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) From 74c301f7dad35efa16437e20a987087c695e3e59 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 15:01:49 -0400 Subject: [PATCH 33/46] Define searchAlongK with guard clauses. --- src/SES/Myers.hs | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 8f90a00a6..486325b44 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -23,27 +23,26 @@ ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs - | otherwise = reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) + | otherwise = n `seq` m `seq` reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) (n, m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. - searchUpToD d v = v `seq` - let endpoints = searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (lower, upper) k ] in + searchUpToD d v = d `seq` v `seq` + let endpoints = slideFrom . searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (-m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script _ -> searchUpToD (succ d) (Array.array (-d, d) ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) - where (lower, upper) = (-(min m d), min n d) - isComplete (Endpoint x y _) = x >= n && y >= m + where isComplete (Endpoint x y _) = x >= n && y >= m - -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any), and sliding down any diagonal edges eagerly. - searchAlongK k = slideFrom $! - if k == lower || k /= upper && x left < x up then - -- The lower/left extent of the search region or edit graph, whichever is smaller. - moveDownFrom up - else - -- The upper/right extent of the search region or edit graph, whichever is smaller. - moveRightFrom left + -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any). + searchAlongK k + | k == -d = moveDownFrom up + | k == d = moveRightFrom left + | k == -m = moveDownFrom up + | k == n = moveRightFrom left + | x left < x up = moveDownFrom up + | otherwise = moveRightFrom left where left = v ! pred k up = v ! succ k From 9fa104fb23b8d10ddeedfb4a6cad0e42b7348a2c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 15:04:11 -0400 Subject: [PATCH 34/46] Factor out the downward/rightward moves. --- src/SES/Myers.hs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 486325b44..d57efb5cc 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -37,13 +37,15 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any). searchAlongK k - | k == -d = moveDownFrom up - | k == d = moveRightFrom left - | k == -m = moveDownFrom up - | k == n = moveRightFrom left - | x left < x up = moveDownFrom up - | otherwise = moveRightFrom left - where left = v ! pred k + | k == -d = moveDown + | k == d = moveRight + | k == -m = moveDown + | k == n = moveRight + | x left < x up = moveDown + | otherwise = moveRight + where moveRight = moveRightFrom left + moveDown = moveDownFrom up + left = v ! pred k up = v ! succ k -- | Move downward from a given vertex, inserting the element for the corresponding row. From 6947a56501cda0b67ada89217dbaa69ebb798759 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 15:27:08 -0400 Subject: [PATCH 35/46] Use BangPatterns to denote strictness. --- src/SES/Myers.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index d57efb5cc..8833051fe 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GADTs, ImplicitParams, MultiParamTypeClasses, ScopedTypeVariables #-} +{-# LANGUAGE BangPatterns, GADTs, ImplicitParams, MultiParamTypeClasses, ScopedTypeVariables #-} module SES.Myers ( EditScript , ses @@ -23,12 +23,12 @@ ses :: (Foldable t, Foldable u) => (a -> b -> Bool) -> t a -> u b -> EditScript ses eq as' bs' | null bs = This <$> toList as | null as = That <$> toList bs - | otherwise = n `seq` m `seq` reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) + | otherwise = reverse (searchUpToD 0 (Array.array (1, 1) [(1, Endpoint 0 (-1) [])])) where (as, bs) = (Array.listArray (0, pred n) (toList as'), Array.listArray (0, pred m) (toList bs')) - (n, m) = (length as', length bs') + (!n, !m) = (length as', length bs') -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. - searchUpToD d v = d `seq` v `seq` + searchUpToD !d !v = let endpoints = slideFrom . searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (-m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script @@ -36,7 +36,7 @@ ses eq as' bs' where isComplete (Endpoint x y _) = x >= n && y >= m -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any). - searchAlongK k + searchAlongK !k | k == -d = moveDown | k == d = moveRight | k == -m = moveDown From 2ed1926f94f54fef9233823c8d436556bcfe962d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 15:32:07 -0400 Subject: [PATCH 36/46] :fire: moveDown/moveRight. --- src/SES/Myers.hs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 8833051fe..f4dd044b5 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -37,16 +37,17 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any). searchAlongK !k - | k == -d = moveDown - | k == d = moveRight - | k == -m = moveDown - | k == n = moveRight - | x left < x up = moveDown - | otherwise = moveRight - where moveRight = moveRightFrom left - moveDown = moveDownFrom up - left = v ! pred k - up = v ! succ k + | k == -d = moveDownFrom (v ! succ k) + | k == d = moveRightFrom (v ! pred k) + | k == -m = moveDownFrom (v ! succ k) + | k == n = moveRightFrom (v ! pred k) + | otherwise = + let left = v ! pred k + up = v ! succ k in + if x left < x up then + moveDownFrom up + else + moveRightFrom left -- | Move downward from a given vertex, inserting the element for the corresponding row. moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y >= 0 && y < m then That (bs ! y) : script else script) From 8131dfaf756c38f66d386ebaa3e9062fdaeb5e74 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 15:33:06 -0400 Subject: [PATCH 37/46] Move moveDownFrom/moveRightFrom/slideFrom out a scope. --- src/SES/Myers.hs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index f4dd044b5..97d379941 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -49,15 +49,15 @@ ses eq as' bs' else moveRightFrom left - -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y >= 0 && y < m then That (bs ! y) : script else script) + -- | Move downward from a given vertex, inserting the element for the corresponding row. + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y >= 0 && y < m then That (bs ! y) : script else script) - -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) + -- | Move rightward from a given vertex, deleting the element for the corresponding column. + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) - -- | Slide down any diagonal edges from a given vertex. - slideFrom (Endpoint x y script) - | inRange (Array.bounds as) x, a <- as ! x - , inRange (Array.bounds bs) y, b <- bs ! y - , a `eq` b = slideFrom (Endpoint (succ x) (succ y) (These a b : script)) - | otherwise = Endpoint x y script + -- | Slide down any diagonal edges from a given vertex. + slideFrom (Endpoint x y script) + | inRange (Array.bounds as) x, a <- as ! x + , inRange (Array.bounds bs) y, b <- bs ! y + , a `eq` b = slideFrom (Endpoint (succ x) (succ y) (These a b : script)) + | otherwise = Endpoint x y script From 139c9513d18060cee274e75036403e56be807839 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 15:33:26 -0400 Subject: [PATCH 38/46] Align moveDownFrom/moveRightFrom. --- src/SES/Myers.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 97d379941..32a720fb2 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -53,7 +53,7 @@ ses eq as' bs' moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y >= 0 && y < m then That (bs ! y) : script else script) -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) From 9337631149de65873ae9d2c8fd74c0a77d478ce4 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 15:46:36 -0400 Subject: [PATCH 39/46] Safe array subscripting. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This halves our heap consumption. I don’t know how. --- src/SES/Myers.hs | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 32a720fb2..491426a24 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -4,7 +4,6 @@ module SES.Myers , ses ) where -import Data.Array ((!)) import qualified Data.Array as Array import Data.Ix import Data.These @@ -37,27 +36,36 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any). searchAlongK !k - | k == -d = moveDownFrom (v ! succ k) - | k == d = moveRightFrom (v ! pred k) - | k == -m = moveDownFrom (v ! succ k) - | k == n = moveRightFrom (v ! pred k) + | k == -d = moveDownFrom (v Array.! succ k) + | k == d = moveRightFrom (v Array.! pred k) + | k == -m = moveDownFrom (v Array.! succ k) + | k == n = moveRightFrom (v Array.! pred k) | otherwise = - let left = v ! pred k - up = v ! succ k in + let left = v Array.! pred k + up = v Array.! succ k in if x left < x up then moveDownFrom up else moveRightFrom left -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) (if y >= 0 && y < m then That (bs ! y) : script else script) + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) $ case bs ! y of + Just b -> That b : script + _ -> script -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y (if x < n then This (as ! x) : script else script) + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y $ case as ! x of + Just a -> This a : script + _ -> script -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) - | inRange (Array.bounds as) x, a <- as ! x - , inRange (Array.bounds bs) y, b <- bs ! y + | Just a <- as ! x + , Just b <- bs ! y , a `eq` b = slideFrom (Endpoint (succ x) (succ y) (These a b : script)) | otherwise = Endpoint x y script + + +(!) :: Ix i => Array.Array i a -> i -> Maybe a +(!) v i | inRange (Array.bounds v) i = Just $! v Array.! i + | otherwise = Nothing From 4cdd66b12b6f2bee4326dcdabaf6bfd3e4446fd8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 15:50:56 -0400 Subject: [PATCH 40/46] Mark moveDownFrom and moveRightFrom for inlining. --- src/SES/Myers.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 491426a24..7ddb73560 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -52,11 +52,13 @@ ses eq as' bs' moveDownFrom (Endpoint x y script) = Endpoint x (succ y) $ case bs ! y of Just b -> That b : script _ -> script + {-# INLINE moveDownFrom #-} -- | Move rightward from a given vertex, deleting the element for the corresponding column. moveRightFrom (Endpoint x y script) = Endpoint (succ x) y $ case as ! x of Just a -> This a : script _ -> script + {-# INLINE moveRightFrom #-} -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) From 676cfaa68ec8797c64db2fbac932e1694d084920 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 16:02:56 -0400 Subject: [PATCH 41/46] Try to inline !. --- src/SES/Myers.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 7ddb73560..c5b09e083 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -71,3 +71,4 @@ ses eq as' bs' (!) :: Ix i => Array.Array i a -> i -> Maybe a (!) v i | inRange (Array.bounds v) i = Just $! v Array.! i | otherwise = Nothing +{-# INLINE (!) #-} From d822c138b3235fc0620caa2aad401eb86684aade Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 16:06:20 -0400 Subject: [PATCH 42/46] Use bang patterns for strictness. --- src/SES/Myers.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index c5b09e083..ce3c7f596 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -69,6 +69,6 @@ ses eq as' bs' (!) :: Ix i => Array.Array i a -> i -> Maybe a -(!) v i | inRange (Array.bounds v) i = Just $! v Array.! i +(!) v i | inRange (Array.bounds v) i, !a <- v Array.! i = Just a | otherwise = Nothing {-# INLINE (!) #-} From a084544487c13b663ee5708dbb27a768b66e5205 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 16:07:42 -0400 Subject: [PATCH 43/46] Evaluate endpoints eagerly. --- src/SES/Myers.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index ce3c7f596..0c68a3953 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -28,7 +28,7 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script up to a given proposed edit distance, building on the results of previous searches. searchUpToD !d !v = - let endpoints = slideFrom . searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (-m, n) k ] in + let !endpoints = slideFrom . searchAlongK <$> [ k | k <- [-d, -d + 2 .. d], inRange (-m, n) k ] in case find isComplete endpoints of Just (Endpoint _ _ script) -> script _ -> searchUpToD (succ d) (Array.array (-d, d) ((\ e@(Endpoint x y _) -> (x - y, e)) <$> endpoints)) From 3bf95e05f428396eda5e3967b9edd6dc9bc0c55c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 16:33:47 -0400 Subject: [PATCH 44/46] Tighten up moveDownFrom/moveRightFrom. --- src/SES/Myers.hs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 0c68a3953..ae6538866 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -49,15 +49,11 @@ ses eq as' bs' moveRightFrom left -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) $ case bs ! y of - Just b -> That b : script - _ -> script + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) $ maybe script ((: script) . That) (bs ! y) {-# INLINE moveDownFrom #-} -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y $ case as ! x of - Just a -> This a : script - _ -> script + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y $ maybe script ((: script) . This) (as ! x) {-# INLINE moveRightFrom #-} -- | Slide down any diagonal edges from a given vertex. From 47a33a7916e3a17069a31fe69b698441e11a396e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 17:02:29 -0400 Subject: [PATCH 45/46] Rename ! to !?. --- src/SES/Myers.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index ae6538866..819c622ea 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -49,22 +49,22 @@ ses eq as' bs' moveRightFrom left -- | Move downward from a given vertex, inserting the element for the corresponding row. - moveDownFrom (Endpoint x y script) = Endpoint x (succ y) $ maybe script ((: script) . That) (bs ! y) + moveDownFrom (Endpoint x y script) = Endpoint x (succ y) $ maybe script ((: script) . That) (bs !? y) {-# INLINE moveDownFrom #-} -- | Move rightward from a given vertex, deleting the element for the corresponding column. - moveRightFrom (Endpoint x y script) = Endpoint (succ x) y $ maybe script ((: script) . This) (as ! x) + moveRightFrom (Endpoint x y script) = Endpoint (succ x) y $ maybe script ((: script) . This) (as !? x) {-# INLINE moveRightFrom #-} -- | Slide down any diagonal edges from a given vertex. slideFrom (Endpoint x y script) - | Just a <- as ! x - , Just b <- bs ! y + | Just a <- as !? x + , Just b <- bs !? y , a `eq` b = slideFrom (Endpoint (succ x) (succ y) (These a b : script)) | otherwise = Endpoint x y script -(!) :: Ix i => Array.Array i a -> i -> Maybe a -(!) v i | inRange (Array.bounds v) i, !a <- v Array.! i = Just a - | otherwise = Nothing -{-# INLINE (!) #-} +(!?) :: Ix i => Array.Array i a -> i -> Maybe a +(!?) v i | inRange (Array.bounds v) i, !a <- v Array.! i = Just a + | otherwise = Nothing +{-# INLINE (!?) #-} From 21508fb6c31c4d697938b1a9d37ceb841fb10fbe Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 21 Jun 2017 17:05:13 -0400 Subject: [PATCH 46/46] Resume the unqualified import of !. --- src/SES/Myers.hs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/SES/Myers.hs b/src/SES/Myers.hs index 819c622ea..b8c32fd2e 100644 --- a/src/SES/Myers.hs +++ b/src/SES/Myers.hs @@ -4,6 +4,7 @@ module SES.Myers , ses ) where +import Data.Array ((!)) import qualified Data.Array as Array import Data.Ix import Data.These @@ -36,13 +37,13 @@ ses eq as' bs' -- Search an edit graph for the shortest edit script along a specific diagonal, moving onto a given diagonal from one of its in-bounds adjacent diagonals (if any). searchAlongK !k - | k == -d = moveDownFrom (v Array.! succ k) - | k == d = moveRightFrom (v Array.! pred k) - | k == -m = moveDownFrom (v Array.! succ k) - | k == n = moveRightFrom (v Array.! pred k) + | k == -d = moveDownFrom (v ! succ k) + | k == d = moveRightFrom (v ! pred k) + | k == -m = moveDownFrom (v ! succ k) + | k == n = moveRightFrom (v ! pred k) | otherwise = - let left = v Array.! pred k - up = v Array.! succ k in + let left = v ! pred k + up = v ! succ k in if x left < x up then moveDownFrom up else @@ -65,6 +66,6 @@ ses eq as' bs' (!?) :: Ix i => Array.Array i a -> i -> Maybe a -(!?) v i | inRange (Array.bounds v) i, !a <- v Array.! i = Just a +(!?) v i | inRange (Array.bounds v) i, !a <- v ! i = Just a | otherwise = Nothing {-# INLINE (!?) #-}