From bb5e061aac6bbca8e199f0016339f0f9855c4aa3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 09:37:25 -0400 Subject: [PATCH 001/120] List the imports from Data.Mergeable explicitly. --- src/Data/Diff.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/Diff.hs b/src/Data/Diff.hs index c28d8a65b..a625dc0d2 100644 --- a/src/Data/Diff.hs +++ b/src/Data/Diff.hs @@ -9,7 +9,7 @@ import Data.Foldable (toList) import Data.Functor.Classes import Data.Functor.Foldable hiding (fold) import Data.JSON.Fields -import Data.Mergeable +import Data.Mergeable (Mergeable(sequenceAlt)) import Data.Patch import Data.Record import Data.Term From 08eedd23ccd8889f95efec21300dab7c9f0605c8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 09:37:47 -0400 Subject: [PATCH 002/120] Measure the length of diffPatches instead of using diffCost. --- test/DiffSpec.hs | 2 +- test/InterpreterSpec.hs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/DiffSpec.hs b/test/DiffSpec.hs index cb60467eb..8a80bbe66 100644 --- a/test/DiffSpec.hs +++ b/test/DiffSpec.hs @@ -19,7 +19,7 @@ spec = parallel $ do \ diff -> diff `shouldBe` (diff :: Diff ListableSyntax (Record '[]) (Record '[])) prop "equal terms produce identity diffs" $ - \ term -> diffCost (diffTerms term (term :: Term ListableSyntax (Record '[]))) `shouldBe` 0 + \ term -> length (diffPatches (diffTerms term (term :: Term ListableSyntax (Record '[])))) `shouldBe` 0 describe "beforeTerm" $ do prop "recovers the before term" $ diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index 5edb9ca55..b72ca8f25 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -28,7 +28,7 @@ spec = parallel $ do prop "constructs zero-cost diffs of equal terms" $ \ a -> let diff = diffTerms a a :: Diff ListableSyntax (Record '[]) (Record '[]) in - diffCost diff `shouldBe` 0 + length (diffPatches diff) `shouldBe` 0 it "produces unbiased insertions within branches" $ let term s = termIn Nil (inj [ termIn Nil (inj (Syntax.Identifier s)) ]) :: Term ListableSyntax (Record '[]) From 02d5bc1518aff8ada1fa583a109698a9ab68bbe9 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 09:38:22 -0400 Subject: [PATCH 003/120] :fire: diffCost. --- src/Data/Diff.hs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Data/Diff.hs b/src/Data/Diff.hs index a625dc0d2..d4bfca093 100644 --- a/src/Data/Diff.hs +++ b/src/Data/Diff.hs @@ -66,10 +66,6 @@ diffSum patchCost = cata $ \ diff -> case diff of Patch patch -> patchCost patch + sum (sum <$> patch) Merge merge -> sum merge --- | The sum of the node count of the diff’s patches. -diffCost :: (Foldable syntax, Functor syntax) => Diff syntax ann1 ann2 -> Int -diffCost = diffSum (const 1) - diffPatch :: Diff syntax ann1 ann2 -> Maybe (Patch (TermF syntax ann1 (Diff syntax ann1 ann2)) (TermF syntax ann2 (Diff syntax ann1 ann2))) diffPatch diff = case unDiff diff of From 8fbdd8e6c3132ca7cda38a7620704094b7a59eba Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 09:39:00 -0400 Subject: [PATCH 004/120] :fire: diffSum. --- src/Data/Diff.hs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Data/Diff.hs b/src/Data/Diff.hs index d4bfca093..297179717 100644 --- a/src/Data/Diff.hs +++ b/src/Data/Diff.hs @@ -61,12 +61,6 @@ merging :: Functor syntax => Term syntax ann -> Diff syntax ann ann merging = cata (\ (In ann syntax) -> mergeF (In (ann, ann) syntax)) -diffSum :: (Foldable syntax, Functor syntax) => (forall a b. Patch a b -> Int) -> Diff syntax ann1 ann2 -> Int -diffSum patchCost = cata $ \ diff -> case diff of - Patch patch -> patchCost patch + sum (sum <$> patch) - Merge merge -> sum merge - - diffPatch :: Diff syntax ann1 ann2 -> Maybe (Patch (TermF syntax ann1 (Diff syntax ann1 ann2)) (TermF syntax ann2 (Diff syntax ann1 ann2))) diffPatch diff = case unDiff diff of Patch patch -> Just patch From 22538876eaa283b2f24f86bad23f1add1b4432bc Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 09:39:08 -0400 Subject: [PATCH 005/120] Explicitly list the exports from Data.Diff. --- src/Data/Diff.hs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/Data/Diff.hs b/src/Data/Diff.hs index 297179717..119c1bfd3 100644 --- a/src/Data/Diff.hs +++ b/src/Data/Diff.hs @@ -1,5 +1,21 @@ {-# LANGUAGE DataKinds, RankNTypes, TypeFamilies, TypeOperators #-} -module Data.Diff where +module Data.Diff +( Diff(..) +, DiffF(..) +, replacing +, inserting +, insertF +, deleting +, deleteF +, merge +, mergeF +, merging +, diffPatches +, mergeMaybe +, beforeTerm +, afterTerm +, stripDiff +) where import Data.Aeson import Data.Bifoldable From a4a4074a2284334339a96bec9c5d2729fd3c3109 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 09:40:06 -0400 Subject: [PATCH 006/120] Re-enable the pending tests. --- test/DiffSpec.hs | 10 +++++----- test/IntegrationSpec.hs | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/DiffSpec.hs b/test/DiffSpec.hs index 8a80bbe66..2ee30fe12 100644 --- a/test/DiffSpec.hs +++ b/test/DiffSpec.hs @@ -31,11 +31,11 @@ spec = parallel $ do \ a b -> let diff = diffTerms a b :: Diff ListableSyntax (Record '[]) (Record '[]) in afterTerm diff `shouldBe` Just b - prop "forward permutations are changes" $ pendingWith "https://github.com/github/semantic-diff/issues/1359" - -- \ a -> let wrap = termIn Nil . inj - -- b = wrap [a] - -- c = wrap [a, b] in - -- diffTerms (wrap [a, b, c]) (wrap [c, a, b :: Term ListableSyntax (Record '[])]) `shouldBe` merge (Nil, Nil) (inj [ inserting c, merging a, merging b, deleting c ]) + prop "forward permutations are changes" $ + \ a -> let wrap = termIn Nil . inj + b = wrap [a] + c = wrap [a, b] in + diffTerms (wrap [a, b, c]) (wrap [c, a, b :: Term ListableSyntax (Record '[])]) `shouldBe` merge (Nil, Nil) (inj [ inserting c, merging a, merging b, deleting c ]) prop "backward permutations are changes" $ \ a -> let wrap = termIn Nil . inj diff --git a/test/IntegrationSpec.hs b/test/IntegrationSpec.hs index e64112e8e..4020fb27d 100644 --- a/test/IntegrationSpec.hs +++ b/test/IntegrationSpec.hs @@ -26,7 +26,7 @@ spec = parallel $ do describe "go" $ runTestsIn "test/fixtures/go/" [] describe "javascript" $ runTestsIn "test/fixtures/javascript/" [] - describe "python" $ runTestsIn "test/fixtures/python/" [ ("test/fixtures/python/while-statement.diffB-A.txt", "https://github.com/github/semantic-diff/issues/1359") ] + describe "python" $ runTestsIn "test/fixtures/python/" [] describe "ruby" $ runTestsIn "test/fixtures/ruby/" [] describe "typescript" $ runTestsIn "test/fixtures/typescript/" [] From dd33e479ead0e611135c7d3a80c21d84014e06bb Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 10:14:21 -0400 Subject: [PATCH 007/120] =?UTF-8?q?Don=E2=80=99t=20parenthesize=20Nil.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Data/Record.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 17c27dcb5..35e94f83a 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -53,7 +53,7 @@ instance (Show h, Show (Record t)) => Show (Record (h ': t)) where showsPrec n (h :. t) = showParen (n > 0) $ showsPrec 1 h . (" :. " <>) . shows t instance Show (Record '[]) where - showsPrec n Nil = showParen (n > 0) ("Nil" <>) + showsPrec _ Nil = showString "Nil" instance (Eq h, Eq (Record t)) => Eq (Record (h ': t)) where (h1 :. t1) == (h2 :. t2) = h1 == h2 && t1 == t2 From e8101cd735538db7e8409b0b50bc049d6d60d791 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 10:14:41 -0400 Subject: [PATCH 008/120] Correct how we show the tail of records. --- src/Data/Record.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 35e94f83a..6073508d3 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -50,7 +50,7 @@ instance {-# OVERLAPPABLE #-} HasField (field ': fields) field where instance (Show h, Show (Record t)) => Show (Record (h ': t)) where - showsPrec n (h :. t) = showParen (n > 0) $ showsPrec 1 h . (" :. " <>) . shows t + showsPrec n (h :. t) = showParen (n > 0) $ showsPrec 1 h . showString " :. " . showsPrec 0 t instance Show (Record '[]) where showsPrec _ Nil = showString "Nil" From b0fdc0e607245b927884b38944f9e6ff3d02a368 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 10:27:03 -0400 Subject: [PATCH 009/120] :fire: mergeMaybe. --- src/Data/Diff.hs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Data/Diff.hs b/src/Data/Diff.hs index 119c1bfd3..11521fa9e 100644 --- a/src/Data/Diff.hs +++ b/src/Data/Diff.hs @@ -11,7 +11,6 @@ module Data.Diff , mergeF , merging , diffPatches -, mergeMaybe , beforeTerm , afterTerm , stripDiff @@ -88,19 +87,15 @@ diffPatches = para $ \ diff -> case diff of Merge merge -> foldMap (toList . diffPatch . fst) merge --- | Merge a diff using a function to provide the Term (in Maybe, to simplify recovery of the before/after state) for every Patch. -mergeMaybe :: (Mergeable syntax, Traversable syntax) => (DiffF syntax ann1 ann2 (Maybe (Term syntax combined)) -> Maybe (Term syntax combined)) -> Diff syntax ann1 ann2 -> Maybe (Term syntax combined) -mergeMaybe = cata - -- | Recover the before state of a diff. beforeTerm :: (Mergeable syntax, Traversable syntax) => Diff syntax ann1 ann2 -> Maybe (Term syntax ann1) -beforeTerm = mergeMaybe $ \ diff -> case diff of +beforeTerm = cata $ \ diff -> case diff of Patch patch -> before patch >>= \ (In a l) -> termIn a <$> sequenceAlt l Merge (In (a, _) l) -> termIn a <$> sequenceAlt l -- | Recover the after state of a diff. afterTerm :: (Mergeable syntax, Traversable syntax) => Diff syntax ann1 ann2 -> Maybe (Term syntax ann2) -afterTerm = mergeMaybe $ \ diff -> case diff of +afterTerm = cata $ \ diff -> case diff of Patch patch -> after patch >>= \ (In b r) -> termIn b <$> sequenceAlt r Merge (In (_, b) r) -> termIn b <$> sequenceAlt r From 662eb608a761c59902836d7779e394290da62017 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 10:28:59 -0400 Subject: [PATCH 010/120] Align the beforeTerm/afterTerm definitions. --- src/Data/Diff.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Data/Diff.hs b/src/Data/Diff.hs index 11521fa9e..21ae6e527 100644 --- a/src/Data/Diff.hs +++ b/src/Data/Diff.hs @@ -90,14 +90,14 @@ diffPatches = para $ \ diff -> case diff of -- | Recover the before state of a diff. beforeTerm :: (Mergeable syntax, Traversable syntax) => Diff syntax ann1 ann2 -> Maybe (Term syntax ann1) beforeTerm = cata $ \ diff -> case diff of - Patch patch -> before patch >>= \ (In a l) -> termIn a <$> sequenceAlt l - Merge (In (a, _) l) -> termIn a <$> sequenceAlt l + Patch patch -> before patch >>= \ (In a l) -> termIn a <$> sequenceAlt l + Merge (In (a, _) l) -> termIn a <$> sequenceAlt l -- | Recover the after state of a diff. afterTerm :: (Mergeable syntax, Traversable syntax) => Diff syntax ann1 ann2 -> Maybe (Term syntax ann2) afterTerm = cata $ \ diff -> case diff of - Patch patch -> after patch >>= \ (In b r) -> termIn b <$> sequenceAlt r - Merge (In (_, b) r) -> termIn b <$> sequenceAlt r + Patch patch -> after patch >>= \ (In b r) -> termIn b <$> sequenceAlt r + Merge (In (_, b) r) -> termIn b <$> sequenceAlt r -- | Strips the head annotation off a diff annotated with non-empty records. From 0184f577138d4e35c895e493a04e4e44aa3171b9 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 10:32:32 -0400 Subject: [PATCH 011/120] Define Listable instances for Context & Empty. --- test/Data/Functor/Listable.hs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/Data/Functor/Listable.hs b/test/Data/Functor/Listable.hs index e7a088814..a8fac1800 100644 --- a/test/Data/Functor/Listable.hs +++ b/test/Data/Functor/Listable.hs @@ -318,9 +318,15 @@ instance Listable1 Statement.Return where instance Listable1 Syntax.Context where liftTiers tiers = liftCons2 (liftTiers tiers) tiers Syntax.Context +instance Listable a => Listable (Syntax.Context a) where + tiers = tiers1 + instance Listable1 Syntax.Empty where liftTiers _ = cons0 Syntax.Empty +instance Listable a => Listable (Syntax.Empty a) where + tiers = tiers1 + instance Listable1 Syntax.Identifier where liftTiers _ = cons1 Syntax.Identifier From 395678bcb39b5ded50a557517c1396fb6d072931 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 10:32:48 -0400 Subject: [PATCH 012/120] Test the Mergeable laws against Context & Empty. --- test/Data/Mergeable/Spec.hs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/Data/Mergeable/Spec.hs b/test/Data/Mergeable/Spec.hs index 3179df746..c6cf28210 100644 --- a/test/Data/Mergeable/Spec.hs +++ b/test/Data/Mergeable/Spec.hs @@ -6,6 +6,7 @@ import Data.Functor.Identity import Data.Functor.Listable import Data.Maybe (catMaybes) import Data.Mergeable +import Data.Syntax import Syntax import Test.Hspec import Test.Hspec.LeanCheck @@ -22,6 +23,12 @@ spec = parallel $ do describe "Identity" $ do withAlternativeInstances sequenceAltLaws (Identity `mapT` tiers :: [Tier (Identity Char)]) withAlternativeInstances mergeLaws (Identity `mapT` tiers :: [Tier (Identity Char)]) + describe "Context" $ do + withAlternativeInstances sequenceAltLaws (tiers :: [Tier (Context Char)]) + withAlternativeInstances mergeLaws (tiers :: [Tier (Context Char)]) + describe "Empty" $ do + withAlternativeInstances sequenceAltLaws (tiers :: [Tier (Empty Char)]) + withAlternativeInstances mergeLaws (tiers :: [Tier (Empty Char)]) describe "Syntax" $ do withAlternativeInstances sequenceAltLaws (tiers :: [Tier (Syntax Char)]) withAlternativeInstances mergeLaws (tiers :: [Tier (Syntax Char)]) From 8245e337ce3d95f5db94583db32a21c4b5ab074d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 10:36:53 -0400 Subject: [PATCH 013/120] Define a Listable instance for Union. --- test/Data/Functor/Listable.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/Data/Functor/Listable.hs b/test/Data/Functor/Listable.hs index a8fac1800..e4d1d06da 100644 --- a/test/Data/Functor/Listable.hs +++ b/test/Data/Functor/Listable.hs @@ -299,6 +299,9 @@ instance (Listable1 f, Listable1 (Union (g ': fs))) => Listable1 (Union (f ': g instance Listable1 f => Listable1 (Union '[f]) where liftTiers tiers = inj `mapT` ((liftTiers :: [Tier a] -> [Tier (f a)]) tiers) +instance (Listable1 (Union fs), Listable a) => Listable (Union fs a) where + tiers = tiers1 + instance Listable1 Comment.Comment where liftTiers _ = cons1 Comment.Comment From 6a0321c401889c05d296214a76c0d250dddf2692 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 10:40:33 -0400 Subject: [PATCH 014/120] Test the Mergeable laws against a Union. --- test/Data/Mergeable/Spec.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/Data/Mergeable/Spec.hs b/test/Data/Mergeable/Spec.hs index c6cf28210..f711f7215 100644 --- a/test/Data/Mergeable/Spec.hs +++ b/test/Data/Mergeable/Spec.hs @@ -29,6 +29,9 @@ spec = parallel $ do describe "Empty" $ do withAlternativeInstances sequenceAltLaws (tiers :: [Tier (Empty Char)]) withAlternativeInstances mergeLaws (tiers :: [Tier (Empty Char)]) + describe "Union" $ do + withAlternativeInstances sequenceAltLaws (tiers :: [Tier (ListableSyntax Char)]) + withAlternativeInstances mergeLaws (tiers :: [Tier (ListableSyntax Char)]) describe "Syntax" $ do withAlternativeInstances sequenceAltLaws (tiers :: [Tier (Syntax Char)]) withAlternativeInstances mergeLaws (tiers :: [Tier (Syntax Char)]) From 293731e71b5267d3141f4c2a38f769e26edf9c12 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 10:58:13 -0400 Subject: [PATCH 015/120] Define the Mergeable instance for NonEmpty recursively. This prevents failure in the first element precluding success later on. --- src/Data/Mergeable.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Data/Mergeable.hs b/src/Data/Mergeable.hs index 4cbf8371b..5a4990751 100644 --- a/src/Data/Mergeable.hs +++ b/src/Data/Mergeable.hs @@ -37,7 +37,8 @@ instance Mergeable [] where merge _ [] = pure [] instance Mergeable NonEmpty where - merge f (x:|xs) = (:|) <$> f x <*> merge f xs + merge f (x:|[]) = (:|) <$> f x <*> pure [] + merge f (x1:|x2:xs) = (:|) <$> f x1 <*> merge f (x2 : xs) <|> merge f (x2:|xs) instance Mergeable Maybe where merge f (Just a) = Just <$> f a From 16b86bb9a28e7823e2cd87ffb728c944d287a98e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 11:10:53 -0400 Subject: [PATCH 016/120] beforeTerm/afterTerm can handle substructural diffs. --- src/Data/Diff.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Data/Diff.hs b/src/Data/Diff.hs index 21ae6e527..7cd689917 100644 --- a/src/Data/Diff.hs +++ b/src/Data/Diff.hs @@ -16,11 +16,12 @@ module Data.Diff , stripDiff ) where +import Control.Applicative ((<|>)) import Data.Aeson import Data.Bifoldable import Data.Bifunctor import Data.Bitraversable -import Data.Foldable (toList) +import Data.Foldable (asum, toList) import Data.Functor.Classes import Data.Functor.Foldable hiding (fold) import Data.JSON.Fields @@ -90,14 +91,14 @@ diffPatches = para $ \ diff -> case diff of -- | Recover the before state of a diff. beforeTerm :: (Mergeable syntax, Traversable syntax) => Diff syntax ann1 ann2 -> Maybe (Term syntax ann1) beforeTerm = cata $ \ diff -> case diff of - Patch patch -> before patch >>= \ (In a l) -> termIn a <$> sequenceAlt l - Merge (In (a, _) l) -> termIn a <$> sequenceAlt l + Patch patch -> (before patch >>= \ (In a l) -> termIn a <$> sequenceAlt l) <|> (after patch >>= asum) + Merge (In (a, _) l) -> termIn a <$> sequenceAlt l -- | Recover the after state of a diff. afterTerm :: (Mergeable syntax, Traversable syntax) => Diff syntax ann1 ann2 -> Maybe (Term syntax ann2) afterTerm = cata $ \ diff -> case diff of - Patch patch -> after patch >>= \ (In b r) -> termIn b <$> sequenceAlt r - Merge (In (_, b) r) -> termIn b <$> sequenceAlt r + Patch patch -> (after patch >>= \ (In b r) -> termIn b <$> sequenceAlt r) <|> (before patch >>= asum) + Merge (In (_, b) r) -> termIn b <$> sequenceAlt r -- | Strips the head annotation off a diff annotated with non-empty records. From c0d833f36118d95797638f16765a6e42558df453 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 11:53:36 -0400 Subject: [PATCH 017/120] :fire: the Context/Empty tests of the Mergeable laws. --- test/Data/Mergeable/Spec.hs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/Data/Mergeable/Spec.hs b/test/Data/Mergeable/Spec.hs index f711f7215..218adc952 100644 --- a/test/Data/Mergeable/Spec.hs +++ b/test/Data/Mergeable/Spec.hs @@ -23,12 +23,6 @@ spec = parallel $ do describe "Identity" $ do withAlternativeInstances sequenceAltLaws (Identity `mapT` tiers :: [Tier (Identity Char)]) withAlternativeInstances mergeLaws (Identity `mapT` tiers :: [Tier (Identity Char)]) - describe "Context" $ do - withAlternativeInstances sequenceAltLaws (tiers :: [Tier (Context Char)]) - withAlternativeInstances mergeLaws (tiers :: [Tier (Context Char)]) - describe "Empty" $ do - withAlternativeInstances sequenceAltLaws (tiers :: [Tier (Empty Char)]) - withAlternativeInstances mergeLaws (tiers :: [Tier (Empty Char)]) describe "Union" $ do withAlternativeInstances sequenceAltLaws (tiers :: [Tier (ListableSyntax Char)]) withAlternativeInstances mergeLaws (tiers :: [Tier (ListableSyntax Char)]) From 9c1dce7ab960ab27406d427aa310c198f531b4c3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 11:53:42 -0400 Subject: [PATCH 018/120] :fire: the Context/Empty Listable instances. --- test/Data/Functor/Listable.hs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/Data/Functor/Listable.hs b/test/Data/Functor/Listable.hs index e4d1d06da..93f3f232b 100644 --- a/test/Data/Functor/Listable.hs +++ b/test/Data/Functor/Listable.hs @@ -321,15 +321,9 @@ instance Listable1 Statement.Return where instance Listable1 Syntax.Context where liftTiers tiers = liftCons2 (liftTiers tiers) tiers Syntax.Context -instance Listable a => Listable (Syntax.Context a) where - tiers = tiers1 - instance Listable1 Syntax.Empty where liftTiers _ = cons0 Syntax.Empty -instance Listable a => Listable (Syntax.Empty a) where - tiers = tiers1 - instance Listable1 Syntax.Identifier where liftTiers _ = cons1 Syntax.Identifier From 1652dc1147c666ba64da7a90ad30ea961ef8b7d0 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 11:55:44 -0400 Subject: [PATCH 019/120] :fire: redundant tests of the lossless property. --- test/DiffSpec.hs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/test/DiffSpec.hs b/test/DiffSpec.hs index 2ee30fe12..9e491de15 100644 --- a/test/DiffSpec.hs +++ b/test/DiffSpec.hs @@ -21,16 +21,6 @@ spec = parallel $ do prop "equal terms produce identity diffs" $ \ term -> length (diffPatches (diffTerms term (term :: Term ListableSyntax (Record '[])))) `shouldBe` 0 - describe "beforeTerm" $ do - prop "recovers the before term" $ - \ a b -> let diff = diffTerms a b :: Diff ListableSyntax (Record '[]) (Record '[]) in - beforeTerm diff `shouldBe` Just a - - describe "afterTerm" $ do - prop "recovers the after term" $ - \ a b -> let diff = diffTerms a b :: Diff ListableSyntax (Record '[]) (Record '[]) in - afterTerm diff `shouldBe` Just b - prop "forward permutations are changes" $ \ a -> let wrap = termIn Nil . inj b = wrap [a] From a23d60dc097a42af10b9b407eeb4578d1f69a43f Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 11:57:36 -0400 Subject: [PATCH 020/120] :fire: a redundant test that equal terms produce identity diffs. --- test/DiffSpec.hs | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/DiffSpec.hs b/test/DiffSpec.hs index 9e491de15..c522a0cc4 100644 --- a/test/DiffSpec.hs +++ b/test/DiffSpec.hs @@ -18,9 +18,6 @@ spec = parallel $ do prop "equality is reflexive" $ \ diff -> diff `shouldBe` (diff :: Diff ListableSyntax (Record '[]) (Record '[])) - prop "equal terms produce identity diffs" $ - \ term -> length (diffPatches (diffTerms term (term :: Term ListableSyntax (Record '[])))) `shouldBe` 0 - prop "forward permutations are changes" $ \ a -> let wrap = termIn Nil . inj b = wrap [a] From 46e2b5fb156aa23d8ade5bc4eb6efc665442f1df Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 11:58:31 -0400 Subject: [PATCH 021/120] Rename the interpreter spec. --- test/InterpreterSpec.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index b72ca8f25..4488e09d3 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -16,7 +16,7 @@ import Test.Hspec.LeanCheck spec :: Spec spec = parallel $ do - describe "interpret" $ do + describe "diffTerms" $ do it "returns a replacement when comparing two unicode equivalent terms" $ let termA = termIn Nil (inj (Syntax.Identifier "t\776")) termB = termIn Nil (inj (Syntax.Identifier "\7831")) in From 6200a00b6e82e072a6a7ebe5041b6dec116369d4 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 11:58:43 -0400 Subject: [PATCH 022/120] Rename the identity diff property. --- test/InterpreterSpec.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index 4488e09d3..a1c039c45 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -26,7 +26,7 @@ spec = parallel $ do \ a b -> let diff = diffTerms a b :: Diff ListableSyntax (Record '[]) (Record '[]) in (beforeTerm diff, afterTerm diff) `shouldBe` (Just a, Just b) - prop "constructs zero-cost diffs of equal terms" $ + prop "produces identity diffs for equal terms " $ \ a -> let diff = diffTerms a a :: Diff ListableSyntax (Record '[]) (Record '[]) in length (diffPatches diff) `shouldBe` 0 From 3351c608596c41fb8ea0add45929ef7cb1f6e58e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 11:58:52 -0400 Subject: [PATCH 023/120] Move the permutation tests into InterpreterSpec. --- test/DiffSpec.hs | 12 ------------ test/InterpreterSpec.hs | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/DiffSpec.hs b/test/DiffSpec.hs index c522a0cc4..f455d6955 100644 --- a/test/DiffSpec.hs +++ b/test/DiffSpec.hs @@ -17,15 +17,3 @@ spec :: Spec spec = parallel $ do prop "equality is reflexive" $ \ diff -> diff `shouldBe` (diff :: Diff ListableSyntax (Record '[]) (Record '[])) - - prop "forward permutations are changes" $ - \ a -> let wrap = termIn Nil . inj - b = wrap [a] - c = wrap [a, b] in - diffTerms (wrap [a, b, c]) (wrap [c, a, b :: Term ListableSyntax (Record '[])]) `shouldBe` merge (Nil, Nil) (inj [ inserting c, merging a, merging b, deleting c ]) - - prop "backward permutations are changes" $ - \ a -> let wrap = termIn Nil . inj - b = wrap [a] - c = wrap [a, b] in - diffTerms (wrap [a, b, c]) (wrap [b, c, a :: Term ListableSyntax (Record '[])]) `shouldBe` merge (Nil, Nil) (inj [ deleting a, merging b, merging c, inserting a ]) diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index a1c039c45..afa5bbb59 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -37,3 +37,15 @@ spec = parallel $ do prop "compares nodes against context" $ \ a b -> diffTerms a (termIn Nil (inj (Syntax.Context (pure b) a))) `shouldBe` insertF (In Nil (inj (Syntax.Context (pure (inserting b)) (merging (a :: Term ListableSyntax (Record '[])))))) + + prop "diffs forward permutations as changes" $ + \ a -> let wrap = termIn Nil . inj + b = wrap [a] + c = wrap [a, b] in + diffTerms (wrap [a, b, c]) (wrap [c, a, b :: Term ListableSyntax (Record '[])]) `shouldBe` merge (Nil, Nil) (inj [ inserting c, merging a, merging b, deleting c ]) + + prop "diffs backward permutations as changes" $ + \ a -> let wrap = termIn Nil . inj + b = wrap [a] + c = wrap [a, b] in + diffTerms (wrap [a, b, c]) (wrap [b, c, a :: Term ListableSyntax (Record '[])]) `shouldBe` merge (Nil, Nil) (inj [ deleting a, merging b, merging c, inserting a ]) From 3462892131b08b9a16b3d2ec8c18825edbbd1ff2 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 11:59:19 -0400 Subject: [PATCH 024/120] :fire: redundant imports. --- test/DiffSpec.hs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/DiffSpec.hs b/test/DiffSpec.hs index f455d6955..75f49c11b 100644 --- a/test/DiffSpec.hs +++ b/test/DiffSpec.hs @@ -2,14 +2,8 @@ module DiffSpec where import Data.Diff -import Data.Functor.Both -import Data.Functor.Foldable (cata) import Data.Functor.Listable (ListableSyntax) import Data.Record -import Data.Term -import Data.Union -import Interpreter -import RWS import Test.Hspec import Test.Hspec.LeanCheck From 0fb84d5ff387e3a861332ae4a8108d6208547833 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 12:20:22 -0400 Subject: [PATCH 025/120] Filter the sets of candidates by comparability. --- src/RWS.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index f67b0a473..186976c5e 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -159,16 +159,15 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do (previous, unmappedA, unmappedB) <- get fromMaybe (insertion previous unmappedA unmappedB term) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped canCompare (termsWithinMoveBoundsFrom previous unmappedA) kdTreeA term + foundA@(UnmappedTerm i _ a) <- nearestUnmapped canCompare (IntMap.filterWithKey (\ k (UnmappedTerm _ _ a) -> isInMoveBounds previous k && canCompareTerms canCompare a b) unmappedA) kdTreeA term -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (flip canCompare) (termsWithinMoveBoundsFrom (pred j) unmappedB) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped (flip canCompare) (IntMap.filterWithKey (\ k (UnmappedTerm _ _ b) -> isInMoveBounds (pred j) k && canCompareTerms canCompare a b) unmappedB) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') guard (canCompareTerms canCompare a b) pure $! do put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) pure (These i j, These a b) - where termsWithinMoveBoundsFrom bound = IntMap.filterWithKey (\ k _ -> isInMoveBounds bound k) isInMoveBounds :: Int -> Int -> Bool isInMoveBounds previous i = previous < i && i < previous + defaultMoveBound From b41fa03a3e4f20c2cab180db4a462d160be20f36 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 12:20:55 -0400 Subject: [PATCH 026/120] :fire: a redundant comparability check. --- src/RWS.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index 186976c5e..9235c2e0e 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -164,7 +164,6 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do UnmappedTerm j' _ _ <- nearestUnmapped (flip canCompare) (IntMap.filterWithKey (\ k (UnmappedTerm _ _ b) -> isInMoveBounds (pred j) k && canCompareTerms canCompare a b) unmappedB) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') - guard (canCompareTerms canCompare a b) pure $! do put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) pure (These i j, These a b) From 598901b8daf2831e02a14b6b9ddf6713316682da Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 12:22:00 -0400 Subject: [PATCH 027/120] :fire: the comparability test for the approximate edit distance computation. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index 9235c2e0e..f024a5a2a 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -184,7 +184,7 @@ nearestUnmapped :: (Foldable syntax, Functor syntax, GAlign syntax) -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term, if any. nearestUnmapped canCompare unmapped tree key = listToMaybe (sortOn approximateEditDistance candidates) where candidates = toList (IntMap.intersection unmapped (toMap (fmap snd (kNearest tree defaultL (feature key))))) - approximateEditDistance = editDistanceIfComparable (flip canCompare) (term key) . term + approximateEditDistance = editDistanceUpTo defaultM . These (term key) . term editDistanceIfComparable :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax ann1 ann2 From 8cd4c596bdbb7acd9717f8d0513f9b307447b033 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 12:22:14 -0400 Subject: [PATCH 028/120] :fire: editDistanceIfComparable. --- src/RWS.hs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index f024a5a2a..210d90e42 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -186,15 +186,6 @@ nearestUnmapped canCompare unmapped tree key = listToMaybe (sortOn approximateEd where candidates = toList (IntMap.intersection unmapped (toMap (fmap snd (kNearest tree defaultL (feature key))))) approximateEditDistance = editDistanceUpTo defaultM . These (term key) . term -editDistanceIfComparable :: (Foldable syntax, Functor syntax, GAlign syntax) - => ComparabilityRelation syntax ann1 ann2 - -> Term syntax ann1 - -> Term syntax ann2 - -> Int -editDistanceIfComparable canCompare a b = if canCompareTerms canCompare a b - then editDistanceUpTo defaultM (These a b) - else maxBound - defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int defaultD = 15 defaultL = 2 From e9f7e7e09e9a849b0a42c9e43e7cec3b55b03748 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 12:23:12 -0400 Subject: [PATCH 029/120] =?UTF-8?q?Don=E2=80=99t=20pass=20the=20comparabil?= =?UTF-8?q?ity=20relation=20to=20nearestUnmapped.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/RWS.hs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 210d90e42..012205750 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -159,9 +159,9 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do (previous, unmappedA, unmappedB) <- get fromMaybe (insertion previous unmappedA unmappedB term) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped canCompare (IntMap.filterWithKey (\ k (UnmappedTerm _ _ a) -> isInMoveBounds previous k && canCompareTerms canCompare a b) unmappedA) kdTreeA term + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (IntMap.filterWithKey (\ k (UnmappedTerm _ _ a) -> isInMoveBounds previous k && canCompareTerms canCompare a b) unmappedA) kdTreeA term -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (flip canCompare) (IntMap.filterWithKey (\ k (UnmappedTerm _ _ b) -> isInMoveBounds (pred j) k && canCompareTerms canCompare a b) unmappedB) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped (IntMap.filterWithKey (\ k (UnmappedTerm _ _ b) -> isInMoveBounds (pred j) k && canCompareTerms canCompare a b) unmappedB) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! do @@ -177,12 +177,11 @@ isInMoveBounds previous i = previous < i && i < previous + defaultMoveBound -- -- cf §4.2 of RWS-Diff nearestUnmapped :: (Foldable syntax, Functor syntax, GAlign syntax) - => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. - -> UnmappedTerms syntax ann1 -- ^ A set of terms eligible for matching against. + => UnmappedTerms syntax ann1 -- ^ A set of terms eligible for matching against. -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d tree to look up nearest neighbours within. -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term, if any. -nearestUnmapped canCompare unmapped tree key = listToMaybe (sortOn approximateEditDistance candidates) +nearestUnmapped unmapped tree key = listToMaybe (sortOn approximateEditDistance candidates) where candidates = toList (IntMap.intersection unmapped (toMap (fmap snd (kNearest tree defaultL (feature key))))) approximateEditDistance = editDistanceUpTo defaultM . These (term key) . term From f721f287b0bf60f1c11edb7f7b91f07569747c79 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 12:40:46 -0400 Subject: [PATCH 030/120] Extract a function for the nearby/comparability filter. --- src/RWS.hs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 012205750..19bfdcf2d 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -159,15 +159,18 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do (previous, unmappedA, unmappedB) <- get fromMaybe (insertion previous unmappedA unmappedB term) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped (IntMap.filterWithKey (\ k (UnmappedTerm _ _ a) -> isInMoveBounds previous k && canCompareTerms canCompare a b) unmappedA) kdTreeA term + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (nearAndComparableTo canCompare previous b unmappedA) kdTreeA term -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (IntMap.filterWithKey (\ k (UnmappedTerm _ _ b) -> isInMoveBounds (pred j) k && canCompareTerms canCompare a b) unmappedB) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped ((nearAndComparableTo (flip canCompare) (pred j) a) unmappedB) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! do put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) pure (These i j, These a b) +nearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerms syntax ann1 -> UnmappedTerms syntax ann1 +nearAndComparableTo canCompare index term = IntMap.filterWithKey (\ k (UnmappedTerm _ _ term') -> isInMoveBounds index k && canCompareTerms canCompare term' term) + isInMoveBounds :: Int -> Int -> Bool isInMoveBounds previous i = previous < i && i < previous + defaultMoveBound From 1b432d9cbaa1ae31e064e3c53fb246ef0a86c23d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 13:27:01 -0400 Subject: [PATCH 031/120] Inline the definition of isInMoveBounds. --- src/RWS.hs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 19bfdcf2d..882204bad 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -25,7 +25,7 @@ import Data.Functor.Classes import Data.Functor.Foldable import Data.Hashable import qualified Data.IntMap as IntMap -import Data.KdMap.Static hiding (elems, empty) +import Data.KdMap.Static hiding (elems, empty, inRange) import Data.List (sortOn) import Data.Maybe import Data.Record @@ -169,10 +169,7 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do pure (These i j, These a b) nearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerms syntax ann1 -> UnmappedTerms syntax ann1 -nearAndComparableTo canCompare index term = IntMap.filterWithKey (\ k (UnmappedTerm _ _ term') -> isInMoveBounds index k && canCompareTerms canCompare term' term) - -isInMoveBounds :: Int -> Int -> Bool -isInMoveBounds previous i = previous < i && i < previous + defaultMoveBound +nearAndComparableTo canCompare index term = IntMap.filterWithKey (\ k (UnmappedTerm _ _ term') -> k == succ index && canCompareTerms canCompare term' term) -- | Finds the most-similar unmapped term to the passed-in term, if any. -- @@ -188,12 +185,11 @@ nearestUnmapped unmapped tree key = listToMaybe (sortOn approximateEditDistance where candidates = toList (IntMap.intersection unmapped (toMap (fmap snd (kNearest tree defaultL (feature key))))) approximateEditDistance = editDistanceUpTo defaultM . These (term key) . term -defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int +defaultD, defaultL, defaultP, defaultQ :: Int defaultD = 15 defaultL = 2 defaultP = 2 defaultQ = 3 -defaultMoveBound = 2 -- Returns a state (insertion index, old unmapped terms, new unmapped terms), and value of (index, inserted diff), From 3729189aad3c1ceda4b8f227dba4b7946a1dd501 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 13:35:00 -0400 Subject: [PATCH 032/120] Restore defaultMoveBound & interpret it as the number of places following the previous index. --- src/RWS.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 882204bad..70e3dfa27 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -169,7 +169,7 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do pure (These i j, These a b) nearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerms syntax ann1 -> UnmappedTerms syntax ann1 -nearAndComparableTo canCompare index term = IntMap.filterWithKey (\ k (UnmappedTerm _ _ term') -> k == succ index && canCompareTerms canCompare term' term) +nearAndComparableTo canCompare index term = IntMap.filterWithKey (\ k (UnmappedTerm _ _ term') -> inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term) -- | Finds the most-similar unmapped term to the passed-in term, if any. -- @@ -185,11 +185,12 @@ nearestUnmapped unmapped tree key = listToMaybe (sortOn approximateEditDistance where candidates = toList (IntMap.intersection unmapped (toMap (fmap snd (kNearest tree defaultL (feature key))))) approximateEditDistance = editDistanceUpTo defaultM . These (term key) . term -defaultD, defaultL, defaultP, defaultQ :: Int +defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int defaultD = 15 defaultL = 2 defaultP = 2 defaultQ = 3 +defaultMoveBound = 1 -- Returns a state (insertion index, old unmapped terms, new unmapped terms), and value of (index, inserted diff), From 76cfa737e11e4eb636dc8fae181242ab7f4174fb Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 14:14:10 -0400 Subject: [PATCH 033/120] Represent MappedDiff as These of pairs rather than pairs of Theses. --- src/RWS.hs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 70e3dfa27..b4ee07214 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -18,6 +18,7 @@ import Control.Monad.Random import Control.Monad.State.Strict import Data.Align.Generic import Data.Array.Unboxed +import Data.Bifunctor (bimap) import Data.Diff (DiffF(..), deleting, inserting, merge, replacing) import Data.Foldable import Data.Function ((&)) @@ -71,7 +72,7 @@ rws canCompare equivalent as bs = (diffs, remaining) = findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs diffs' = deleteRemaining diffs remaining rwsDiffs = insertMapped mappedDiffs diffs' - in fmap snd rwsDiffs + in bimap snd snd <$> rwsDiffs -- | An IntMap of unmapped terms keyed by their position in a list of terms. type UnmappedTerms syntax ann = IntMap.IntMap (UnmappedTerm syntax ann) @@ -79,7 +80,7 @@ type UnmappedTerms syntax ann = IntMap.IntMap (UnmappedTerm syntax ann) type Edit syntax ann1 ann2 = These (Term syntax ann1) (Term syntax ann2) -- A Diff paired with both its indices -type MappedDiff syntax ann1 ann2 = (These Int Int, Edit syntax ann1 ann2) +type MappedDiff syntax ann1 ann2 = These (Int, Term syntax ann1) (Int, Term syntax ann2) type RWSEditScript syntax ann1 ann2 = [Edit syntax ann1 ann2] @@ -94,14 +95,14 @@ deleteRemaining :: Traversable t -> t (UnmappedTerm syntax ann1) -> [MappedDiff syntax ann1 ann2] deleteRemaining diffs unmappedAs = - foldl' (flip insertDiff) diffs ((This . termIndex &&& This . term) <$> unmappedAs) + foldl' (flip insertDiff) diffs (This . (termIndex &&& term) <$> unmappedAs) -- | Inserts an index and diff pair into a list of indices and diffs. insertDiff :: MappedDiff syntax ann1 ann2 -> [MappedDiff syntax ann1 ann2] -> [MappedDiff syntax ann1 ann2] insertDiff inserted [] = [ inserted ] -insertDiff a@(ij1, _) (b@(ij2, _):rest) = case (ij1, ij2) of +insertDiff a (b:rest) = case (bimap fst fst a, bimap fst fst b) of (These i1 i2, These j1 j2) -> if i1 <= j1 && i2 <= j2 then a : b : rest else b : insertDiff a rest (This i, This j) -> if i <= j then a : b : rest else b : insertDiff a rest (That i, That j) -> if i <= j then a : b : rest else b : insertDiff a rest @@ -111,13 +112,13 @@ insertDiff a@(ij1, _) (b@(ij2, _):rest) = case (ij1, ij2) of (This _, That _) -> b : insertDiff a rest (That _, This _) -> b : insertDiff a rest - (These i1 i2, _) -> case break (isThese . fst) rest of + (These i1 i2, _) -> case break isThese rest of (rest, tail) -> let (before, after) = foldr' (combine i1 i2) ([], []) (b : rest) in case after of [] -> before <> insertDiff a tail _ -> before <> (a : after) <> tail where - combine i1 i2 each (before, after) = case fst each of + combine i1 i2 each (before, after) = case bimap fst fst each of This j1 -> if i1 <= j1 then (before, each : after) else (each : before, after) That j2 -> if i2 <= j2 then (before, each : after) else (each : before, after) These _ _ -> (before, after) @@ -166,7 +167,7 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do guard (j == j') pure $! do put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) - pure (These i j, These a b) + pure (These (i, a) (j, b)) nearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerms syntax ann1 -> UnmappedTerms syntax ann1 nearAndComparableTo canCompare index term = IntMap.filterWithKey (\ k (UnmappedTerm _ _ term') -> inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term) @@ -203,7 +204,7 @@ insertion :: Int (MappedDiff syntax ann1 ann2) insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do put (previous, unmappedA, IntMap.delete j unmappedB) - pure (That j, That b) + pure (That (j, b)) genFeaturizedTermsAndDiffs :: Functor syntax => RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) @@ -216,7 +217,7 @@ genFeaturizedTermsAndDiffs sesDiffs = let Mapping _ _ a b c d = foldl' combine ( where combine (Mapping counterA counterB as bs mappedDiffs allDiffs) diff = case diff of This term -> Mapping (succ counterA) counterB (featurize counterA term : as) bs mappedDiffs (None : allDiffs) That term -> Mapping counterA (succ counterB) as (featurize counterB term : bs) mappedDiffs (RWS.Term (featurize counterB term) : allDiffs) - These a b -> Mapping (succ counterA) (succ counterB) as bs ((These counterA counterB, These a b) : mappedDiffs) (Index counterA : allDiffs) + These a b -> Mapping (succ counterA) (succ counterB) as bs ((These (counterA, a) (counterB, b)) : mappedDiffs) (Index counterA : allDiffs) data Mapping syntax ann1 ann2 = Mapping From 6bd44acdda65725d394877177057a8cb5832acc5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 14:37:11 -0400 Subject: [PATCH 034/120] Chain the high-level flow. --- src/RWS.hs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index b4ee07214..dabea1af1 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -66,13 +66,14 @@ rws :: (Foldable syntax, Functor syntax, GAlign syntax) rws _ _ as [] = This <$> as rws _ _ [] bs = That <$> bs rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a] -rws canCompare equivalent as bs = - let sesDiffs = ses equivalent as bs - (featureAs, featureBs, mappedDiffs, allDiffs) = genFeaturizedTermsAndDiffs sesDiffs - (diffs, remaining) = findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs - diffs' = deleteRemaining diffs remaining - rwsDiffs = insertMapped mappedDiffs diffs' - in bimap snd snd <$> rwsDiffs +rws canCompare equivalent as bs + = ses equivalent as bs + & genFeaturizedTermsAndDiffs + & \ (featureAs, featureBs, mappedDiffs, allDiffs) -> + findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs + & uncurry deleteRemaining + & insertMapped mappedDiffs + & fmap (bimap snd snd) -- | An IntMap of unmapped terms keyed by their position in a list of terms. type UnmappedTerms syntax ann = IntMap.IntMap (UnmappedTerm syntax ann) From 1dcc904639446e97f8cb0f20d9ae41e3302ccbb5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 14:45:11 -0400 Subject: [PATCH 035/120] Define deleteRemaining in terms of insertMapped. --- src/RWS.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index dabea1af1..0e784b1d9 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -95,8 +95,7 @@ deleteRemaining :: Traversable t => [MappedDiff syntax ann1 ann2] -> t (UnmappedTerm syntax ann1) -> [MappedDiff syntax ann1 ann2] -deleteRemaining diffs unmappedAs = - foldl' (flip insertDiff) diffs (This . (termIndex &&& term) <$> unmappedAs) +deleteRemaining diffs remaining = insertMapped (This . (termIndex &&& term) <$> remaining) diffs -- | Inserts an index and diff pair into a list of indices and diffs. insertDiff :: MappedDiff syntax ann1 ann2 From c7bf64213d2400140b560bfef98663ee681cd3c7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 14:57:01 -0400 Subject: [PATCH 036/120] Simplify the selection of the minimum term index. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index 0e784b1d9..50f2857c7 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -241,7 +241,7 @@ setFeatureVector :: Record (FeatureVector ': fields) -> FeatureVector -> Record setFeatureVector = setField minimumTermIndex :: [UnmappedTerm syntax ann] -> Int -minimumTermIndex = pred . maybe 0 getMin . getOption . foldMap (Option . Just . Min . termIndex) +minimumTermIndex = pred . fromMaybe 0 . foldr (min . Just . termIndex) Nothing toMap :: [UnmappedTerm syntax ann] -> IntMap.IntMap (UnmappedTerm syntax ann) toMap = IntMap.fromList . fmap (termIndex &&& id) From bd3e1d35ba6c8f18e46f932a07f659acc808ad5b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 14:57:44 -0400 Subject: [PATCH 037/120] The minimum index is the first index. --- src/RWS.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 50f2857c7..894e6d345 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -134,7 +134,7 @@ findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs = (diffs, re (diffs, (_, remaining, _)) = traverse (findNearestNeighbourToDiff' canCompare (toKdMap featureAs) (toKdMap featureBs)) allDiffs & fmap catMaybes & - (`runState` (minimumTermIndex featureAs, toMap featureAs, toMap featureBs)) + (`runState` (pred (maybe 0 termIndex (listToMaybe featureAs)), toMap featureAs, toMap featureBs)) findNearestNeighbourToDiff' :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. @@ -240,9 +240,6 @@ nullFeatureVector = FV $ listArray (0, 0) [0] setFeatureVector :: Record (FeatureVector ': fields) -> FeatureVector -> Record (FeatureVector ': fields) setFeatureVector = setField -minimumTermIndex :: [UnmappedTerm syntax ann] -> Int -minimumTermIndex = pred . fromMaybe 0 . foldr (min . Just . termIndex) Nothing - toMap :: [UnmappedTerm syntax ann] -> IntMap.IntMap (UnmappedTerm syntax ann) toMap = IntMap.fromList . fmap (termIndex &&& id) From 919862a1a26d513780bbc9ce58dacdd50f935b4e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 15:04:44 -0400 Subject: [PATCH 038/120] editDistanceUpTo does not need to be computed on pure insertions/deletions. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 894e6d345..5b03c1537 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -184,7 +184,7 @@ nearestUnmapped :: (Foldable syntax, Functor syntax, GAlign syntax) -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term, if any. nearestUnmapped unmapped tree key = listToMaybe (sortOn approximateEditDistance candidates) where candidates = toList (IntMap.intersection unmapped (toMap (fmap snd (kNearest tree defaultL (feature key))))) - approximateEditDistance = editDistanceUpTo defaultM . These (term key) . term + approximateEditDistance = editDistanceUpTo defaultM (term key) . term defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int defaultD = 15 @@ -320,8 +320,8 @@ defaultM = 10 -- | Return an edit distance as the sum of it's term sizes, given an cutoff and a syntax of terms 'f a'. -- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost. -editDistanceUpTo :: (GAlign syntax, Foldable syntax, Functor syntax) => Integer -> Edit syntax ann1 ann2 -> Int -editDistanceUpTo m = these termSize termSize (\ a b -> diffCost m (approximateDiff a b)) +editDistanceUpTo :: (GAlign syntax, Foldable syntax, Functor syntax) => Integer -> Term syntax ann1 -> Term syntax ann2 -> Int +editDistanceUpTo m a b = diffCost m (approximateDiff a b) where diffCost = flip . cata $ \ diff m -> case diff of _ | m <= 0 -> 0 Merge body -> sum (fmap ($ pred m) body) From dda974e4c5c026af2186a94a759de57507726b41 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 15:25:55 -0400 Subject: [PATCH 039/120] Express nearAndComparableTo without reference to the keys. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index 5b03c1537..ee9af1575 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -170,7 +170,7 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do pure (These (i, a) (j, b)) nearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerms syntax ann1 -> UnmappedTerms syntax ann1 -nearAndComparableTo canCompare index term = IntMap.filterWithKey (\ k (UnmappedTerm _ _ term') -> inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term) +nearAndComparableTo canCompare index term = IntMap.filter (\ (UnmappedTerm k _ term') -> inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term) -- | Finds the most-similar unmapped term to the passed-in term, if any. -- From 910862060743e78d15c71915bd6ba107537260e8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 15:29:12 -0400 Subject: [PATCH 040/120] :fire: redundant parens. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index ee9af1575..9cdfc52e2 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -162,7 +162,7 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do -- Look up the nearest unmapped term in `unmappedA`. foundA@(UnmappedTerm i _ a) <- nearestUnmapped (nearAndComparableTo canCompare previous b unmappedA) kdTreeA term -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped ((nearAndComparableTo (flip canCompare) (pred j) a) unmappedB) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped (nearAndComparableTo (flip canCompare) (pred j) a unmappedB) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! do From c1fe9d588ccf78ac3de882d84c385e282661a62a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 15:32:09 -0400 Subject: [PATCH 041/120] Move the filter back into findNearestNeighbourTo. --- src/RWS.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 9cdfc52e2..cea7605fa 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -160,17 +160,17 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do (previous, unmappedA, unmappedB) <- get fromMaybe (insertion previous unmappedA unmappedB term) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped (nearAndComparableTo canCompare previous b unmappedA) kdTreeA term + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (IntMap.filter (isNearAndComparableTo canCompare previous b) unmappedA) kdTreeA term -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (nearAndComparableTo (flip canCompare) (pred j) a unmappedB) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped (IntMap.filter (isNearAndComparableTo (flip canCompare) (pred j) a) unmappedB) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! do put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) pure (These (i, a) (j, b)) -nearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerms syntax ann1 -> UnmappedTerms syntax ann1 -nearAndComparableTo canCompare index term = IntMap.filter (\ (UnmappedTerm k _ term') -> inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term) +isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool +isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term -- | Finds the most-similar unmapped term to the passed-in term, if any. -- From 52921e876725c06a4991a2e0104c0789fc2111a3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 15:43:02 -0400 Subject: [PATCH 042/120] Give an alternative definition of nearestUnmapped based on lists of unmapped terms. --- src/RWS.hs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index cea7605fa..493ee6a9a 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -21,13 +21,13 @@ import Data.Array.Unboxed import Data.Bifunctor (bimap) import Data.Diff (DiffF(..), deleting, inserting, merge, replacing) import Data.Foldable -import Data.Function ((&)) +import Data.Function ((&), on) import Data.Functor.Classes import Data.Functor.Foldable import Data.Hashable import qualified Data.IntMap as IntMap import Data.KdMap.Static hiding (elems, empty, inRange) -import Data.List (sortOn) +import Data.List (intersectBy, sortOn) import Data.Maybe import Data.Record import Data.Semigroup hiding (First(..)) @@ -186,6 +186,20 @@ nearestUnmapped unmapped tree key = listToMaybe (sortOn approximateEditDistance where candidates = toList (IntMap.intersection unmapped (toMap (fmap snd (kNearest tree defaultL (feature key))))) approximateEditDistance = editDistanceUpTo defaultM (term key) . term +-- | Finds the most-similar unmapped term to the passed-in term, if any. +-- +-- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance. +-- +-- cf §4.2 of RWS-Diff +nearestUnmapped' :: (Foldable syntax, Functor syntax, GAlign syntax) + => [UnmappedTerm syntax ann1] -- ^ A set of terms eligible for matching against. + -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d tree to look up nearest neighbours within. + -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. + -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term, if any. +nearestUnmapped' unmapped tree key = listToMaybe (sortOn approximateEditDistance candidates) + where candidates = intersectBy ((==) `on` termIndex) unmapped (fmap snd (kNearest tree defaultL (feature key))) + approximateEditDistance = editDistanceUpTo defaultM (term key) . term + defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int defaultD = 15 defaultL = 2 From 2f205739916a08737789da390f4fae55f06d1a9c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 15:43:22 -0400 Subject: [PATCH 043/120] Give an alternative definition of findNearestNeighbourTo based on lists of unmapped terms. --- src/RWS.hs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/RWS.hs b/src/RWS.hs index 493ee6a9a..30f9ddf88 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -169,6 +169,30 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) pure (These (i, a) (j, b)) +findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax) + => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. + -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) + -> KdMap Double FeatureVector (UnmappedTerm syntax ann2) + -> Int + -> [UnmappedTerm syntax ann1] + -> [UnmappedTerm syntax ann2] + -> [MappedDiff syntax ann1 ann2] +findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go + where go _ [] [] = [] + go _ as [] = This . (termIndex &&& term) <$> as + go _ [] bs = That . (termIndex &&& term) <$> bs + go previous unmappedA unmappedB@(termB@(UnmappedTerm j _ b) : restUnmappedB) = + fromMaybe (That (j, b) : go previous unmappedA restUnmappedB) $ do + -- Look up the nearest unmapped term in `unmappedA`. + foundA@(UnmappedTerm i _ a) <- nearestUnmapped' (filter (canCompareTerms (flip canCompare) b . term) (takeWhile (inRange (succ previous, previous + defaultMoveBound) . termIndex) unmappedA)) kdTreeA termB + -- Look up the nearest `foundA` in `unmappedB` + UnmappedTerm j' _ _ <- nearestUnmapped' (filter (canCompareTerms canCompare a . term) (takeWhile (inRange (j, pred j + defaultMoveBound) . termIndex) unmappedB)) kdTreeB foundA + -- Return Nothing if their indices don't match + guard (j == j') + pure $! + -- put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) + These (i, a) (j, b) : go i (tail unmappedA) restUnmappedB + isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term From 226ba36c600b25251219d15e79ac9ff84b9f7ec9 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 15:49:35 -0400 Subject: [PATCH 044/120] Pass the predicate directly to nearestUnmapped'. --- src/RWS.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 30f9ddf88..a25b3a944 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -21,13 +21,13 @@ import Data.Array.Unboxed import Data.Bifunctor (bimap) import Data.Diff (DiffF(..), deleting, inserting, merge, replacing) import Data.Foldable -import Data.Function ((&), on) +import Data.Function ((&)) import Data.Functor.Classes import Data.Functor.Foldable import Data.Hashable import qualified Data.IntMap as IntMap import Data.KdMap.Static hiding (elems, empty, inRange) -import Data.List (intersectBy, sortOn) +import Data.List (sortOn) import Data.Maybe import Data.Record import Data.Semigroup hiding (First(..)) @@ -181,12 +181,12 @@ findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go where go _ [] [] = [] go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs - go previous unmappedA unmappedB@(termB@(UnmappedTerm j _ b) : restUnmappedB) = + go previous unmappedA (termB@(UnmappedTerm j _ b) : restUnmappedB) = fromMaybe (That (j, b) : go previous unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped' (filter (canCompareTerms (flip canCompare) b . term) (takeWhile (inRange (succ previous, previous + defaultMoveBound) . termIndex) unmappedA)) kdTreeA termB + foundA@(UnmappedTerm i _ a) <- nearestUnmapped' (isNearAndComparableTo canCompare previous b) kdTreeA termB -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped' (filter (canCompareTerms canCompare a . term) (takeWhile (inRange (j, pred j + defaultMoveBound) . termIndex) unmappedB)) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped' (isNearAndComparableTo (flip canCompare) (pred j) a) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! @@ -216,12 +216,12 @@ nearestUnmapped unmapped tree key = listToMaybe (sortOn approximateEditDistance -- -- cf §4.2 of RWS-Diff nearestUnmapped' :: (Foldable syntax, Functor syntax, GAlign syntax) - => [UnmappedTerm syntax ann1] -- ^ A set of terms eligible for matching against. + => (UnmappedTerm syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d tree to look up nearest neighbours within. -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term, if any. -nearestUnmapped' unmapped tree key = listToMaybe (sortOn approximateEditDistance candidates) - where candidates = intersectBy ((==) `on` termIndex) unmapped (fmap snd (kNearest tree defaultL (feature key))) +nearestUnmapped' isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) + where candidates = filter isEligible (fmap snd (kNearest tree defaultL (feature key))) approximateEditDistance = editDistanceUpTo defaultM (term key) . term defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int From 76863d9e520e39ad86ba426a5843539efd4223f7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 15:50:06 -0400 Subject: [PATCH 045/120] Use the fmap operator. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index a25b3a944..e9dcc228c 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -221,7 +221,7 @@ nearestUnmapped' :: (Foldable syntax, Functor syntax, GAlign syntax) -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term, if any. nearestUnmapped' isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) - where candidates = filter isEligible (fmap snd (kNearest tree defaultL (feature key))) + where candidates = filter isEligible (snd <$> kNearest tree defaultL (feature key)) approximateEditDistance = editDistanceUpTo defaultM (term key) . term defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int From dcb4f256c128c16de628fa2a34ac43083b7c1050 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 15:54:09 -0400 Subject: [PATCH 046/120] Use the predicate definition of nearestUnmapped. --- src/RWS.hs | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index e9dcc228c..34750b220 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -160,9 +160,9 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do (previous, unmappedA, unmappedB) <- get fromMaybe (insertion previous unmappedA unmappedB term) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped (IntMap.filter (isNearAndComparableTo canCompare previous b) unmappedA) kdTreeA term + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (isNearAndComparableTo canCompare previous b) kdTreeA term -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (IntMap.filter (isNearAndComparableTo (flip canCompare) (pred j) a) unmappedB) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped (isNearAndComparableTo (flip canCompare) (pred j) a) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! do @@ -184,9 +184,9 @@ findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go go previous unmappedA (termB@(UnmappedTerm j _ b) : restUnmappedB) = fromMaybe (That (j, b) : go previous unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped' (isNearAndComparableTo canCompare previous b) kdTreeA termB + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (isNearAndComparableTo canCompare previous b) kdTreeA termB -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped' (isNearAndComparableTo (flip canCompare) (pred j) a) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped (isNearAndComparableTo (flip canCompare) (pred j) a) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! @@ -202,25 +202,11 @@ isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange ( -- -- cf §4.2 of RWS-Diff nearestUnmapped :: (Foldable syntax, Functor syntax, GAlign syntax) - => UnmappedTerms syntax ann1 -- ^ A set of terms eligible for matching against. + => (UnmappedTerm syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d tree to look up nearest neighbours within. -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term, if any. -nearestUnmapped unmapped tree key = listToMaybe (sortOn approximateEditDistance candidates) - where candidates = toList (IntMap.intersection unmapped (toMap (fmap snd (kNearest tree defaultL (feature key))))) - approximateEditDistance = editDistanceUpTo defaultM (term key) . term - --- | Finds the most-similar unmapped term to the passed-in term, if any. --- --- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance. --- --- cf §4.2 of RWS-Diff -nearestUnmapped' :: (Foldable syntax, Functor syntax, GAlign syntax) - => (UnmappedTerm syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. - -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d tree to look up nearest neighbours within. - -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. - -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term, if any. -nearestUnmapped' isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) +nearestUnmapped isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) where candidates = filter isEligible (snd <$> kNearest tree defaultL (feature key)) approximateEditDistance = editDistanceUpTo defaultM (term key) . term From 192b7975768195bef742c5edff0315c16ea93522 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:05:58 -0400 Subject: [PATCH 047/120] =?UTF-8?q?Fail=20fast=20when=20we=E2=80=99ve=20ma?= =?UTF-8?q?pped=20everything.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/RWS.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index 34750b220..10102dd07 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -26,7 +26,7 @@ import Data.Functor.Classes import Data.Functor.Foldable import Data.Hashable import qualified Data.IntMap as IntMap -import Data.KdMap.Static hiding (elems, empty, inRange) +import Data.KdMap.Static hiding (elems, empty, inRange, null) import Data.List (sortOn) import Data.Maybe import Data.Record @@ -159,6 +159,7 @@ findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do (previous, unmappedA, unmappedB) <- get fromMaybe (insertion previous unmappedA unmappedB term) $ do + guard (not (null unmappedA)) -- Look up the nearest unmapped term in `unmappedA`. foundA@(UnmappedTerm i _ a) <- nearestUnmapped (isNearAndComparableTo canCompare previous b) kdTreeA term -- Look up the nearest `foundA` in `unmappedB` From 9a61d32ae0e9340593a21de6fcdb583b75d8e457 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:08:19 -0400 Subject: [PATCH 048/120] Add the mappedness condition to the predicate. --- src/RWS.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 10102dd07..7f5d912d8 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -161,9 +161,9 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do fromMaybe (insertion previous unmappedA unmappedB term) $ do guard (not (null unmappedA)) -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped (isNearAndComparableTo canCompare previous b) kdTreeA term + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (\ a -> isNearAndComparableTo canCompare previous b a && termIndex a >= fst (IntMap.findMin unmappedA)) kdTreeA term -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (isNearAndComparableTo (flip canCompare) (pred j) a) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped (\ b -> isNearAndComparableTo (flip canCompare) (pred j) a b && termIndex b >= j) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! do From 95cba6a24421b3709c78220863c9ad2f692f895a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:09:56 -0400 Subject: [PATCH 049/120] Find the min element once, early. --- src/RWS.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index 7f5d912d8..95015b1c8 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -160,8 +160,9 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do (previous, unmappedA, unmappedB) <- get fromMaybe (insertion previous unmappedA unmappedB term) $ do guard (not (null unmappedA)) + let (minA, _) = IntMap.findMin unmappedA -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped (\ a -> isNearAndComparableTo canCompare previous b a && termIndex a >= fst (IntMap.findMin unmappedA)) kdTreeA term + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (\ a -> isNearAndComparableTo canCompare previous b a && termIndex a >= minA) kdTreeA term -- Look up the nearest `foundA` in `unmappedB` UnmappedTerm j' _ _ <- nearestUnmapped (\ b -> isNearAndComparableTo (flip canCompare) (pred j) a b && termIndex b >= j) kdTreeB foundA -- Return Nothing if their indices don't match From e57b96c1de88110e57e38eb32bc436a0fc20acd5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:11:37 -0400 Subject: [PATCH 050/120] Bind the tail of the list. --- src/RWS.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 95015b1c8..d9efd1ba1 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -183,7 +183,7 @@ findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go where go _ [] [] = [] go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs - go previous unmappedA (termB@(UnmappedTerm j _ b) : restUnmappedB) = + go previous unmappedA@(_ : restUnmappedA) (termB@(UnmappedTerm j _ b) : restUnmappedB) = fromMaybe (That (j, b) : go previous unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. foundA@(UnmappedTerm i _ a) <- nearestUnmapped (isNearAndComparableTo canCompare previous b) kdTreeA termB @@ -193,7 +193,7 @@ findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go guard (j == j') pure $! -- put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) - These (i, a) (j, b) : go i (tail unmappedA) restUnmappedB + These (i, a) (j, b) : go i restUnmappedA restUnmappedB isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term From 44e58494afdec5cd5b725470616ad3ef223e0688 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:12:38 -0400 Subject: [PATCH 051/120] Add the mappedness conditions to the alternative definition. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index d9efd1ba1..ed5eab0fd 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -183,12 +183,12 @@ findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go where go _ [] [] = [] go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs - go previous unmappedA@(_ : restUnmappedA) (termB@(UnmappedTerm j _ b) : restUnmappedB) = + go previous unmappedA@(UnmappedTerm minA _ _ : restUnmappedA) (termB@(UnmappedTerm j _ b) : restUnmappedB) = fromMaybe (That (j, b) : go previous unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped (isNearAndComparableTo canCompare previous b) kdTreeA termB + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (\ a -> isNearAndComparableTo canCompare previous b a && termIndex a >= minA) kdTreeA termB -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (isNearAndComparableTo (flip canCompare) (pred j) a) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped (\ b -> isNearAndComparableTo (flip canCompare) (pred j) a b && termIndex b >= j) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! From 2be26f291a2c706a8d62de16120e484a64b0c7db Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:13:46 -0400 Subject: [PATCH 052/120] :fire: an obsolete commented out line. --- src/RWS.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index ed5eab0fd..db105e3b9 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -192,7 +192,6 @@ findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go -- Return Nothing if their indices don't match guard (j == j') pure $! - -- put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) These (i, a) (j, b) : go i restUnmappedA restUnmappedB isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool From 39a1c46f6ae7dfe83cace2bb8fc371d58ab7d41b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:19:35 -0400 Subject: [PATCH 053/120] Delete elements before the mapped element. --- src/RWS.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index db105e3b9..76bcda4d1 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -183,7 +183,7 @@ findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go where go _ [] [] = [] go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs - go previous unmappedA@(UnmappedTerm minA _ _ : restUnmappedA) (termB@(UnmappedTerm j _ b) : restUnmappedB) = + go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = fromMaybe (That (j, b) : go previous unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. foundA@(UnmappedTerm i _ a) <- nearestUnmapped (\ a -> isNearAndComparableTo canCompare previous b a && termIndex a >= minA) kdTreeA termB @@ -192,7 +192,8 @@ findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go -- Return Nothing if their indices don't match guard (j == j') pure $! - These (i, a) (j, b) : go i restUnmappedA restUnmappedB + let (deleted, _ : restUnmappedA) = span ((< i) . termIndex) unmappedA in + (This . (termIndex &&& term) <$> deleted) <> (These (i, a) (j, b) : go i restUnmappedA restUnmappedB) isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term From 9c37f641dacd6e0d845a9c62ef919ba4ec7e1d69 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:43:17 -0400 Subject: [PATCH 054/120] Define a function mapping contiguous chunks of non-equivalent terms. --- src/RWS.hs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/RWS.hs b/src/RWS.hs index 76bcda4d1..d18f66de5 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -232,6 +232,20 @@ insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do put (previous, unmappedA, IntMap.delete j unmappedB) pure (That (j, b)) +mapContiguous :: Functor syntax + => RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) + -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] +mapContiguous = go 0 0 [] + where go _ _ chunk [] = mapChunk chunk [] + go i j chunk (first : rest) = case first of + This a -> go (succ i) j (Left (i, a) : chunk) rest + That b -> go i (succ j) (Right (j, b) : chunk) rest + These a b -> mapChunk chunk (These (i, a) (j, b) : go (succ i) (succ j) [] rest) + mapChunk [] rest = rest + mapChunk [only] rest = either This That only : rest + mapChunk more rest = (either This That <$> reverse more) <> rest + + genFeaturizedTermsAndDiffs :: Functor syntax => RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> ( [UnmappedTerm syntax (Record (FeatureVector ': fields1))] From 76d3373d39c345e33d66c7eb0c26c2660d1be8d3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:45:25 -0400 Subject: [PATCH 055/120] mapContiguous takes a ComparabilityRelation. --- src/RWS.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index d18f66de5..bbb58b12d 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -233,9 +233,10 @@ insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do pure (That (j, b)) mapContiguous :: Functor syntax - => RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) + => ComparabilityRelation syntax ann1 ann2 + -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] -mapContiguous = go 0 0 [] +mapContiguous canCompare = go 0 0 [] where go _ _ chunk [] = mapChunk chunk [] go i j chunk (first : rest) = case first of This a -> go (succ i) j (Left (i, a) : chunk) rest From 6134a24d8d48a6f5357f5ce8d602e1cc55380fc1 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 16:56:15 -0400 Subject: [PATCH 056/120] Map the ls and rs separately. --- src/RWS.hs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index bbb58b12d..bb1888263 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -236,15 +236,15 @@ mapContiguous :: Functor syntax => ComparabilityRelation syntax ann1 ann2 -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] -mapContiguous canCompare = go 0 0 [] - where go _ _ chunk [] = mapChunk chunk [] - go i j chunk (first : rest) = case first of - This a -> go (succ i) j (Left (i, a) : chunk) rest - That b -> go i (succ j) (Right (j, b) : chunk) rest - These a b -> mapChunk chunk (These (i, a) (j, b) : go (succ i) (succ j) [] rest) - mapChunk [] rest = rest - mapChunk [only] rest = either This That only : rest - mapChunk more rest = (either This That <$> reverse more) <> rest +mapContiguous canCompare = go 0 0 [] [] + where go _ _ ls rs [] = mapChunk ls rs + go i j ls rs (first : rest) = case first of + This a -> go (succ i) j ((i, a) : ls) rs rest + That b -> go i (succ j) ls ((j, b) : rs) rest + These a b -> mapChunk ls rs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest) + mapChunk ls [] = This <$> reverse ls + mapChunk [] rs = That <$> reverse rs + mapChunk ls rs = (This <$> reverse ls) <> (That <$> reverse rs) genFeaturizedTermsAndDiffs :: Functor syntax From ed6be539aa31e058d39ea76b074818033f3a0ef5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 20:41:10 -0400 Subject: [PATCH 057/120] =?UTF-8?q?Correct=20the=20ComparabilityRelation?= =?UTF-8?q?=E2=80=99s=20type.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index bb1888263..986de9f1c 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -233,7 +233,7 @@ insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do pure (That (j, b)) mapContiguous :: Functor syntax - => ComparabilityRelation syntax ann1 ann2 + => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] mapContiguous canCompare = go 0 0 [] [] From 72f6981f15beaaeead4f1e7d451abe4d8a32d91d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 20:41:21 -0400 Subject: [PATCH 058/120] Run RWS over the contiguous chunks. --- src/RWS.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 986de9f1c..9c00fb529 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -232,19 +232,19 @@ insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do put (previous, unmappedA, IntMap.delete j unmappedB) pure (That (j, b)) -mapContiguous :: Functor syntax +mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] mapContiguous canCompare = go 0 0 [] [] where go _ _ ls rs [] = mapChunk ls rs go i j ls rs (first : rest) = case first of - This a -> go (succ i) j ((i, a) : ls) rs rest - That b -> go i (succ j) ls ((j, b) : rs) rest + This a -> go (succ i) j (featurize i a : ls) rs rest + That b -> go i (succ j) ls (featurize j b : rs) rest These a b -> mapChunk ls rs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest) - mapChunk ls [] = This <$> reverse ls - mapChunk [] rs = That <$> reverse rs - mapChunk ls rs = (This <$> reverse ls) <> (That <$> reverse rs) + mapChunk ls [] = This . (termIndex &&& term) <$> reverse ls + mapChunk [] rs = That . (termIndex &&& term) <$> reverse rs + mapChunk ls rs = findNearestNeighbourTo' canCompare (toKdMap ls) (toKdMap rs) (termIndex (last ls)) ls rs genFeaturizedTermsAndDiffs :: Functor syntax From 40631d8d00facee58d23693f03020033026c744c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 20:43:22 -0400 Subject: [PATCH 059/120] Define top-level RWS in terms of mapContiguous. --- src/RWS.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/RWS.hs b/src/RWS.hs index 9c00fb529..caf77f963 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -66,6 +66,10 @@ rws :: (Foldable syntax, Functor syntax, GAlign syntax) rws _ _ as [] = This <$> as rws _ _ [] bs = That <$> bs rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a] +rws canCompare equivalent as bs + = ses equivalent as bs + & mapContiguous canCompare + & fmap (bimap snd snd) rws canCompare equivalent as bs = ses equivalent as bs & genFeaturizedTermsAndDiffs From f30029d16de10a28ed168c4b743fdc26a3d36bbf Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 20:50:01 -0400 Subject: [PATCH 060/120] Build up diff lists for contiguous chunks --- src/RWS.hs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index caf77f963..0177664d2 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -240,14 +240,14 @@ mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] -mapContiguous canCompare = go 0 0 [] [] - where go _ _ ls rs [] = mapChunk ls rs +mapContiguous canCompare = go 0 0 id id + where go _ _ ls rs [] = mapChunk (ls []) (rs []) go i j ls rs (first : rest) = case first of - This a -> go (succ i) j (featurize i a : ls) rs rest - That b -> go i (succ j) ls (featurize j b : rs) rest - These a b -> mapChunk ls rs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest) - mapChunk ls [] = This . (termIndex &&& term) <$> reverse ls - mapChunk [] rs = That . (termIndex &&& term) <$> reverse rs + This a -> go (succ i) j (ls . (featurize i a :)) rs rest + That b -> go i (succ j) ls (rs . (featurize j b :)) rest + These a b -> mapChunk (ls []) (rs []) <> (These (i, a) (j, b) : go (succ i) (succ j) id id rest) + mapChunk ls [] = This . (termIndex &&& term) <$> ls + mapChunk [] rs = That . (termIndex &&& term) <$> rs mapChunk ls rs = findNearestNeighbourTo' canCompare (toKdMap ls) (toKdMap rs) (termIndex (last ls)) ls rs From 2ca40cad2b945cab2ff6b2819ee8463b6bffc168 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 21:00:15 -0400 Subject: [PATCH 061/120] Take the index of the first, not last, element. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index 0177664d2..2a696824b 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -248,7 +248,7 @@ mapContiguous canCompare = go 0 0 id id These a b -> mapChunk (ls []) (rs []) <> (These (i, a) (j, b) : go (succ i) (succ j) id id rest) mapChunk ls [] = This . (termIndex &&& term) <$> ls mapChunk [] rs = That . (termIndex &&& term) <$> rs - mapChunk ls rs = findNearestNeighbourTo' canCompare (toKdMap ls) (toKdMap rs) (termIndex (last ls)) ls rs + mapChunk ls rs = findNearestNeighbourTo' canCompare (toKdMap ls) (toKdMap rs) (termIndex (head ls)) ls rs genFeaturizedTermsAndDiffs :: Functor syntax From 95b50b2dc4d311fe8b7d52c41d9a1eb3eb2ff07f Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 21:01:38 -0400 Subject: [PATCH 062/120] :fire: a redundant case. --- src/RWS.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 2a696824b..fd8313058 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -184,8 +184,7 @@ findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax) -> [UnmappedTerm syntax ann2] -> [MappedDiff syntax ann1 ann2] findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go - where go _ [] [] = [] - go _ as [] = This . (termIndex &&& term) <$> as + where go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = fromMaybe (That (j, b) : go previous unmappedA restUnmappedB) $ do From d5c9a68f7838e73a4615acc03670e21c2503bb61 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 21:05:30 -0400 Subject: [PATCH 063/120] Handle the short-circuit cases early. --- src/RWS.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index fd8313058..904add26b 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -183,7 +183,9 @@ findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax) -> [UnmappedTerm syntax ann1] -> [UnmappedTerm syntax ann2] -> [MappedDiff syntax ann1 ann2] -findNearestNeighbourTo' canCompare kdTreeA kdTreeB = go +findNearestNeighbourTo' _ _ _ _ as [] = This . (termIndex &&& term) <$> as +findNearestNeighbourTo' _ _ _ _ [] bs = That . (termIndex &&& term) <$> bs +findNearestNeighbourTo' canCompare kdTreeA kdTreeB i as bs = go i as bs where go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = From 65f749080974c4c32638a65acd8b1a3bc88329f8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 21:06:18 -0400 Subject: [PATCH 064/120] =?UTF-8?q?Don=E2=80=99t=20pass=20the=20term=20ind?= =?UTF-8?q?ex=20to=20findNearestNeighbourTo'.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/RWS.hs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 904add26b..641166e52 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -179,13 +179,12 @@ findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -> KdMap Double FeatureVector (UnmappedTerm syntax ann2) - -> Int -> [UnmappedTerm syntax ann1] -> [UnmappedTerm syntax ann2] -> [MappedDiff syntax ann1 ann2] -findNearestNeighbourTo' _ _ _ _ as [] = This . (termIndex &&& term) <$> as -findNearestNeighbourTo' _ _ _ _ [] bs = That . (termIndex &&& term) <$> bs -findNearestNeighbourTo' canCompare kdTreeA kdTreeB i as bs = go i as bs +findNearestNeighbourTo' _ _ _ as [] = This . (termIndex &&& term) <$> as +findNearestNeighbourTo' _ _ _ [] bs = That . (termIndex &&& term) <$> bs +findNearestNeighbourTo' canCompare kdTreeA kdTreeB as bs = go (termIndex (head as)) as bs where go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = @@ -249,7 +248,7 @@ mapContiguous canCompare = go 0 0 id id These a b -> mapChunk (ls []) (rs []) <> (These (i, a) (j, b) : go (succ i) (succ j) id id rest) mapChunk ls [] = This . (termIndex &&& term) <$> ls mapChunk [] rs = That . (termIndex &&& term) <$> rs - mapChunk ls rs = findNearestNeighbourTo' canCompare (toKdMap ls) (toKdMap rs) (termIndex (head ls)) ls rs + mapChunk ls rs = findNearestNeighbourTo' canCompare (toKdMap ls) (toKdMap rs) ls rs genFeaturizedTermsAndDiffs :: Functor syntax From 4e5e1423817516ee0c42b4861f9aa3cb8db564ad Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 21:07:45 -0400 Subject: [PATCH 065/120] =?UTF-8?q?Don=E2=80=99t=20pass=20the=20k-d=20maps?= =?UTF-8?q?=20to=20findNearestNeighbourTo'.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/RWS.hs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 641166e52..80b6e5264 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -177,14 +177,12 @@ findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. - -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) - -> KdMap Double FeatureVector (UnmappedTerm syntax ann2) -> [UnmappedTerm syntax ann1] -> [UnmappedTerm syntax ann2] -> [MappedDiff syntax ann1 ann2] -findNearestNeighbourTo' _ _ _ as [] = This . (termIndex &&& term) <$> as -findNearestNeighbourTo' _ _ _ [] bs = That . (termIndex &&& term) <$> bs -findNearestNeighbourTo' canCompare kdTreeA kdTreeB as bs = go (termIndex (head as)) as bs +findNearestNeighbourTo' _ as [] = This . (termIndex &&& term) <$> as +findNearestNeighbourTo' _ [] bs = That . (termIndex &&& term) <$> bs +findNearestNeighbourTo' canCompare as bs = go (termIndex (head as)) as bs where go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = @@ -198,6 +196,7 @@ findNearestNeighbourTo' canCompare kdTreeA kdTreeB as bs = go (termIndex (head a pure $! let (deleted, _ : restUnmappedA) = span ((< i) . termIndex) unmappedA in (This . (termIndex &&& term) <$> deleted) <> (These (i, a) (j, b) : go i restUnmappedA restUnmappedB) + (kdTreeA, kdTreeB) = (toKdMap as, toKdMap bs) isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term @@ -248,7 +247,7 @@ mapContiguous canCompare = go 0 0 id id These a b -> mapChunk (ls []) (rs []) <> (These (i, a) (j, b) : go (succ i) (succ j) id id rest) mapChunk ls [] = This . (termIndex &&& term) <$> ls mapChunk [] rs = That . (termIndex &&& term) <$> rs - mapChunk ls rs = findNearestNeighbourTo' canCompare (toKdMap ls) (toKdMap rs) ls rs + mapChunk ls rs = findNearestNeighbourTo' canCompare ls rs genFeaturizedTermsAndDiffs :: Functor syntax From 4ea2a9dea81e1bd917f6d022b0b369c602731bc7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 21:10:19 -0400 Subject: [PATCH 066/120] Just reverse the lists. --- src/RWS.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 80b6e5264..bc72f54f4 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -239,15 +239,15 @@ mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] -mapContiguous canCompare = go 0 0 id id - where go _ _ ls rs [] = mapChunk (ls []) (rs []) +mapContiguous canCompare = go 0 0 [] [] + where go _ _ ls rs [] = mapChunk ls rs go i j ls rs (first : rest) = case first of - This a -> go (succ i) j (ls . (featurize i a :)) rs rest - That b -> go i (succ j) ls (rs . (featurize j b :)) rest - These a b -> mapChunk (ls []) (rs []) <> (These (i, a) (j, b) : go (succ i) (succ j) id id rest) - mapChunk ls [] = This . (termIndex &&& term) <$> ls - mapChunk [] rs = That . (termIndex &&& term) <$> rs - mapChunk ls rs = findNearestNeighbourTo' canCompare ls rs + This a -> go (succ i) j (featurize i a : ls) rs rest + That b -> go i (succ j) ls (featurize j b : rs) rest + These a b -> mapChunk ls rs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest) + mapChunk ls [] = This . (termIndex &&& term) <$> reverse ls + mapChunk [] rs = That . (termIndex &&& term) <$> reverse rs + mapChunk ls rs = findNearestNeighbourTo' canCompare (reverse ls) (reverse rs) genFeaturizedTermsAndDiffs :: Functor syntax From 01a1ab3b9453206aa337e188e5f5200a2030d3dd Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 21:16:55 -0400 Subject: [PATCH 067/120] Name the lists consistently. --- src/RWS.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index bc72f54f4..9058d23c1 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -240,14 +240,14 @@ mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] mapContiguous canCompare = go 0 0 [] [] - where go _ _ ls rs [] = mapChunk ls rs - go i j ls rs (first : rest) = case first of - This a -> go (succ i) j (featurize i a : ls) rs rest - That b -> go i (succ j) ls (featurize j b : rs) rest - These a b -> mapChunk ls rs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest) - mapChunk ls [] = This . (termIndex &&& term) <$> reverse ls - mapChunk [] rs = That . (termIndex &&& term) <$> reverse rs - mapChunk ls rs = findNearestNeighbourTo' canCompare (reverse ls) (reverse rs) + where go _ _ as bs [] = mapChunk as bs + go i j as bs (first : rest) = case first of + This a -> go (succ i) j (featurize i a : as) bs rest + That b -> go i (succ j) as (featurize j b : bs) rest + These a b -> mapChunk as bs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest) + mapChunk as [] = This . (termIndex &&& term) <$> reverse as + mapChunk [] bs = That . (termIndex &&& term) <$> reverse bs + mapChunk as bs = findNearestNeighbourTo' canCompare (reverse as) (reverse bs) genFeaturizedTermsAndDiffs :: Functor syntax From f20d5e41acf90f3d2e34079eb2fe949c20b38a03 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 21:20:24 -0400 Subject: [PATCH 068/120] Correct & align the docs for nearestUnmapped. --- src/RWS.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 9058d23c1..30c659db2 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -207,10 +207,10 @@ isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange ( -- -- cf §4.2 of RWS-Diff nearestUnmapped :: (Foldable syntax, Functor syntax, GAlign syntax) - => (UnmappedTerm syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. - -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d tree to look up nearest neighbours within. - -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. - -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term, if any. + => (UnmappedTerm syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. + -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d map to look up nearest neighbours within. + -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. + -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term matched by the predicate, if any. nearestUnmapped isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) where candidates = filter isEligible (snd <$> kNearest tree defaultL (feature key)) approximateEditDistance = editDistanceUpTo defaultM (term key) . term From 6d4067854d99abd50b3c95fa0c2e92ab9ceeccc8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 22:27:40 -0400 Subject: [PATCH 069/120] Correct the initial value for the previous index. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index 30c659db2..eac69c608 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -182,7 +182,7 @@ findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax) -> [MappedDiff syntax ann1 ann2] findNearestNeighbourTo' _ as [] = This . (termIndex &&& term) <$> as findNearestNeighbourTo' _ [] bs = That . (termIndex &&& term) <$> bs -findNearestNeighbourTo' canCompare as bs = go (termIndex (head as)) as bs +findNearestNeighbourTo' canCompare as bs = go (pred (termIndex (head as))) as bs where go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = From d77aed7c2b2bfe15b89f5814eb3b0358eacabe9c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 22:28:20 -0400 Subject: [PATCH 070/120] :fire: the redundant short-circuit cases. --- src/RWS.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index eac69c608..b5fd33636 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -180,8 +180,6 @@ findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax) -> [UnmappedTerm syntax ann1] -> [UnmappedTerm syntax ann2] -> [MappedDiff syntax ann1 ann2] -findNearestNeighbourTo' _ as [] = This . (termIndex &&& term) <$> as -findNearestNeighbourTo' _ [] bs = That . (termIndex &&& term) <$> bs findNearestNeighbourTo' canCompare as bs = go (pred (termIndex (head as))) as bs where go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs From 171decf71f5b1c0df65c99ab4362f849a4ce455b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 23:00:45 -0400 Subject: [PATCH 071/120] :fire: the old rws code path. --- src/RWS.hs | 146 ++--------------------------------------------------- 1 file changed, 5 insertions(+), 141 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index b5fd33636..8895dde9c 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -25,7 +25,6 @@ import Data.Function ((&)) import Data.Functor.Classes import Data.Functor.Foldable import Data.Hashable -import qualified Data.IntMap as IntMap import Data.KdMap.Static hiding (elems, empty, inRange, null) import Data.List (sortOn) import Data.Maybe @@ -54,9 +53,6 @@ data UnmappedTerm syntax ann = UnmappedTerm , term :: Term syntax ann -- ^ The unmapped term } --- | Either a `term`, an index of a matched term, or nil. -data TermOrIndexOrNone term = Term term | Index {-# UNPACK #-} !Int | None - rws :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Bool) @@ -70,17 +66,6 @@ rws canCompare equivalent as bs = ses equivalent as bs & mapContiguous canCompare & fmap (bimap snd snd) -rws canCompare equivalent as bs - = ses equivalent as bs - & genFeaturizedTermsAndDiffs - & \ (featureAs, featureBs, mappedDiffs, allDiffs) -> - findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs - & uncurry deleteRemaining - & insertMapped mappedDiffs - & fmap (bimap snd snd) - --- | An IntMap of unmapped terms keyed by their position in a list of terms. -type UnmappedTerms syntax ann = IntMap.IntMap (UnmappedTerm syntax ann) type Edit syntax ann1 ann2 = These (Term syntax ann1) (Term syntax ann2) @@ -89,98 +74,13 @@ type MappedDiff syntax ann1 ann2 = These (Int, Term syntax ann1) (Int, Term synt type RWSEditScript syntax ann1 ann2 = [Edit syntax ann1 ann2] -insertMapped :: Foldable t - => t (MappedDiff syntax ann1 ann2) - -> [MappedDiff syntax ann1 ann2] - -> [MappedDiff syntax ann1 ann2] -insertMapped diffs into = foldl' (flip insertDiff) into diffs - -deleteRemaining :: Traversable t - => [MappedDiff syntax ann1 ann2] - -> t (UnmappedTerm syntax ann1) - -> [MappedDiff syntax ann1 ann2] -deleteRemaining diffs remaining = insertMapped (This . (termIndex &&& term) <$> remaining) diffs - --- | Inserts an index and diff pair into a list of indices and diffs. -insertDiff :: MappedDiff syntax ann1 ann2 - -> [MappedDiff syntax ann1 ann2] - -> [MappedDiff syntax ann1 ann2] -insertDiff inserted [] = [ inserted ] -insertDiff a (b:rest) = case (bimap fst fst a, bimap fst fst b) of - (These i1 i2, These j1 j2) -> if i1 <= j1 && i2 <= j2 then a : b : rest else b : insertDiff a rest - (This i, This j) -> if i <= j then a : b : rest else b : insertDiff a rest - (That i, That j) -> if i <= j then a : b : rest else b : insertDiff a rest - (This i, These j _) -> if i <= j then a : b : rest else b : insertDiff a rest - (That i, These _ j) -> if i <= j then a : b : rest else b : insertDiff a rest - - (This _, That _) -> b : insertDiff a rest - (That _, This _) -> b : insertDiff a rest - - (These i1 i2, _) -> case break isThese rest of - (rest, tail) -> let (before, after) = foldr' (combine i1 i2) ([], []) (b : rest) in - case after of - [] -> before <> insertDiff a tail - _ -> before <> (a : after) <> tail - where - combine i1 i2 each (before, after) = case bimap fst fst each of - This j1 -> if i1 <= j1 then (before, each : after) else (each : before, after) - That j2 -> if i2 <= j2 then (before, each : after) else (each : before, after) - These _ _ -> (before, after) - -findNearestNeighboursToDiff :: (Foldable syntax, Functor syntax, GAlign syntax) - => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. - -> [TermOrIndexOrNone (UnmappedTerm syntax ann2)] - -> [UnmappedTerm syntax ann1] - -> [UnmappedTerm syntax ann2] - -> ([MappedDiff syntax ann1 ann2], UnmappedTerms syntax ann1) -findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs = (diffs, remaining) - where - (diffs, (_, remaining, _)) = - traverse (findNearestNeighbourToDiff' canCompare (toKdMap featureAs) (toKdMap featureBs)) allDiffs & - fmap catMaybes & - (`runState` (pred (maybe 0 termIndex (listToMaybe featureAs)), toMap featureAs, toMap featureBs)) - -findNearestNeighbourToDiff' :: (Foldable syntax, Functor syntax, GAlign syntax) - => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. - -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) - -> KdMap Double FeatureVector (UnmappedTerm syntax ann2) - -> TermOrIndexOrNone (UnmappedTerm syntax ann2) - -> State (Int, UnmappedTerms syntax ann1, UnmappedTerms syntax ann2) - (Maybe (MappedDiff syntax ann1 ann2)) -findNearestNeighbourToDiff' canCompare kdTreeA kdTreeB termThing = case termThing of - None -> pure Nothing - RWS.Term term -> Just <$> findNearestNeighbourTo canCompare kdTreeA kdTreeB term - Index i -> modify' (\ (_, unA, unB) -> (i, unA, unB)) >> pure Nothing - -- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches. findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. - -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) - -> KdMap Double FeatureVector (UnmappedTerm syntax ann2) - -> UnmappedTerm syntax ann2 - -> State (Int, UnmappedTerms syntax ann1, UnmappedTerms syntax ann2) - (MappedDiff syntax ann1 ann2) -findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do - (previous, unmappedA, unmappedB) <- get - fromMaybe (insertion previous unmappedA unmappedB term) $ do - guard (not (null unmappedA)) - let (minA, _) = IntMap.findMin unmappedA - -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped (\ a -> isNearAndComparableTo canCompare previous b a && termIndex a >= minA) kdTreeA term - -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (\ b -> isNearAndComparableTo (flip canCompare) (pred j) a b && termIndex b >= j) kdTreeB foundA - -- Return Nothing if their indices don't match - guard (j == j') - pure $! do - put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB) - pure (These (i, a) (j, b)) - -findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax) - => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. - -> [UnmappedTerm syntax ann1] - -> [UnmappedTerm syntax ann2] - -> [MappedDiff syntax ann1 ann2] -findNearestNeighbourTo' canCompare as bs = go (pred (termIndex (head as))) as bs + -> [UnmappedTerm syntax ann1] + -> [UnmappedTerm syntax ann2] + -> [MappedDiff syntax ann1 ann2] +findNearestNeighbourTo canCompare as bs = go (pred (termIndex (head as))) as bs where go _ as [] = This . (termIndex &&& term) <$> as go _ [] bs = That . (termIndex &&& term) <$> bs go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = @@ -221,18 +121,6 @@ defaultQ = 3 defaultMoveBound = 1 --- Returns a state (insertion index, old unmapped terms, new unmapped terms), and value of (index, inserted diff), --- given a previous index, two sets of umapped terms, and an unmapped term to insert. -insertion :: Int - -> UnmappedTerms syntax ann1 - -> UnmappedTerms syntax ann2 - -> UnmappedTerm syntax ann2 - -> State (Int, UnmappedTerms syntax ann1, UnmappedTerms syntax ann2) - (MappedDiff syntax ann1 ann2) -insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do - put (previous, unmappedA, IntMap.delete j unmappedB) - pure (That (j, b)) - mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) @@ -245,31 +133,9 @@ mapContiguous canCompare = go 0 0 [] [] These a b -> mapChunk as bs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest) mapChunk as [] = This . (termIndex &&& term) <$> reverse as mapChunk [] bs = That . (termIndex &&& term) <$> reverse bs - mapChunk as bs = findNearestNeighbourTo' canCompare (reverse as) (reverse bs) + mapChunk as bs = findNearestNeighbourTo canCompare (reverse as) (reverse bs) -genFeaturizedTermsAndDiffs :: Functor syntax - => RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) - -> ( [UnmappedTerm syntax (Record (FeatureVector ': fields1))] - , [UnmappedTerm syntax (Record (FeatureVector ': fields2))] - , [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] - , [TermOrIndexOrNone (UnmappedTerm syntax (Record (FeatureVector ': fields2)))] - ) -genFeaturizedTermsAndDiffs sesDiffs = let Mapping _ _ a b c d = foldl' combine (Mapping 0 0 [] [] [] []) sesDiffs in (reverse a, reverse b, reverse c, reverse d) - where combine (Mapping counterA counterB as bs mappedDiffs allDiffs) diff = case diff of - This term -> Mapping (succ counterA) counterB (featurize counterA term : as) bs mappedDiffs (None : allDiffs) - That term -> Mapping counterA (succ counterB) as (featurize counterB term : bs) mappedDiffs (RWS.Term (featurize counterB term) : allDiffs) - These a b -> Mapping (succ counterA) (succ counterB) as bs ((These (counterA, a) (counterB, b)) : mappedDiffs) (Index counterA : allDiffs) - -data Mapping syntax ann1 ann2 - = Mapping - {-# UNPACK #-} !Int - {-# UNPACK #-} !Int - ![UnmappedTerm syntax ann1] - ![UnmappedTerm syntax ann2] - ![MappedDiff syntax ann1 ann2] - ![TermOrIndexOrNone (UnmappedTerm syntax ann2)] - featurize :: Functor syntax => Int -> Term syntax (Record (FeatureVector ': fields)) -> UnmappedTerm syntax (Record (FeatureVector ': fields)) featurize index term = UnmappedTerm index (getField (extract term)) (eraseFeatureVector term) @@ -282,8 +148,6 @@ nullFeatureVector = FV $ listArray (0, 0) [0] setFeatureVector :: Record (FeatureVector ': fields) -> FeatureVector -> Record (FeatureVector ': fields) setFeatureVector = setField -toMap :: [UnmappedTerm syntax ann] -> IntMap.IntMap (UnmappedTerm syntax ann) -toMap = IntMap.fromList . fmap (termIndex &&& id) toKdMap :: [UnmappedTerm syntax ann] -> KdMap Double FeatureVector (UnmappedTerm syntax ann) toKdMap = build (elems . unFV) . fmap (feature &&& id) From 74c1c1882203d1778648d7eb9e42d53b6dddb215 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 23:12:07 -0400 Subject: [PATCH 072/120] Compute the indices up front. --- src/RWS.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 8895dde9c..26890a47f 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -63,7 +63,7 @@ rws _ _ as [] = This <$> as rws _ _ [] bs = That <$> bs rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a] rws canCompare equivalent as bs - = ses equivalent as bs + = ses (\ a b -> equivalent (snd a) (snd b)) (zip [0..] as) (zip [0..] bs) & mapContiguous canCompare & fmap (bimap snd snd) @@ -123,14 +123,14 @@ defaultMoveBound = 1 mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) - -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) + -> [These (Int, Term syntax (Record (FeatureVector ': fields1))) (Int, Term syntax (Record (FeatureVector ': fields2)))] -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] -mapContiguous canCompare = go 0 0 [] [] - where go _ _ as bs [] = mapChunk as bs - go i j as bs (first : rest) = case first of - This a -> go (succ i) j (featurize i a : as) bs rest - That b -> go i (succ j) as (featurize j b : bs) rest - These a b -> mapChunk as bs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest) +mapContiguous canCompare = go [] [] + where go as bs [] = mapChunk as bs + go as bs (first : rest) = case first of + This (i, a) -> go (featurize i a : as) bs rest + That (j, b) -> go as (featurize j b : bs) rest + These _ _ -> mapChunk as bs <> (first : go [] [] rest) mapChunk as [] = This . (termIndex &&& term) <$> reverse as mapChunk [] bs = That . (termIndex &&& term) <$> reverse bs mapChunk as bs = findNearestNeighbourTo canCompare (reverse as) (reverse bs) From e0ca6c7ec43537955f75b98ed3502e026fc70634 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 23:18:01 -0400 Subject: [PATCH 073/120] Compute the move bound relative to the current index. --- src/RWS.hs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 26890a47f..043e30cda 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -80,24 +80,24 @@ findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) -> [UnmappedTerm syntax ann1] -> [UnmappedTerm syntax ann2] -> [MappedDiff syntax ann1 ann2] -findNearestNeighbourTo canCompare as bs = go (pred (termIndex (head as))) as bs - where go _ as [] = This . (termIndex &&& term) <$> as - go _ [] bs = That . (termIndex &&& term) <$> bs - go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = - fromMaybe (That (j, b) : go previous unmappedA restUnmappedB) $ do +findNearestNeighbourTo canCompare as bs = go as bs + where go as [] = This . (termIndex &&& term) <$> as + go [] bs = That . (termIndex &&& term) <$> bs + go unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = + fromMaybe (That (j, b) : go unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped (\ a -> isNearAndComparableTo canCompare previous b a && termIndex a >= minA) kdTreeA termB + foundA@(UnmappedTerm i _ a) <- nearestUnmapped (isNearAndComparableTo canCompare minA b) kdTreeA termB -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (\ b -> isNearAndComparableTo (flip canCompare) (pred j) a b && termIndex b >= j) kdTreeB foundA + UnmappedTerm j' _ _ <- nearestUnmapped (isNearAndComparableTo (flip canCompare) j a) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! let (deleted, _ : restUnmappedA) = span ((< i) . termIndex) unmappedA in - (This . (termIndex &&& term) <$> deleted) <> (These (i, a) (j, b) : go i restUnmappedA restUnmappedB) + (This . (termIndex &&& term) <$> deleted) <> (These (i, a) (j, b) : go restUnmappedA restUnmappedB) (kdTreeA, kdTreeB) = (toKdMap as, toKdMap bs) isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool -isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange (succ index, index + defaultMoveBound) k && canCompareTerms canCompare term' term +isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange (index, index + defaultMoveBound) k && canCompareTerms canCompare term' term -- | Finds the most-similar unmapped term to the passed-in term, if any. -- @@ -118,7 +118,7 @@ defaultD = 15 defaultL = 2 defaultP = 2 defaultQ = 3 -defaultMoveBound = 1 +defaultMoveBound = 0 mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) From 7c193c0b5ee3efb8f4aed63f97b7cecfb93cfd63 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 23:23:08 -0400 Subject: [PATCH 074/120] Short-circuit single-element lists. --- src/RWS.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/RWS.hs b/src/RWS.hs index 043e30cda..9af6e2275 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -83,6 +83,8 @@ findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) findNearestNeighbourTo canCompare as bs = go as bs where go as [] = This . (termIndex &&& term) <$> as go [] bs = That . (termIndex &&& term) <$> bs + go [a] [b] | canCompareTerms canCompare (term a) (term b) = [These (termIndex a, term a) (termIndex b, term b)] + | otherwise = [That (termIndex b, term b), This (termIndex a, term a)] go unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = fromMaybe (That (j, b) : go unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. From fedf85e823b25e57b00d91be48e9e478767f5b6e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 23:41:00 -0400 Subject: [PATCH 075/120] Featurize the unmapped terms early. --- src/RWS.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 9af6e2275..8d144307b 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -63,7 +63,7 @@ rws _ _ as [] = This <$> as rws _ _ [] bs = That <$> bs rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a] rws canCompare equivalent as bs - = ses (\ a b -> equivalent (snd a) (snd b)) (zip [0..] as) (zip [0..] bs) + = ses (\ a b -> equivalent (term a) (term b)) (zipWith featurize [0..] as) (zipWith featurize [0..] bs) & mapContiguous canCompare & fmap (bimap snd snd) @@ -125,14 +125,14 @@ defaultMoveBound = 0 mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) - -> [These (Int, Term syntax (Record (FeatureVector ': fields1))) (Int, Term syntax (Record (FeatureVector ': fields2)))] + -> [These (UnmappedTerm syntax (Record (FeatureVector ': fields1))) (UnmappedTerm syntax (Record (FeatureVector ': fields2)))] -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] mapContiguous canCompare = go [] [] where go as bs [] = mapChunk as bs go as bs (first : rest) = case first of - This (i, a) -> go (featurize i a : as) bs rest - That (j, b) -> go as (featurize j b : bs) rest - These _ _ -> mapChunk as bs <> (first : go [] [] rest) + This a -> go (a : as) bs rest + That b -> go as (b : bs) rest + These _ _ -> mapChunk as bs <> (bimap (termIndex &&& term) (termIndex &&& term) first : go [] [] rest) mapChunk as [] = This . (termIndex &&& term) <$> reverse as mapChunk [] bs = That . (termIndex &&& term) <$> reverse bs mapChunk as bs = findNearestNeighbourTo canCompare (reverse as) (reverse bs) From e2d5e81f7fc84a1c42c7eb10c6c0752d6452f4de Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 23:45:22 -0400 Subject: [PATCH 076/120] Leave terms in UnmappedTerm. --- src/RWS.hs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 8d144307b..24aa34210 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -65,12 +65,12 @@ rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [Thes rws canCompare equivalent as bs = ses (\ a b -> equivalent (term a) (term b)) (zipWith featurize [0..] as) (zipWith featurize [0..] bs) & mapContiguous canCompare - & fmap (bimap snd snd) + & fmap (bimap term term) type Edit syntax ann1 ann2 = These (Term syntax ann1) (Term syntax ann2) -- A Diff paired with both its indices -type MappedDiff syntax ann1 ann2 = These (Int, Term syntax ann1) (Int, Term syntax ann2) +type MappedDiff syntax ann1 ann2 = These (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2) type RWSEditScript syntax ann1 ann2 = [Edit syntax ann1 ann2] @@ -81,12 +81,12 @@ findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) -> [UnmappedTerm syntax ann2] -> [MappedDiff syntax ann1 ann2] findNearestNeighbourTo canCompare as bs = go as bs - where go as [] = This . (termIndex &&& term) <$> as - go [] bs = That . (termIndex &&& term) <$> bs - go [a] [b] | canCompareTerms canCompare (term a) (term b) = [These (termIndex a, term a) (termIndex b, term b)] - | otherwise = [That (termIndex b, term b), This (termIndex a, term a)] - go unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = - fromMaybe (That (j, b) : go unmappedA restUnmappedB) $ do + where go as [] = This <$> as + go [] bs = That <$> bs + go [a] [b] | canCompareTerms canCompare (term a) (term b) = [These a b] + | otherwise = [That b, This a] + go unmappedA@(termA@(UnmappedTerm minA _ _) : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = + fromMaybe (That termB : go unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. foundA@(UnmappedTerm i _ a) <- nearestUnmapped (isNearAndComparableTo canCompare minA b) kdTreeA termB -- Look up the nearest `foundA` in `unmappedB` @@ -95,7 +95,7 @@ findNearestNeighbourTo canCompare as bs = go as bs guard (j == j') pure $! let (deleted, _ : restUnmappedA) = span ((< i) . termIndex) unmappedA in - (This . (termIndex &&& term) <$> deleted) <> (These (i, a) (j, b) : go restUnmappedA restUnmappedB) + (This <$> deleted) <> (These termA termB : go restUnmappedA restUnmappedB) (kdTreeA, kdTreeB) = (toKdMap as, toKdMap bs) isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool @@ -132,9 +132,9 @@ mapContiguous canCompare = go [] [] go as bs (first : rest) = case first of This a -> go (a : as) bs rest That b -> go as (b : bs) rest - These _ _ -> mapChunk as bs <> (bimap (termIndex &&& term) (termIndex &&& term) first : go [] [] rest) - mapChunk as [] = This . (termIndex &&& term) <$> reverse as - mapChunk [] bs = That . (termIndex &&& term) <$> reverse bs + These _ _ -> mapChunk as bs <> (first : go [] [] rest) + mapChunk as [] = This <$> reverse as + mapChunk [] bs = That <$> reverse bs mapChunk as bs = findNearestNeighbourTo canCompare (reverse as) (reverse bs) From 0766bb4c1b77f219db11b6f3e51be0b37b584e71 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 23:50:10 -0400 Subject: [PATCH 077/120] Generalize mapContiguous over the annotation types. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 24aa34210..d6ac21eb3 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -124,9 +124,9 @@ defaultMoveBound = 0 mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) - => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) - -> [These (UnmappedTerm syntax (Record (FeatureVector ': fields1))) (UnmappedTerm syntax (Record (FeatureVector ': fields2)))] - -> [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))] + => ComparabilityRelation syntax ann1 ann2 + -> [These (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2)] + -> [MappedDiff syntax ann1 ann2] mapContiguous canCompare = go [] [] where go as bs [] = mapChunk as bs go as bs (first : rest) = case first of From f6ea602ccac863dde272bacddca882603ab0b0ce Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 23 Oct 2017 23:56:52 -0400 Subject: [PATCH 078/120] Replicate the random double action directly. --- src/RWS.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index d6ac21eb3..5f317b2fb 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -14,6 +14,7 @@ module RWS import Control.Applicative (empty) import Control.Arrow ((&&&)) +import Control.Monad (replicateM) import Control.Monad.Random import Control.Monad.State.Strict import Data.Align.Generic @@ -210,7 +211,7 @@ unitVector :: Int -> Int -> FeatureVector unitVector d hash = FV $ listArray (0, d - 1) ((* invMagnitude) <$> components) where invMagnitude = 1 / sqrt (sum (fmap (** 2) components)) - components = evalRand (sequenceA (replicate d (liftRand randomDouble))) (pureMT (fromIntegral hash)) + components = evalRand (replicateM d (liftRand randomDouble)) (pureMT (fromIntegral hash)) -- | Test the comparability of two root 'Term's in O(1). canCompareTerms :: ComparabilityRelation syntax ann1 ann2 -> Term syntax ann1 -> Term syntax ann2 -> Bool From b2ef834746e183b1e4f11b3e8fc6f122d1f1ff75 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 00:00:33 -0400 Subject: [PATCH 079/120] Rename the min index in A. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 5f317b2fb..a5c64612e 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -86,16 +86,16 @@ findNearestNeighbourTo canCompare as bs = go as bs go [] bs = That <$> bs go [a] [b] | canCompareTerms canCompare (term a) (term b) = [These a b] | otherwise = [That b, This a] - go unmappedA@(termA@(UnmappedTerm minA _ _) : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = + go unmappedA@(termA@(UnmappedTerm i _ _) : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = fromMaybe (That termB : go unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i _ a) <- nearestUnmapped (isNearAndComparableTo canCompare minA b) kdTreeA termB + foundA@(UnmappedTerm i' _ a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdTreeA termB -- Look up the nearest `foundA` in `unmappedB` UnmappedTerm j' _ _ <- nearestUnmapped (isNearAndComparableTo (flip canCompare) j a) kdTreeB foundA -- Return Nothing if their indices don't match guard (j == j') pure $! - let (deleted, _ : restUnmappedA) = span ((< i) . termIndex) unmappedA in + let (deleted, _ : restUnmappedA) = span ((< i') . termIndex) unmappedA in (This <$> deleted) <> (These termA termB : go restUnmappedA restUnmappedB) (kdTreeA, kdTreeB) = (toKdMap as, toKdMap bs) From fc65ef95b26168ffd96a16c1d1912fa081b5aba4 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 00:03:20 -0400 Subject: [PATCH 080/120] Featurize without getField. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index a5c64612e..c74288bb6 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -140,7 +140,7 @@ mapContiguous canCompare = go [] [] featurize :: Functor syntax => Int -> Term syntax (Record (FeatureVector ': fields)) -> UnmappedTerm syntax (Record (FeatureVector ': fields)) -featurize index term = UnmappedTerm index (getField (extract term)) (eraseFeatureVector term) +featurize index term = UnmappedTerm index (rhead (extract term)) (eraseFeatureVector term) eraseFeatureVector :: Functor syntax => Term syntax (Record (FeatureVector ': fields)) -> Term syntax (Record (FeatureVector ': fields)) eraseFeatureVector (Term.Term (In record functor)) = termIn (setFeatureVector record nullFeatureVector) functor From a87d33032370f8296bc8c4e3e5acd3da33ed269a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 10:05:50 -0400 Subject: [PATCH 081/120] :fire: redundant cases. --- src/RWS.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index c74288bb6..fbb76af39 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -134,8 +134,6 @@ mapContiguous canCompare = go [] [] This a -> go (a : as) bs rest That b -> go as (b : bs) rest These _ _ -> mapChunk as bs <> (first : go [] [] rest) - mapChunk as [] = This <$> reverse as - mapChunk [] bs = That <$> reverse bs mapChunk as bs = findNearestNeighbourTo canCompare (reverse as) (reverse bs) From 7692812a38b47c55578ecc2421b2c75bd5528fe2 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 10:06:20 -0400 Subject: [PATCH 082/120] :fire: mapChunk. --- src/RWS.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index fbb76af39..f060ce03a 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -129,12 +129,11 @@ mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) -> [These (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2)] -> [MappedDiff syntax ann1 ann2] mapContiguous canCompare = go [] [] - where go as bs [] = mapChunk as bs + where go as bs [] = findNearestNeighbourTo canCompare (reverse as) (reverse bs) go as bs (first : rest) = case first of This a -> go (a : as) bs rest That b -> go as (b : bs) rest - These _ _ -> mapChunk as bs <> (first : go [] [] rest) - mapChunk as bs = findNearestNeighbourTo canCompare (reverse as) (reverse bs) + These _ _ -> findNearestNeighbourTo canCompare (reverse as) (reverse bs) <> (first : go [] [] rest) featurize :: Functor syntax => Int -> Term syntax (Record (FeatureVector ': fields)) -> UnmappedTerm syntax (Record (FeatureVector ': fields)) From 354204107c8ffb6733d0d3d809360ec709ae9f24 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 10:08:03 -0400 Subject: [PATCH 083/120] Inline the definition of mapContiguous. --- src/RWS.hs | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index f060ce03a..6b01a9ac1 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -65,8 +65,13 @@ rws _ _ [] bs = That <$> bs rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a] rws canCompare equivalent as bs = ses (\ a b -> equivalent (term a) (term b)) (zipWith featurize [0..] as) (zipWith featurize [0..] bs) - & mapContiguous canCompare + & mapContiguous [] [] & fmap (bimap term term) + where mapContiguous as bs [] = findNearestNeighbourTo canCompare (reverse as) (reverse bs) + mapContiguous as bs (first : rest) = case first of + This a -> mapContiguous (a : as) bs rest + That b -> mapContiguous as (b : bs) rest + These _ _ -> findNearestNeighbourTo canCompare (reverse as) (reverse bs) <> (first : mapContiguous [] [] rest) type Edit syntax ann1 ann2 = These (Term syntax ann1) (Term syntax ann2) @@ -124,18 +129,6 @@ defaultQ = 3 defaultMoveBound = 0 -mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) - => ComparabilityRelation syntax ann1 ann2 - -> [These (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2)] - -> [MappedDiff syntax ann1 ann2] -mapContiguous canCompare = go [] [] - where go as bs [] = findNearestNeighbourTo canCompare (reverse as) (reverse bs) - go as bs (first : rest) = case first of - This a -> go (a : as) bs rest - That b -> go as (b : bs) rest - These _ _ -> findNearestNeighbourTo canCompare (reverse as) (reverse bs) <> (first : go [] [] rest) - - featurize :: Functor syntax => Int -> Term syntax (Record (FeatureVector ': fields)) -> UnmappedTerm syntax (Record (FeatureVector ': fields)) featurize index term = UnmappedTerm index (rhead (extract term)) (eraseFeatureVector term) From 96337277901e8c476d0c1585bc4e0e43c98b1121 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 10:20:34 -0400 Subject: [PATCH 084/120] :fire: the Edit synonym. --- src/RWS.hs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 6b01a9ac1..1b66b8203 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -73,12 +73,10 @@ rws canCompare equivalent as bs That b -> mapContiguous as (b : bs) rest These _ _ -> findNearestNeighbourTo canCompare (reverse as) (reverse bs) <> (first : mapContiguous [] [] rest) -type Edit syntax ann1 ann2 = These (Term syntax ann1) (Term syntax ann2) - -- A Diff paired with both its indices type MappedDiff syntax ann1 ann2 = These (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2) -type RWSEditScript syntax ann1 ann2 = [Edit syntax ann1 ann2] +type RWSEditScript syntax ann1 ann2 = [These (Term syntax ann1) (Term syntax ann2)] -- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches. findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) From f7ddbd81cc75bccc3baa9b42bd642bde750d3545 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 10:21:21 -0400 Subject: [PATCH 085/120] :fire: the RWSEditScript synonym. --- src/RWS.hs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 1b66b8203..3bdfb65d5 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -59,7 +59,7 @@ rws :: (Foldable syntax, Functor syntax, GAlign syntax) -> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Bool) -> [Term syntax (Record (FeatureVector ': fields1))] -> [Term syntax (Record (FeatureVector ': fields2))] - -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) + -> [These (Term syntax (Record (FeatureVector ': fields1))) (Term syntax (Record (FeatureVector ': fields2)))] rws _ _ as [] = This <$> as rws _ _ [] bs = That <$> bs rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a] @@ -76,8 +76,6 @@ rws canCompare equivalent as bs -- A Diff paired with both its indices type MappedDiff syntax ann1 ann2 = These (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2) -type RWSEditScript syntax ann1 ann2 = [These (Term syntax ann1) (Term syntax ann2)] - -- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches. findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. From 16d5c977bd471fd06f211b0b3d5dddd4fe525639 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 10:21:40 -0400 Subject: [PATCH 086/120] :fire: the MappedDiff synonym. --- src/RWS.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 3bdfb65d5..49eabd768 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -73,15 +73,12 @@ rws canCompare equivalent as bs That b -> mapContiguous as (b : bs) rest These _ _ -> findNearestNeighbourTo canCompare (reverse as) (reverse bs) <> (first : mapContiguous [] [] rest) --- A Diff paired with both its indices -type MappedDiff syntax ann1 ann2 = These (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2) - -- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches. findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. -> [UnmappedTerm syntax ann1] -> [UnmappedTerm syntax ann2] - -> [MappedDiff syntax ann1 ann2] + -> [These (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2)] findNearestNeighbourTo canCompare as bs = go as bs where go as [] = This <$> as go [] bs = That <$> bs From fa2769ff9a99b27349b05a2d6f9dd4d50bb2758e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 10:34:29 -0400 Subject: [PATCH 087/120] Use the EditScript synonym from SES. --- src/RWS.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 49eabd768..410fd2f05 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -59,7 +59,7 @@ rws :: (Foldable syntax, Functor syntax, GAlign syntax) -> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Bool) -> [Term syntax (Record (FeatureVector ': fields1))] -> [Term syntax (Record (FeatureVector ': fields2))] - -> [These (Term syntax (Record (FeatureVector ': fields1))) (Term syntax (Record (FeatureVector ': fields2)))] + -> EditScript (Term syntax (Record (FeatureVector ': fields1))) (Term syntax (Record (FeatureVector ': fields2))) rws _ _ as [] = This <$> as rws _ _ [] bs = That <$> bs rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a] @@ -78,7 +78,7 @@ findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. -> [UnmappedTerm syntax ann1] -> [UnmappedTerm syntax ann2] - -> [These (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2)] + -> EditScript (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2) findNearestNeighbourTo canCompare as bs = go as bs where go as [] = This <$> as go [] bs = That <$> bs From 613a920ed17c615b5599ce9c48c5d801e5b4d59e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 10:35:22 -0400 Subject: [PATCH 088/120] Qualify the import of KdMap. --- src/RWS.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 410fd2f05..ce2a0a614 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -26,7 +26,7 @@ import Data.Function ((&)) import Data.Functor.Classes import Data.Functor.Foldable import Data.Hashable -import Data.KdMap.Static hiding (elems, empty, inRange, null) +import qualified Data.KdMap.Static as KdMap import Data.List (sortOn) import Data.Maybe import Data.Record @@ -107,11 +107,11 @@ isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange ( -- cf §4.2 of RWS-Diff nearestUnmapped :: (Foldable syntax, Functor syntax, GAlign syntax) => (UnmappedTerm syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. - -> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d map to look up nearest neighbours within. + -> KdMap.KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d map to look up nearest neighbours within. -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term matched by the predicate, if any. nearestUnmapped isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) - where candidates = filter isEligible (snd <$> kNearest tree defaultL (feature key)) + where candidates = filter isEligible (snd <$> KdMap.kNearest tree defaultL (feature key)) approximateEditDistance = editDistanceUpTo defaultM (term key) . term defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int @@ -135,8 +135,8 @@ setFeatureVector :: Record (FeatureVector ': fields) -> FeatureVector -> Record setFeatureVector = setField -toKdMap :: [UnmappedTerm syntax ann] -> KdMap Double FeatureVector (UnmappedTerm syntax ann) -toKdMap = build (elems . unFV) . fmap (feature &&& id) +toKdMap :: [UnmappedTerm syntax ann] -> KdMap.KdMap Double FeatureVector (UnmappedTerm syntax ann) +toKdMap = KdMap.build (elems . unFV) . fmap (feature &&& id) -- | A `Gram` is a fixed-size view of some portion of a tree, consisting of a `stem` of _p_ labels for parent nodes, and a `base` of _q_ labels of sibling nodes. Collectively, the bag of `Gram`s for each node of a tree (e.g. as computed by `pqGrams`) form a summary of the tree. data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } From 99ef60fbe6ce691a6bc5a7d518550a8eb90ba209 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 11:11:42 -0400 Subject: [PATCH 089/120] Use the strict random monad. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index ce2a0a614..7f2abc346 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -15,7 +15,7 @@ module RWS import Control.Applicative (empty) import Control.Arrow ((&&&)) import Control.Monad (replicateM) -import Control.Monad.Random +import Control.Monad.Random.Strict import Control.Monad.State.Strict import Data.Align.Generic import Data.Array.Unboxed From 3ba39eeb6aab4e2c8b898b44b057ebe47219c757 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 11:37:43 -0400 Subject: [PATCH 090/120] Inline findNearestNeighbourTo. --- src/RWS.hs | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 7f2abc346..ae5a3d068 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -67,35 +67,29 @@ rws canCompare equivalent as bs = ses (\ a b -> equivalent (term a) (term b)) (zipWith featurize [0..] as) (zipWith featurize [0..] bs) & mapContiguous [] [] & fmap (bimap term term) - where mapContiguous as bs [] = findNearestNeighbourTo canCompare (reverse as) (reverse bs) + where mapContiguous as bs [] = findNearestNeighbourTo (reverse as) (reverse bs) mapContiguous as bs (first : rest) = case first of This a -> mapContiguous (a : as) bs rest That b -> mapContiguous as (b : bs) rest - These _ _ -> findNearestNeighbourTo canCompare (reverse as) (reverse bs) <> (first : mapContiguous [] [] rest) + These _ _ -> findNearestNeighbourTo (reverse as) (reverse bs) <> (first : mapContiguous [] [] rest) --- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches. -findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) - => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. - -> [UnmappedTerm syntax ann1] - -> [UnmappedTerm syntax ann2] - -> EditScript (UnmappedTerm syntax ann1) (UnmappedTerm syntax ann2) -findNearestNeighbourTo canCompare as bs = go as bs - where go as [] = This <$> as - go [] bs = That <$> bs - go [a] [b] | canCompareTerms canCompare (term a) (term b) = [These a b] - | otherwise = [That b, This a] - go unmappedA@(termA@(UnmappedTerm i _ _) : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = - fromMaybe (That termB : go unmappedA restUnmappedB) $ do - -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i' _ a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdTreeA termB - -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (isNearAndComparableTo (flip canCompare) j a) kdTreeB foundA - -- Return Nothing if their indices don't match - guard (j == j') - pure $! - let (deleted, _ : restUnmappedA) = span ((< i') . termIndex) unmappedA in - (This <$> deleted) <> (These termA termB : go restUnmappedA restUnmappedB) - (kdTreeA, kdTreeB) = (toKdMap as, toKdMap bs) + findNearestNeighbourTo as bs = go as bs + where go as [] = This <$> as + go [] bs = That <$> bs + go [a] [b] | canCompareTerms canCompare (term a) (term b) = [These a b] + | otherwise = [That b, This a] + go unmappedA@(termA@(UnmappedTerm i _ _) : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = + fromMaybe (That termB : go unmappedA restUnmappedB) $ do + -- Look up the nearest unmapped term in `unmappedA`. + foundA@(UnmappedTerm i' _ a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdTreeA termB + -- Look up the nearest `foundA` in `unmappedB` + UnmappedTerm j' _ _ <- nearestUnmapped (isNearAndComparableTo (flip canCompare) j a) kdTreeB foundA + -- Return Nothing if their indices don't match + guard (j == j') + pure $! + let (deleted, _ : restUnmappedA) = span ((< i') . termIndex) unmappedA in + (This <$> deleted) <> (These termA termB : go restUnmappedA restUnmappedB) + (kdTreeA, kdTreeB) = (toKdMap as, toKdMap bs) isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange (index, index + defaultMoveBound) k && canCompareTerms canCompare term' term From 2fa878e9284b4084b6d63edecc6070de096904cd Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 11:39:30 -0400 Subject: [PATCH 091/120] Rename findNearestNeighbourTo to mapSimilar. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index ae5a3d068..a1f9790cf 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -67,13 +67,13 @@ rws canCompare equivalent as bs = ses (\ a b -> equivalent (term a) (term b)) (zipWith featurize [0..] as) (zipWith featurize [0..] bs) & mapContiguous [] [] & fmap (bimap term term) - where mapContiguous as bs [] = findNearestNeighbourTo (reverse as) (reverse bs) + where mapContiguous as bs [] = mapSimilar (reverse as) (reverse bs) mapContiguous as bs (first : rest) = case first of This a -> mapContiguous (a : as) bs rest That b -> mapContiguous as (b : bs) rest - These _ _ -> findNearestNeighbourTo (reverse as) (reverse bs) <> (first : mapContiguous [] [] rest) + These _ _ -> mapSimilar (reverse as) (reverse bs) <> (first : mapContiguous [] [] rest) - findNearestNeighbourTo as bs = go as bs + mapSimilar as bs = go as bs where go as [] = This <$> as go [] bs = That <$> bs go [a] [b] | canCompareTerms canCompare (term a) (term b) = [These a b] From cd66c9c2aa1c1c0b1c00da317a4ca9963b262d46 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:04:56 -0400 Subject: [PATCH 092/120] :memo: mapContiguous. --- src/RWS.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index a1f9790cf..94e918edc 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -67,7 +67,8 @@ rws canCompare equivalent as bs = ses (\ a b -> equivalent (term a) (term b)) (zipWith featurize [0..] as) (zipWith featurize [0..] bs) & mapContiguous [] [] & fmap (bimap term term) - where mapContiguous as bs [] = mapSimilar (reverse as) (reverse bs) + where -- Map contiguous sequences of unmapped terms separated by SES-mapped equivalencies. + mapContiguous as bs [] = mapSimilar (reverse as) (reverse bs) mapContiguous as bs (first : rest) = case first of This a -> mapContiguous (a : as) bs rest That b -> mapContiguous as (b : bs) rest From 12841d6bfe9d7266ab1edf49872efe752bd1f13e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:05:07 -0400 Subject: [PATCH 093/120] :fire: UnmappedTerm. --- src/RWS.hs | 54 +++++++++++++++++------------------------------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 94e918edc..ac52db2d0 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -47,13 +47,6 @@ type ComparabilityRelation syntax ann1 ann2 = forall a b. TermF syntax ann1 a -> newtype FeatureVector = FV { unFV :: UArray Int Double } deriving (Eq, Ord, Show) --- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index. -data UnmappedTerm syntax ann = UnmappedTerm - { termIndex :: {-# UNPACK #-} !Int -- ^ The index of the term within its root term. - , feature :: {-# UNPACK #-} !FeatureVector -- ^ Feature vector - , term :: Term syntax ann -- ^ The unmapped term - } - rws :: (Foldable syntax, Functor syntax, GAlign syntax) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Bool) @@ -64,9 +57,9 @@ rws _ _ as [] = This <$> as rws _ _ [] bs = That <$> bs rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a] rws canCompare equivalent as bs - = ses (\ a b -> equivalent (term a) (term b)) (zipWith featurize [0..] as) (zipWith featurize [0..] bs) + = ses (\ a b -> equivalent (snd a) (snd b)) (zip [0..] as) (zip [0..] bs) & mapContiguous [] [] - & fmap (bimap term term) + & fmap (bimap snd snd) where -- Map contiguous sequences of unmapped terms separated by SES-mapped equivalencies. mapContiguous as bs [] = mapSimilar (reverse as) (reverse bs) mapContiguous as bs (first : rest) = case first of @@ -77,23 +70,23 @@ rws canCompare equivalent as bs mapSimilar as bs = go as bs where go as [] = This <$> as go [] bs = That <$> bs - go [a] [b] | canCompareTerms canCompare (term a) (term b) = [These a b] + go [a] [b] | canCompareTerms canCompare (snd a) (snd b) = [These a b] | otherwise = [That b, This a] - go unmappedA@(termA@(UnmappedTerm i _ _) : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = + go unmappedA@(termA@(i, _) : _) (termB@(j, b) : restUnmappedB) = fromMaybe (That termB : go unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. - foundA@(UnmappedTerm i' _ a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdTreeA termB + (i', a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdTreeA b -- Look up the nearest `foundA` in `unmappedB` - UnmappedTerm j' _ _ <- nearestUnmapped (isNearAndComparableTo (flip canCompare) j a) kdTreeB foundA + (j', _) <- nearestUnmapped (isNearAndComparableTo (flip canCompare) j a) kdTreeB a -- Return Nothing if their indices don't match guard (j == j') pure $! - let (deleted, _ : restUnmappedA) = span ((< i') . termIndex) unmappedA in + let (deleted, _ : restUnmappedA) = span ((< i') . fst) unmappedA in (This <$> deleted) <> (These termA termB : go restUnmappedA restUnmappedB) (kdTreeA, kdTreeB) = (toKdMap as, toKdMap bs) -isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> UnmappedTerm syntax ann1 -> Bool -isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange (index, index + defaultMoveBound) k && canCompareTerms canCompare term' term +isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> Int -> Term syntax ann1 -> Bool +isNearAndComparableTo canCompare index term k term' = inRange (index, index + defaultMoveBound) k && canCompareTerms canCompare term' term -- | Finds the most-similar unmapped term to the passed-in term, if any. -- @@ -101,13 +94,13 @@ isNearAndComparableTo canCompare index term (UnmappedTerm k _ term') = inRange ( -- -- cf §4.2 of RWS-Diff nearestUnmapped :: (Foldable syntax, Functor syntax, GAlign syntax) - => (UnmappedTerm syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. - -> KdMap.KdMap Double FeatureVector (UnmappedTerm syntax ann1) -- ^ The k-d map to look up nearest neighbours within. - -> UnmappedTerm syntax ann2 -- ^ The term to find the nearest neighbour to. - -> Maybe (UnmappedTerm syntax ann1) -- ^ The most similar unmapped term matched by the predicate, if any. + => (Int -> Term syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. + -> KdMap.KdMap Double FeatureVector (Int, Term syntax ann1) -- ^ The k-d map to look up nearest neighbours within. + -> Term syntax (Record (FeatureVector ': fields2)) -- ^ The term to find the nearest neighbour to. + -> Maybe (Int, Term syntax ann1) -- ^ The most similar unmapped term matched by the predicate, if any. nearestUnmapped isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) - where candidates = filter isEligible (snd <$> KdMap.kNearest tree defaultL (feature key)) - approximateEditDistance = editDistanceUpTo defaultM (term key) . term + where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract key))) + approximateEditDistance = editDistanceUpTo defaultM key . snd defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int defaultD = 15 @@ -117,21 +110,8 @@ defaultQ = 3 defaultMoveBound = 0 -featurize :: Functor syntax => Int -> Term syntax (Record (FeatureVector ': fields)) -> UnmappedTerm syntax (Record (FeatureVector ': fields)) -featurize index term = UnmappedTerm index (rhead (extract term)) (eraseFeatureVector term) - -eraseFeatureVector :: Functor syntax => Term syntax (Record (FeatureVector ': fields)) -> Term syntax (Record (FeatureVector ': fields)) -eraseFeatureVector (Term.Term (In record functor)) = termIn (setFeatureVector record nullFeatureVector) functor - -nullFeatureVector :: FeatureVector -nullFeatureVector = FV $ listArray (0, 0) [0] - -setFeatureVector :: Record (FeatureVector ': fields) -> FeatureVector -> Record (FeatureVector ': fields) -setFeatureVector = setField - - -toKdMap :: [UnmappedTerm syntax ann] -> KdMap.KdMap Double FeatureVector (UnmappedTerm syntax ann) -toKdMap = KdMap.build (elems . unFV) . fmap (feature &&& id) +toKdMap :: Functor syntax => [(Int, Term syntax (Record (FeatureVector ': fields)))] -> KdMap.KdMap Double FeatureVector (Int, Term syntax (Record (FeatureVector ': fields))) +toKdMap = KdMap.build (elems . unFV) . fmap (rhead . extract . snd &&& id) -- | A `Gram` is a fixed-size view of some portion of a tree, consisting of a `stem` of _p_ labels for parent nodes, and a `base` of _q_ labels of sibling nodes. Collectively, the bag of `Gram`s for each node of a tree (e.g. as computed by `pqGrams`) form a summary of the tree. data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } From 8d095b25d6995e6ff414994614358f7ddf31d2e8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:05:59 -0400 Subject: [PATCH 094/120] Rename the k-d maps. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index ac52db2d0..e090a1ff8 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -75,15 +75,15 @@ rws canCompare equivalent as bs go unmappedA@(termA@(i, _) : _) (termB@(j, b) : restUnmappedB) = fromMaybe (That termB : go unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. - (i', a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdTreeA b + (i', a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdMapA b -- Look up the nearest `foundA` in `unmappedB` - (j', _) <- nearestUnmapped (isNearAndComparableTo (flip canCompare) j a) kdTreeB a + (j', _) <- nearestUnmapped (isNearAndComparableTo (flip canCompare) j a) kdMapB a -- Return Nothing if their indices don't match guard (j == j') pure $! let (deleted, _ : restUnmappedA) = span ((< i') . fst) unmappedA in (This <$> deleted) <> (These termA termB : go restUnmappedA restUnmappedB) - (kdTreeA, kdTreeB) = (toKdMap as, toKdMap bs) + (kdMapA, kdMapB) = (toKdMap as, toKdMap bs) isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> Int -> Term syntax ann1 -> Bool isNearAndComparableTo canCompare index term k term' = inRange (index, index + defaultMoveBound) k && canCompareTerms canCompare term' term From 0d484a4775e993d5c68ade30008fb4e7cd6edcc7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:07:39 -0400 Subject: [PATCH 095/120] :memo: mapSimilar. --- src/RWS.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/RWS.hs b/src/RWS.hs index e090a1ff8..2e8d15c9c 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -67,6 +67,7 @@ rws canCompare equivalent as bs That b -> mapContiguous as (b : bs) rest These _ _ -> mapSimilar (reverse as) (reverse bs) <> (first : mapContiguous [] [] rest) + -- Map comparable, mutually similar terms, inserting & deleting surrounding terms. mapSimilar as bs = go as bs where go as [] = This <$> as go [] bs = That <$> bs From ef7d16657cef75573f03d5eb3c6914103537b63b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:12:28 -0400 Subject: [PATCH 096/120] Index terms in mapSimilar. --- src/RWS.hs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 2e8d15c9c..85fb02c66 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -19,7 +19,6 @@ import Control.Monad.Random.Strict import Control.Monad.State.Strict import Data.Align.Generic import Data.Array.Unboxed -import Data.Bifunctor (bimap) import Data.Diff (DiffF(..), deleting, inserting, merge, replacing) import Data.Foldable import Data.Function ((&)) @@ -57,9 +56,8 @@ rws _ _ as [] = This <$> as rws _ _ [] bs = That <$> bs rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [These a b] else [That b, This a] rws canCompare equivalent as bs - = ses (\ a b -> equivalent (snd a) (snd b)) (zip [0..] as) (zip [0..] bs) + = ses equivalent as bs & mapContiguous [] [] - & fmap (bimap snd snd) where -- Map contiguous sequences of unmapped terms separated by SES-mapped equivalencies. mapContiguous as bs [] = mapSimilar (reverse as) (reverse bs) mapContiguous as bs (first : rest) = case first of @@ -68,13 +66,13 @@ rws canCompare equivalent as bs These _ _ -> mapSimilar (reverse as) (reverse bs) <> (first : mapContiguous [] [] rest) -- Map comparable, mutually similar terms, inserting & deleting surrounding terms. - mapSimilar as bs = go as bs - where go as [] = This <$> as - go [] bs = That <$> bs - go [a] [b] | canCompareTerms canCompare (snd a) (snd b) = [These a b] - | otherwise = [That b, This a] - go unmappedA@(termA@(i, _) : _) (termB@(j, b) : restUnmappedB) = - fromMaybe (That termB : go unmappedA restUnmappedB) $ do + mapSimilar as' bs' = go as bs + where go as [] = This . snd <$> as + go [] bs = That . snd <$> bs + go [a] [b] | canCompareTerms canCompare (snd a) (snd b) = [These (snd a) (snd b)] + | otherwise = [That (snd b), This (snd a)] + go unmappedA@((i, _) : _) ((j, b) : restUnmappedB) = + fromMaybe (That b : go unmappedA restUnmappedB) $ do -- Look up the nearest unmapped term in `unmappedA`. (i', a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdMapA b -- Look up the nearest `foundA` in `unmappedB` @@ -83,7 +81,8 @@ rws canCompare equivalent as bs guard (j == j') pure $! let (deleted, _ : restUnmappedA) = span ((< i') . fst) unmappedA in - (This <$> deleted) <> (These termA termB : go restUnmappedA restUnmappedB) + (This . snd <$> deleted) <> (These a b : go restUnmappedA restUnmappedB) + (as, bs) = (zip [0..] as', zip [0..] bs') (kdMapA, kdMapB) = (toKdMap as, toKdMap bs) isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> Int -> Term syntax ann1 -> Bool From 7f3b1130ee58d37fb5288c007520242bce402a8e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:13:36 -0400 Subject: [PATCH 097/120] Correct indentation. --- src/RWS.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 85fb02c66..0162a4e4b 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -94,9 +94,9 @@ isNearAndComparableTo canCompare index term k term' = inRange (index, index + de -- -- cf §4.2 of RWS-Diff nearestUnmapped :: (Foldable syntax, Functor syntax, GAlign syntax) - => (Int -> Term syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. + => (Int -> Term syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. -> KdMap.KdMap Double FeatureVector (Int, Term syntax ann1) -- ^ The k-d map to look up nearest neighbours within. - -> Term syntax (Record (FeatureVector ': fields2)) -- ^ The term to find the nearest neighbour to. + -> Term syntax (Record (FeatureVector ': fields2)) -- ^ The term to find the nearest neighbour to. -> Maybe (Int, Term syntax ann1) -- ^ The most similar unmapped term matched by the predicate, if any. nearestUnmapped isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract key))) From b4903b6b9b608f3ce209cc5852ff00fac0d1e7e6 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:15:09 -0400 Subject: [PATCH 098/120] Rename the unmapped lists. --- src/RWS.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 0162a4e4b..0de0d1008 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -71,8 +71,8 @@ rws canCompare equivalent as bs go [] bs = That . snd <$> bs go [a] [b] | canCompareTerms canCompare (snd a) (snd b) = [These (snd a) (snd b)] | otherwise = [That (snd b), This (snd a)] - go unmappedA@((i, _) : _) ((j, b) : restUnmappedB) = - fromMaybe (That b : go unmappedA restUnmappedB) $ do + go as@((i, _) : _) ((j, b) : restB) = + fromMaybe (That b : go as restB) $ do -- Look up the nearest unmapped term in `unmappedA`. (i', a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdMapA b -- Look up the nearest `foundA` in `unmappedB` @@ -80,8 +80,8 @@ rws canCompare equivalent as bs -- Return Nothing if their indices don't match guard (j == j') pure $! - let (deleted, _ : restUnmappedA) = span ((< i') . fst) unmappedA in - (This . snd <$> deleted) <> (These a b : go restUnmappedA restUnmappedB) + let (deleted, _ : restA) = span ((< i') . fst) as in + (This . snd <$> deleted) <> (These a b : go restA restB) (as, bs) = (zip [0..] as', zip [0..] bs') (kdMapA, kdMapB) = (toKdMap as, toKdMap bs) From 35c121bea1631358b05c538ad998528312dd96a4 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:17:58 -0400 Subject: [PATCH 099/120] Rename nearestUnmapped to mostSimilarMatching. --- src/RWS.hs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 0de0d1008..3f2f0bdfd 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -74,9 +74,9 @@ rws canCompare equivalent as bs go as@((i, _) : _) ((j, b) : restB) = fromMaybe (That b : go as restB) $ do -- Look up the nearest unmapped term in `unmappedA`. - (i', a) <- nearestUnmapped (isNearAndComparableTo canCompare i b) kdMapA b + (i', a) <- mostSimilarMatching (isNearAndComparableTo canCompare i b) kdMapA b -- Look up the nearest `foundA` in `unmappedB` - (j', _) <- nearestUnmapped (isNearAndComparableTo (flip canCompare) j a) kdMapB a + (j', _) <- mostSimilarMatching (isNearAndComparableTo (flip canCompare) j a) kdMapB a -- Return Nothing if their indices don't match guard (j == j') pure $! @@ -88,17 +88,17 @@ rws canCompare equivalent as bs isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> Int -> Term syntax ann1 -> Bool isNearAndComparableTo canCompare index term k term' = inRange (index, index + defaultMoveBound) k && canCompareTerms canCompare term' term --- | Finds the most-similar unmapped term to the passed-in term, if any. +-- | Finds the most-similar term to the passed-in term, if any. -- --- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance. +-- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which don’t match the predicate, and select the minimum of the remaining by (a constant-time approximation of) edit distance. -- --- cf §4.2 of RWS-Diff -nearestUnmapped :: (Foldable syntax, Functor syntax, GAlign syntax) - => (Int -> Term syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. - -> KdMap.KdMap Double FeatureVector (Int, Term syntax ann1) -- ^ The k-d map to look up nearest neighbours within. - -> Term syntax (Record (FeatureVector ': fields2)) -- ^ The term to find the nearest neighbour to. - -> Maybe (Int, Term syntax ann1) -- ^ The most similar unmapped term matched by the predicate, if any. -nearestUnmapped isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) +-- cf §4.2 of RWS-Diff +mostSimilarMatching :: (Foldable syntax, Functor syntax, GAlign syntax) + => (Int -> Term syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. + -> KdMap.KdMap Double FeatureVector (Int, Term syntax ann1) -- ^ The k-d map to look up nearest neighbours within. + -> Term syntax (Record (FeatureVector ': fields2)) -- ^ The term to find the nearest neighbour to. + -> Maybe (Int, Term syntax ann1) -- ^ The most similar term matched by the predicate, if any. +mostSimilarMatching isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract key))) approximateEditDistance = editDistanceUpTo defaultM key . snd From f9d5846ccdec9b1c8066fd707c94309367507f60 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:19:25 -0400 Subject: [PATCH 100/120] Correct some comments. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 3f2f0bdfd..8c394d6d5 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -73,11 +73,11 @@ rws canCompare equivalent as bs | otherwise = [That (snd b), This (snd a)] go as@((i, _) : _) ((j, b) : restB) = fromMaybe (That b : go as restB) $ do - -- Look up the nearest unmapped term in `unmappedA`. + -- Look up the most similar term to b near i. (i', a) <- mostSimilarMatching (isNearAndComparableTo canCompare i b) kdMapA b - -- Look up the nearest `foundA` in `unmappedB` + -- Look up the most similar term to a near j. (j', _) <- mostSimilarMatching (isNearAndComparableTo (flip canCompare) j a) kdMapB a - -- Return Nothing if their indices don't match + -- Fail out if there’s a better match for a nearby. guard (j == j') pure $! let (deleted, _ : restA) = span ((< i') . fst) as in From 0ba1b354adb6a436590d4c2a4610f7cc90f28205 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:23:07 -0400 Subject: [PATCH 101/120] defaultM is an Int. --- src/RWS.hs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 8c394d6d5..f505d404f 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -102,9 +102,10 @@ mostSimilarMatching isEligible tree key = listToMaybe (sortOn approximateEditDis where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract key))) approximateEditDistance = editDistanceUpTo defaultM key . snd -defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int +defaultD, defaultL, defaultM, defaultP, defaultQ, defaultMoveBound :: Int defaultD = 15 defaultL = 2 +defaultM = 10 defaultP = 2 defaultQ = 3 defaultMoveBound = 0 @@ -181,13 +182,9 @@ equalTerms canCompare = go where go a b = canCompareTerms canCompare a b && liftEq go (termOut (unTerm a)) (termOut (unTerm b)) --- | How many nodes to consider for our constant-time approximation to tree edit distance. -defaultM :: Integer -defaultM = 10 - -- | Return an edit distance as the sum of it's term sizes, given an cutoff and a syntax of terms 'f a'. -- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost. -editDistanceUpTo :: (GAlign syntax, Foldable syntax, Functor syntax) => Integer -> Term syntax ann1 -> Term syntax ann2 -> Int +editDistanceUpTo :: (GAlign syntax, Foldable syntax, Functor syntax) => Int -> Term syntax ann1 -> Term syntax ann2 -> Int editDistanceUpTo m a b = diffCost m (approximateDiff a b) where diffCost = flip . cata $ \ diff m -> case diff of _ | m <= 0 -> 0 From 3e03e326c0b31afbd3014362c560fd0bbf7b5716 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:23:14 -0400 Subject: [PATCH 102/120] Correct the docs for editDistanceUpTo. --- src/RWS.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index f505d404f..065a74754 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -182,8 +182,9 @@ equalTerms canCompare = go where go a b = canCompareTerms canCompare a b && liftEq go (termOut (unTerm a)) (termOut (unTerm b)) --- | Return an edit distance as the sum of it's term sizes, given an cutoff and a syntax of terms 'f a'. --- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost. +-- | Return an edit distance between two terms, up to a certain depth. +-- +-- Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost. editDistanceUpTo :: (GAlign syntax, Foldable syntax, Functor syntax) => Int -> Term syntax ann1 -> Term syntax ann2 -> Int editDistanceUpTo m a b = diffCost m (approximateDiff a b) where diffCost = flip . cata $ \ diff m -> case diff of From a27ef1cdeae47373419d88e4f8ee5742c49a64cd Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:27:07 -0400 Subject: [PATCH 103/120] Rename defaultMoveBound to lookaheadPlaces. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 065a74754..a1cdbad89 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -86,7 +86,7 @@ rws canCompare equivalent as bs (kdMapA, kdMapB) = (toKdMap as, toKdMap bs) isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> Int -> Term syntax ann1 -> Bool -isNearAndComparableTo canCompare index term k term' = inRange (index, index + defaultMoveBound) k && canCompareTerms canCompare term' term +isNearAndComparableTo canCompare index term k term' = inRange (index, index + lookaheadPlaces) k && canCompareTerms canCompare term' term -- | Finds the most-similar term to the passed-in term, if any. -- @@ -102,13 +102,13 @@ mostSimilarMatching isEligible tree key = listToMaybe (sortOn approximateEditDis where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract key))) approximateEditDistance = editDistanceUpTo defaultM key . snd -defaultD, defaultL, defaultM, defaultP, defaultQ, defaultMoveBound :: Int +defaultD, defaultL, defaultM, defaultP, defaultQ, lookaheadPlaces :: Int defaultD = 15 defaultL = 2 defaultM = 10 defaultP = 2 defaultQ = 3 -defaultMoveBound = 0 +lookaheadPlaces = 0 toKdMap :: Functor syntax => [(Int, Term syntax (Record (FeatureVector ': fields)))] -> KdMap.KdMap Double FeatureVector (Int, Term syntax (Record (FeatureVector ': fields))) From e2cf00235c0ba81c1fb44f0b74f45364aeee252e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:29:38 -0400 Subject: [PATCH 104/120] :memo: lookaheadPlaces. --- src/RWS.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index a1cdbad89..7f5c47889 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -102,14 +102,16 @@ mostSimilarMatching isEligible tree key = listToMaybe (sortOn approximateEditDis where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract key))) approximateEditDistance = editDistanceUpTo defaultM key . snd -defaultD, defaultL, defaultM, defaultP, defaultQ, lookaheadPlaces :: Int +defaultD, defaultL, defaultM, defaultP, defaultQ :: Int defaultD = 15 defaultL = 2 defaultM = 10 defaultP = 2 defaultQ = 3 -lookaheadPlaces = 0 +-- | How many places ahead should we look for similar terms? +lookaheadPlaces :: Int +lookaheadPlaces = 0 toKdMap :: Functor syntax => [(Int, Term syntax (Record (FeatureVector ': fields)))] -> KdMap.KdMap Double FeatureVector (Int, Term syntax (Record (FeatureVector ': fields))) toKdMap = KdMap.build (elems . unFV) . fmap (rhead . extract . snd &&& id) From 8ce174e734bf4c953a61f41eccb08f8f9489e6dc Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:37:15 -0400 Subject: [PATCH 105/120] Inline isNearAndComparableTo. --- src/RWS.hs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 7f5c47889..fb500d76b 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -74,9 +74,9 @@ rws canCompare equivalent as bs go as@((i, _) : _) ((j, b) : restB) = fromMaybe (That b : go as restB) $ do -- Look up the most similar term to b near i. - (i', a) <- mostSimilarMatching (isNearAndComparableTo canCompare i b) kdMapA b + (i', a) <- mostSimilarMatching (\ i' a -> inRange (i, i + lookaheadPlaces) i' && canCompareTerms canCompare a b) kdMapA b -- Look up the most similar term to a near j. - (j', _) <- mostSimilarMatching (isNearAndComparableTo (flip canCompare) j a) kdMapB a + (j', _) <- mostSimilarMatching (\ j' b -> inRange (j, j + lookaheadPlaces) j' && canCompareTerms canCompare a b) kdMapB a -- Fail out if there’s a better match for a nearby. guard (j == j') pure $! @@ -85,9 +85,6 @@ rws canCompare equivalent as bs (as, bs) = (zip [0..] as', zip [0..] bs') (kdMapA, kdMapB) = (toKdMap as, toKdMap bs) -isNearAndComparableTo :: ComparabilityRelation syntax ann1 ann2 -> Int -> Term syntax ann2 -> Int -> Term syntax ann1 -> Bool -isNearAndComparableTo canCompare index term k term' = inRange (index, index + lookaheadPlaces) k && canCompareTerms canCompare term' term - -- | Finds the most-similar term to the passed-in term, if any. -- -- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which don’t match the predicate, and select the minimum of the remaining by (a constant-time approximation of) edit distance. From b3f3d09f4a14b00fc22aa57aa7c09b3c12d58c5e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:57:16 -0400 Subject: [PATCH 106/120] Define an Options datatype. --- src/RWS.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/RWS.hs b/src/RWS.hs index fb500d76b..aff13c8de 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -85,6 +85,10 @@ rws canCompare equivalent as bs (as, bs) = (zip [0..] as', zip [0..] bs') (kdMapA, kdMapB) = (toKdMap as, toKdMap bs) +data Options = Options + { optionsLookaheadPlaces :: {-# UNPACK #-} !Int -- ^ How many places ahead should we look for similar terms? + } + -- | Finds the most-similar term to the passed-in term, if any. -- -- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which don’t match the predicate, and select the minimum of the remaining by (a constant-time approximation of) edit distance. From 5603d796f3b5807771965a1eaeeeac38b2b41707 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:57:22 -0400 Subject: [PATCH 107/120] Define default options. --- src/RWS.hs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/RWS.hs b/src/RWS.hs index aff13c8de..0df1b6525 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -89,6 +89,11 @@ data Options = Options { optionsLookaheadPlaces :: {-# UNPACK #-} !Int -- ^ How many places ahead should we look for similar terms? } +defaultOptions :: Options +defaultOptions = Options + { optionsLookaheadPlaces = 0 + } + -- | Finds the most-similar term to the passed-in term, if any. -- -- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which don’t match the predicate, and select the minimum of the remaining by (a constant-time approximation of) edit distance. From 91778d90a37ea08e7db4ae29812930192e697d60 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:57:35 -0400 Subject: [PATCH 108/120] Use the Options to configure lookahead. --- src/RWS.hs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 0df1b6525..7f6312372 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -58,7 +58,9 @@ rws canCompare _ [a] [b] = if canCompareTerms canCompare a b then [Thes rws canCompare equivalent as bs = ses equivalent as bs & mapContiguous [] [] - where -- Map contiguous sequences of unmapped terms separated by SES-mapped equivalencies. + where Options{..} = defaultOptions + + -- Map contiguous sequences of unmapped terms separated by SES-mapped equivalencies. mapContiguous as bs [] = mapSimilar (reverse as) (reverse bs) mapContiguous as bs (first : rest) = case first of This a -> mapContiguous (a : as) bs rest @@ -74,9 +76,9 @@ rws canCompare equivalent as bs go as@((i, _) : _) ((j, b) : restB) = fromMaybe (That b : go as restB) $ do -- Look up the most similar term to b near i. - (i', a) <- mostSimilarMatching (\ i' a -> inRange (i, i + lookaheadPlaces) i' && canCompareTerms canCompare a b) kdMapA b + (i', a) <- mostSimilarMatching (\ i' a -> inRange (i, i + optionsLookaheadPlaces) i' && canCompareTerms canCompare a b) kdMapA b -- Look up the most similar term to a near j. - (j', _) <- mostSimilarMatching (\ j' b -> inRange (j, j + lookaheadPlaces) j' && canCompareTerms canCompare a b) kdMapB a + (j', _) <- mostSimilarMatching (\ j' b -> inRange (j, j + optionsLookaheadPlaces) j' && canCompareTerms canCompare a b) kdMapB a -- Fail out if there’s a better match for a nearby. guard (j == j') pure $! @@ -115,9 +117,6 @@ defaultM = 10 defaultP = 2 defaultQ = 3 --- | How many places ahead should we look for similar terms? -lookaheadPlaces :: Int -lookaheadPlaces = 0 toKdMap :: Functor syntax => [(Int, Term syntax (Record (FeatureVector ': fields)))] -> KdMap.KdMap Double FeatureVector (Int, Term syntax (Record (FeatureVector ': fields))) toKdMap = KdMap.build (elems . unFV) . fmap (rhead . extract . snd &&& id) From 3d01c913419211ed1acb9fe140d9b18eb3d8dbdb Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:58:30 -0400 Subject: [PATCH 109/120] Rename key to term. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 7f6312372..c13980816 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -106,9 +106,9 @@ mostSimilarMatching :: (Foldable syntax, Functor syntax, GAlign syntax) -> KdMap.KdMap Double FeatureVector (Int, Term syntax ann1) -- ^ The k-d map to look up nearest neighbours within. -> Term syntax (Record (FeatureVector ': fields2)) -- ^ The term to find the nearest neighbour to. -> Maybe (Int, Term syntax ann1) -- ^ The most similar term matched by the predicate, if any. -mostSimilarMatching isEligible tree key = listToMaybe (sortOn approximateEditDistance candidates) - where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract key))) - approximateEditDistance = editDistanceUpTo defaultM key . snd +mostSimilarMatching isEligible tree term = listToMaybe (sortOn approximateEditDistance candidates) + where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract term))) + approximateEditDistance = editDistanceUpTo defaultM term . snd defaultD, defaultL, defaultM, defaultP, defaultQ :: Int defaultD = 15 From 8e6ec02a3767457eed42c3cdccdaca0e44b20a2e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 12:58:47 -0400 Subject: [PATCH 110/120] Inline approximateEditDistance. --- src/RWS.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index c13980816..7694f2c4e 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -106,9 +106,8 @@ mostSimilarMatching :: (Foldable syntax, Functor syntax, GAlign syntax) -> KdMap.KdMap Double FeatureVector (Int, Term syntax ann1) -- ^ The k-d map to look up nearest neighbours within. -> Term syntax (Record (FeatureVector ': fields2)) -- ^ The term to find the nearest neighbour to. -> Maybe (Int, Term syntax ann1) -- ^ The most similar term matched by the predicate, if any. -mostSimilarMatching isEligible tree term = listToMaybe (sortOn approximateEditDistance candidates) +mostSimilarMatching isEligible tree term = listToMaybe (sortOn (editDistanceUpTo defaultM term . snd) candidates) where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract term))) - approximateEditDistance = editDistanceUpTo defaultM term . snd defaultD, defaultL, defaultM, defaultP, defaultQ :: Int defaultD = 15 From fb39252959554e27a1389ec0a67489f0144bd7f9 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 13:00:18 -0400 Subject: [PATCH 111/120] Inline mostSimilarMatching. --- src/RWS.hs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 7694f2c4e..e4e97b119 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -87,6 +87,14 @@ rws canCompare equivalent as bs (as, bs) = (zip [0..] as', zip [0..] bs') (kdMapA, kdMapB) = (toKdMap as, toKdMap bs) + -- Find the most similar term matching a predicate, if any. + -- + -- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which don’t match the predicate, and select the minimum of the remaining by (a constant-time approximation of) edit distance. + -- + -- cf §4.2 of RWS-Diff + mostSimilarMatching isEligible tree term = listToMaybe (sortOn (editDistanceUpTo defaultM term . snd) candidates) + where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract term))) + data Options = Options { optionsLookaheadPlaces :: {-# UNPACK #-} !Int -- ^ How many places ahead should we look for similar terms? } @@ -96,19 +104,6 @@ defaultOptions = Options { optionsLookaheadPlaces = 0 } --- | Finds the most-similar term to the passed-in term, if any. --- --- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which don’t match the predicate, and select the minimum of the remaining by (a constant-time approximation of) edit distance. --- --- cf §4.2 of RWS-Diff -mostSimilarMatching :: (Foldable syntax, Functor syntax, GAlign syntax) - => (Int -> Term syntax ann1 -> Bool) -- ^ A predicate selecting terms eligible for matching against. - -> KdMap.KdMap Double FeatureVector (Int, Term syntax ann1) -- ^ The k-d map to look up nearest neighbours within. - -> Term syntax (Record (FeatureVector ': fields2)) -- ^ The term to find the nearest neighbour to. - -> Maybe (Int, Term syntax ann1) -- ^ The most similar term matched by the predicate, if any. -mostSimilarMatching isEligible tree term = listToMaybe (sortOn (editDistanceUpTo defaultM term . snd) candidates) - where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract term))) - defaultD, defaultL, defaultM, defaultP, defaultQ :: Int defaultD = 15 defaultL = 2 From 3b9b50996b7de6e57c6cee4df8d6afa028d1f009 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 13:02:50 -0400 Subject: [PATCH 112/120] Add the # of similar terms to Options. --- src/RWS.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index e4e97b119..d8f7c255c 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -93,20 +93,21 @@ rws canCompare equivalent as bs -- -- cf §4.2 of RWS-Diff mostSimilarMatching isEligible tree term = listToMaybe (sortOn (editDistanceUpTo defaultM term . snd) candidates) - where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract term))) + where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree optionsMaxSimilarTerms (rhead (extract term))) data Options = Options { optionsLookaheadPlaces :: {-# UNPACK #-} !Int -- ^ How many places ahead should we look for similar terms? + , optionsMaxSimilarTerms :: {-# UNPACK #-} !Int -- ^ The maximum number of similar terms to consider. } defaultOptions :: Options defaultOptions = Options { optionsLookaheadPlaces = 0 + , optionsMaxSimilarTerms = 2 } -defaultD, defaultL, defaultM, defaultP, defaultQ :: Int +defaultD, defaultM, defaultP, defaultQ :: Int defaultD = 15 -defaultL = 2 defaultM = 10 defaultP = 2 defaultQ = 3 From 8e554422f4de4d4336c7e3fd188836d12c432ff5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 13:05:47 -0400 Subject: [PATCH 113/120] Add the comparison cutoff to Options. --- src/RWS.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index d8f7c255c..2e3dffc58 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -92,23 +92,24 @@ rws canCompare equivalent as bs -- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which don’t match the predicate, and select the minimum of the remaining by (a constant-time approximation of) edit distance. -- -- cf §4.2 of RWS-Diff - mostSimilarMatching isEligible tree term = listToMaybe (sortOn (editDistanceUpTo defaultM term . snd) candidates) + mostSimilarMatching isEligible tree term = listToMaybe (sortOn (editDistanceUpTo optionsNodeComparisons term . snd) candidates) where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree optionsMaxSimilarTerms (rhead (extract term))) data Options = Options { optionsLookaheadPlaces :: {-# UNPACK #-} !Int -- ^ How many places ahead should we look for similar terms? , optionsMaxSimilarTerms :: {-# UNPACK #-} !Int -- ^ The maximum number of similar terms to consider. + , optionsNodeComparisons :: {-# UNPACK #-} !Int -- ^ The number of nodes to compare when selecting the most similar term. } defaultOptions :: Options defaultOptions = Options { optionsLookaheadPlaces = 0 , optionsMaxSimilarTerms = 2 + , optionsNodeComparisons = 10 } -defaultD, defaultM, defaultP, defaultQ :: Int +defaultD, defaultP, defaultQ :: Int defaultD = 15 -defaultM = 10 defaultP = 2 defaultQ = 3 From 14d8a20beaa5d93d8e3bf0a8fc9926e2086918ca Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 13:09:44 -0400 Subject: [PATCH 114/120] :memo: the deletions step. --- src/RWS.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RWS.hs b/src/RWS.hs index 2e3dffc58..9772c0239 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -81,9 +81,9 @@ rws canCompare equivalent as bs (j', _) <- mostSimilarMatching (\ j' b -> inRange (j, j + optionsLookaheadPlaces) j' && canCompareTerms canCompare a b) kdMapB a -- Fail out if there’s a better match for a nearby. guard (j == j') - pure $! - let (deleted, _ : restA) = span ((< i') . fst) as in - (This . snd <$> deleted) <> (These a b : go restA restB) + -- Delete any elements of as before the selected element. + let (deleted, _ : restA) = span ((< i') . fst) as + pure $! (This . snd <$> deleted) <> (These a b : go restA restB) (as, bs) = (zip [0..] as', zip [0..] bs') (kdMapA, kdMapB) = (toKdMap as, toKdMap bs) From 7215098121601657164d38a5183a7ba5ed09e856 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 13:15:43 -0400 Subject: [PATCH 115/120] Look ahead one place. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index 9772c0239..e7f80402e 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -103,7 +103,7 @@ data Options = Options defaultOptions :: Options defaultOptions = Options - { optionsLookaheadPlaces = 0 + { optionsLookaheadPlaces = 1 , optionsMaxSimilarTerms = 2 , optionsNodeComparisons = 10 } From 250cba3aa0ad7a8d1ccbd2ed03a13c4520db438d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 13:23:42 -0400 Subject: [PATCH 116/120] Export Options. --- src/RWS.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/RWS.hs b/src/RWS.hs index e7f80402e..29e550f1f 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -1,6 +1,7 @@ {-# LANGUAGE GADTs, DataKinds, RankNTypes, TypeOperators #-} module RWS ( rws +, Options(..) , ComparabilityRelation , FeatureVector(..) , defaultFeatureVectorDecorator From 4d365565b67b5b27b0cdee75e6019c36b4595127 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 13:23:51 -0400 Subject: [PATCH 117/120] Export defaultOptions. --- src/RWS.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/RWS.hs b/src/RWS.hs index 29e550f1f..a3378dda5 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -2,6 +2,7 @@ module RWS ( rws , Options(..) +, defaultOptions , ComparabilityRelation , FeatureVector(..) , defaultFeatureVectorDecorator From aca688c9899e4c52a0e33ce74b9ccea185bda3d1 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 14:01:17 -0400 Subject: [PATCH 118/120] Reset the lookahead places to 0. --- src/RWS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RWS.hs b/src/RWS.hs index a3378dda5..216d7b41c 100644 --- a/src/RWS.hs +++ b/src/RWS.hs @@ -105,7 +105,7 @@ data Options = Options defaultOptions :: Options defaultOptions = Options - { optionsLookaheadPlaces = 1 + { optionsLookaheadPlaces = 0 , optionsMaxSimilarTerms = 2 , optionsNodeComparisons = 10 } From e512398b85b5b8188385adb4295edc2813269c02 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 15:13:47 -0400 Subject: [PATCH 119/120] Update a bunch of fixtures. --- .../go/array-with-implicit-length.diffA-B.txt | 4 +- .../go/array-with-implicit-length.diffB-A.txt | 4 +- .../const-declarations-with-types.diffA-B.txt | 9 ++- .../javascript/anonymous-function.diffB-A.txt | 22 ++++--- test/fixtures/javascript/export.diffA-B.txt | 55 ++++++++-------- test/fixtures/javascript/export.diffB-A.txt | 66 +++++++++---------- .../javascript/function-call-args.diffB-A.txt | 22 ++++--- .../async-function-definition.diffA-B.txt | 45 ++++++------- test/fixtures/python/dictionary.diffA-B.txt | 14 ++-- .../python/exec-statement.diffA-B.txt | 13 ++-- .../python/exec-statement.diffB-A.txt | 13 ++-- .../python/expression-statement.diffA-B.txt | 3 +- .../python/expression-statement.diffB-A.txt | 4 +- .../python/function-definition.diffA-B.txt | 34 +++++----- .../python/function-definition.diffB-A.txt | 20 +++--- test/fixtures/python/if-statement.diffA-B.txt | 6 +- .../python/import-from-statement.diffB-A.txt | 6 +- .../python/print-statement.diffA-B.txt | 4 +- .../python/raise-statement.diffA-B.txt | 44 ++++++++----- .../python/return-statement.diffA-B.txt | 26 ++++---- test/fixtures/python/string.diffA-B.txt | 6 +- test/fixtures/python/string.diffB-A.txt | 2 +- test/fixtures/python/tuple.diffB-A.txt | 3 +- .../python/while-statement.diffB-A.txt | 20 +++--- .../ambient-declarations.diffA-B.txt | 27 ++++---- .../ambient-declarations.diffB-A.txt | 39 +++++------ .../typescript/anonymous-function.diffB-A.txt | 22 ++++--- test/fixtures/typescript/export.diffA-B.txt | 55 ++++++++-------- test/fixtures/typescript/export.diffB-A.txt | 66 +++++++++---------- .../typescript/function-call-args.diffB-A.txt | 22 ++++--- .../public-field-definition.diffA-B.txt | 36 ++++------ .../public-field-definition.diffB-A.txt | 28 ++++---- 32 files changed, 366 insertions(+), 374 deletions(-) diff --git a/test/fixtures/go/array-with-implicit-length.diffA-B.txt b/test/fixtures/go/array-with-implicit-length.diffA-B.txt index b9cadd513..703615aa7 100644 --- a/test/fixtures/go/array-with-implicit-length.diffA-B.txt +++ b/test/fixtures/go/array-with-implicit-length.diffA-B.txt @@ -15,5 +15,5 @@ ->(NumberLiteral) } { (NumberLiteral) ->(NumberLiteral) } - {+(NumberLiteral)+} - {-(NumberLiteral)-})))))) + { (NumberLiteral) + ->(NumberLiteral) })))))) diff --git a/test/fixtures/go/array-with-implicit-length.diffB-A.txt b/test/fixtures/go/array-with-implicit-length.diffB-A.txt index b9cadd513..703615aa7 100644 --- a/test/fixtures/go/array-with-implicit-length.diffB-A.txt +++ b/test/fixtures/go/array-with-implicit-length.diffB-A.txt @@ -15,5 +15,5 @@ ->(NumberLiteral) } { (NumberLiteral) ->(NumberLiteral) } - {+(NumberLiteral)+} - {-(NumberLiteral)-})))))) + { (NumberLiteral) + ->(NumberLiteral) })))))) diff --git a/test/fixtures/go/const-declarations-with-types.diffA-B.txt b/test/fixtures/go/const-declarations-with-types.diffA-B.txt index 13a073d55..f4f232b61 100644 --- a/test/fixtures/go/const-declarations-with-types.diffA-B.txt +++ b/test/fixtures/go/const-declarations-with-types.diffA-B.txt @@ -11,8 +11,7 @@ ->(Identifier) } { (Identifier) ->(Identifier) } - {+(Other "expression_list" - {+(NumberLiteral)+} - {+(NumberLiteral)+})+} - {-(Other "expression_list" - {-(NumberLiteral)-})-})))) + (Other "expression_list" + { (NumberLiteral) + ->(NumberLiteral) } + {+(NumberLiteral)+}))))) diff --git a/test/fixtures/javascript/anonymous-function.diffB-A.txt b/test/fixtures/javascript/anonymous-function.diffB-A.txt index 43ee1632e..378fb36fa 100644 --- a/test/fixtures/javascript/anonymous-function.diffB-A.txt +++ b/test/fixtures/javascript/anonymous-function.diffB-A.txt @@ -3,14 +3,13 @@ (Empty) (Empty) (Empty) - (RequiredParameter - (Empty) - (Empty) - (Empty) - (Assignment - { (Identifier) - ->(Identifier) } - (Empty))) + {+(RequiredParameter + {+(Empty)+} + {+(Empty)+} + {+(Empty)+} + {+(Assignment + {+(Identifier)+} + {+(Empty)+})+})+} (RequiredParameter (Empty) (Empty) @@ -18,6 +17,13 @@ (Assignment (Identifier) (Empty))) + {-(RequiredParameter + {-(Empty)-} + {-(Empty)-} + {-(Empty)-} + {-(Assignment + {-(Identifier)-} + {-(Empty)-})-})-} ( (Return { (Times diff --git a/test/fixtures/javascript/export.diffA-B.txt b/test/fixtures/javascript/export.diffA-B.txt index d3c55c9dd..d50078834 100644 --- a/test/fixtures/javascript/export.diffA-B.txt +++ b/test/fixtures/javascript/export.diffA-B.txt @@ -107,37 +107,36 @@ {-(Empty)-} {-(Identifier)-} {-([])-})-}) -{+(Export - {+(TextElement)+})+} -{+(Export - {+(ExportClause - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+})+} - {+(TextElement)+})+} -{+(Export - {+(ExportClause - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Identifier)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Identifier)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+})+} - {+(TextElement)+})+} -{-(Export + (Export + {+(TextElement)+} {-(ExportClause {-(ImportExportSpecifier {-(Identifier)-} - {-(Identifier)-})-})-})-} + {-(Identifier)-})-})-}) +{+(Export + {+(ExportClause + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Empty)+})+} + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Empty)+})+} + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Empty)+})+})+} + {+(TextElement)+})+} +{+(Export + {+(ExportClause + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Identifier)+})+} + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Identifier)+})+} + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Empty)+})+})+} + {+(TextElement)+})+} {-(Export {-(TextElement)-})-} {-(Export diff --git a/test/fixtures/javascript/export.diffB-A.txt b/test/fixtures/javascript/export.diffB-A.txt index 6629857ba..1009f3a02 100644 --- a/test/fixtures/javascript/export.diffB-A.txt +++ b/test/fixtures/javascript/export.diffB-A.txt @@ -25,17 +25,17 @@ {-(Empty)-})-})) (Export (ExportClause + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Identifier)+})+} + (ImportExportSpecifier + (Identifier) + (Identifier)) (ImportExportSpecifier { (Identifier) ->(Identifier) } { (Identifier) - ->(Identifier) }) - (ImportExportSpecifier - (Identifier) - (Identifier)) - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+} + ->(Empty) }) {-(ImportExportSpecifier {-(Identifier)-} {-(Empty)-})-})) @@ -105,14 +105,14 @@ {-(ImportExportSpecifier {-(Identifier)-} {-(Identifier)-})-})-}) -{+(Export + (Export {+(ExportClause {+(ImportExportSpecifier {+(Identifier)+} - {+(Identifier)+})+})+})+} - (Export - { (TextElement) - ->(TextElement) }) + {+(Identifier)+})+})+} + {-(TextElement)-}) +{+(Export + {+(TextElement)+})+} (Export (ExportClause (ImportExportSpecifier @@ -133,27 +133,21 @@ {-(Empty)-})-}) { (TextElement) ->(TextElement) }) -{+(Export - {+(ExportClause - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Identifier)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Identifier)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+})+} - {+(TextElement)+})+} -{-(Export - {-(ExportClause - {-(ImportExportSpecifier - {-(Identifier)-} - {-(Identifier)-})-} - {-(ImportExportSpecifier - {-(Identifier)-} - {-(Identifier)-})-} - {-(ImportExportSpecifier - {-(Identifier)-} - {-(Empty)-})-})-} - {-(TextElement)-})-}) + (Export + (ExportClause + (ImportExportSpecifier + { (Identifier) + ->(Identifier) } + { (Identifier) + ->(Identifier) }) + (ImportExportSpecifier + { (Identifier) + ->(Identifier) } + { (Identifier) + ->(Identifier) }) + (ImportExportSpecifier + { (Identifier) + ->(Identifier) } + (Empty))) + { (TextElement) + ->(TextElement) })) diff --git a/test/fixtures/javascript/function-call-args.diffB-A.txt b/test/fixtures/javascript/function-call-args.diffB-A.txt index a4b82533e..194e964af 100644 --- a/test/fixtures/javascript/function-call-args.diffB-A.txt +++ b/test/fixtures/javascript/function-call-args.diffB-A.txt @@ -8,14 +8,13 @@ (Empty) (Empty) (Empty) - (RequiredParameter - (Empty) - (Empty) - (Empty) - (Assignment - { (Identifier) - ->(Identifier) } - (Empty))) + {+(RequiredParameter + {+(Empty)+} + {+(Empty)+} + {+(Empty)+} + {+(Assignment + {+(Identifier)+} + {+(Empty)+})+})+} (RequiredParameter (Empty) (Empty) @@ -23,6 +22,13 @@ (Assignment (Identifier) (Empty))) + {-(RequiredParameter + {-(Empty)-} + {-(Empty)-} + {-(Empty)-} + {-(Assignment + {-(Identifier)-} + {-(Empty)-})-})-} ( (Call (MemberAccess diff --git a/test/fixtures/python/async-function-definition.diffA-B.txt b/test/fixtures/python/async-function-definition.diffA-B.txt index 56b1fa7a3..327a2d333 100644 --- a/test/fixtures/python/async-function-definition.diffA-B.txt +++ b/test/fixtures/python/async-function-definition.diffA-B.txt @@ -1,35 +1,36 @@ (Program - (Annotation - (Annotation - (Function - { (Identifier) - ->(Identifier) } - {-(Identifier)-} - (Identifier) - {+(Identifier)+} - ( - { (Identifier) - ->(Identifier) })) - (Empty)) - (Identifier)) - (Annotation - (Annotation - (Function - (Identifier) - ( - { (Identifier) - ->(Identifier) })) - (Empty)) - (Identifier)) {+(Annotation {+(Annotation {+(Function + {+(Identifier)+} {+(Identifier)+} {+(Identifier)+} {+( {+(Identifier)+})+})+} {+(Empty)+})+} {+(Identifier)+})+} + (Annotation + (Annotation + (Function + (Identifier) + ( + { (Identifier) + ->(Identifier) })) + (Empty)) + (Identifier)) + (Annotation + (Annotation + (Function + { (Identifier) + ->(Identifier) } + { (Identifier) + ->(Identifier) } + {-(Identifier)-} + ( + { (Identifier) + ->(Identifier) })) + (Empty)) + (Identifier)) {-(Annotation {-(Annotation {-(Function diff --git a/test/fixtures/python/dictionary.diffA-B.txt b/test/fixtures/python/dictionary.diffA-B.txt index ccd5defe8..1f63215b0 100644 --- a/test/fixtures/python/dictionary.diffA-B.txt +++ b/test/fixtures/python/dictionary.diffA-B.txt @@ -1,10 +1,8 @@ (Program - (Hash - (KeyValue - { (Identifier) - ->(Identifier) } - { (Integer) - ->(Integer) })) +{+(Hash + {+(KeyValue + {+(Identifier)+} + {+(Integer)+})+})+} (Hash) {+(Assignment {+(Identifier)+} @@ -17,6 +15,10 @@ {+(KeyValue {+(Identifier)+} {+(Identifier)+})+})+})+} +{-(Hash + {-(KeyValue + {-(Identifier)-} + {-(Integer)-})-})-} {-(Hash {-(KeyValue {-(Identifier)-} diff --git a/test/fixtures/python/exec-statement.diffA-B.txt b/test/fixtures/python/exec-statement.diffA-B.txt index 8862037e8..cb8df8433 100644 --- a/test/fixtures/python/exec-statement.diffA-B.txt +++ b/test/fixtures/python/exec-statement.diffA-B.txt @@ -12,13 +12,10 @@ {+(Identifier)+} {-(Null)-} (Empty)) -{+(Call - {+(Identifier)+} - {+(TextElement)+} - {+(Empty)+})+} -{-(Call - {-(Identifier)-} - {-(TextElement)-} + (Call + (Identifier) + { (TextElement) + ->(TextElement) } {-(Identifier)-} {-(Identifier)-} - {-(Empty)-})-}) + (Empty))) diff --git a/test/fixtures/python/exec-statement.diffB-A.txt b/test/fixtures/python/exec-statement.diffB-A.txt index 583d2e1de..d61d2f5f2 100644 --- a/test/fixtures/python/exec-statement.diffB-A.txt +++ b/test/fixtures/python/exec-statement.diffB-A.txt @@ -12,13 +12,10 @@ {-(Identifier)-} {-(Identifier)-} (Empty)) -{+(Call - {+(Identifier)+} - {+(TextElement)+} + (Call + (Identifier) + { (TextElement) + ->(TextElement) } {+(Identifier)+} {+(Identifier)+} - {+(Empty)+})+} -{-(Call - {-(Identifier)-} - {-(TextElement)-} - {-(Empty)-})-}) + (Empty))) diff --git a/test/fixtures/python/expression-statement.diffA-B.txt b/test/fixtures/python/expression-statement.diffA-B.txt index 87229cd52..260608ca3 100644 --- a/test/fixtures/python/expression-statement.diffA-B.txt +++ b/test/fixtures/python/expression-statement.diffA-B.txt @@ -9,9 +9,10 @@ (Integer)) {+(Identifier)+} ( + {+(Integer)+} (Integer) (Integer) - (Integer)) + {-(Integer)-}) {+(Plus {+(Identifier)+} {+(Identifier)+})+}) diff --git a/test/fixtures/python/expression-statement.diffB-A.txt b/test/fixtures/python/expression-statement.diffB-A.txt index 3146293b1..1bbccbc08 100644 --- a/test/fixtures/python/expression-statement.diffB-A.txt +++ b/test/fixtures/python/expression-statement.diffB-A.txt @@ -1,6 +1,5 @@ (Program -{ (Identifier) -->(Identifier) } +{+(Identifier)+} {+(Plus {+(Identifier)+} {+(Identifier)+})+} @@ -12,6 +11,7 @@ {+(Integer)+} {+(Integer)+} {+(Integer)+})+} +{-(Identifier)-} {-( {-(Integer)-} {-(Integer)-} diff --git a/test/fixtures/python/function-definition.diffA-B.txt b/test/fixtures/python/function-definition.diffA-B.txt index 168dad37d..7b354fe66 100644 --- a/test/fixtures/python/function-definition.diffA-B.txt +++ b/test/fixtures/python/function-definition.diffA-B.txt @@ -23,28 +23,24 @@ { (Identifier) ->(Identifier) })) (Empty)) -{+(Annotation - {+(Function - {+(Identifier)+} - {+( - {+(Identifier)+})+})+} - {+(Empty)+})+} -{+(Annotation - {+(Function - {+(Identifier)+} - {+(Identifier)+} - {+( - {+(Identifier)+})+})+} - {+(Empty)+})+} -{-(Annotation - {-(Function - {-(Identifier)-} + (Annotation + (Function + { (Identifier) + ->(Identifier) } {-(Assignment {-(Identifier)-} {-(Identifier)-})-} - {-( - {-(Identifier)-})-})-} - {-(Empty)-})-} + ( + { (Identifier) + ->(Identifier) })) + (Empty)) +{+(Annotation + {+(Function + {+(Identifier)+} + {+(Identifier)+} + {+( + {+(Identifier)+})+})+} + {+(Empty)+})+} (Annotation (Function (Identifier) diff --git a/test/fixtures/python/function-definition.diffB-A.txt b/test/fixtures/python/function-definition.diffB-A.txt index bf7d101bf..f781195d0 100644 --- a/test/fixtures/python/function-definition.diffB-A.txt +++ b/test/fixtures/python/function-definition.diffB-A.txt @@ -1,18 +1,14 @@ (Program -{+(Annotation - {+(Function - {+(Identifier)+} - {+( - {+(Identifier)+})+})+} - {+(Empty)+})+} -{-(Annotation - {-(Function + (Annotation + (Function + { (Identifier) + ->(Identifier) } {-(Identifier)-} {-(Identifier)-} - {-(Identifier)-} - {-( - {-(Identifier)-})-})-} - {-(Empty)-})-} + ( + { (Identifier) + ->(Identifier) })) + (Empty)) (Annotation (Function (Identifier) diff --git a/test/fixtures/python/if-statement.diffA-B.txt b/test/fixtures/python/if-statement.diffA-B.txt index 140ca836c..4df75d39b 100644 --- a/test/fixtures/python/if-statement.diffA-B.txt +++ b/test/fixtures/python/if-statement.diffA-B.txt @@ -3,9 +3,9 @@ { (Identifier) ->(Identifier) } ( - { (Identifier) - ->(Identifier) } - (Identifier)) + {+(Identifier)+} + (Identifier) + {-(Identifier)-}) { (If {-(Identifier)-} {-( diff --git a/test/fixtures/python/import-from-statement.diffB-A.txt b/test/fixtures/python/import-from-statement.diffB-A.txt index 56c486f9e..96c7c6c72 100644 --- a/test/fixtures/python/import-from-statement.diffB-A.txt +++ b/test/fixtures/python/import-from-statement.diffB-A.txt @@ -7,12 +7,14 @@ {+(ScopeResolution {+(Identifier)+})+}) (Import + {+(ScopeResolution + {+(Identifier)+})+} (ScopeResolution (Identifier)) (ScopeResolution (Identifier)) - (ScopeResolution - (Identifier))) + {-(ScopeResolution + {-(Identifier)-})-}) (Import (ScopeResolution { (Identifier) diff --git a/test/fixtures/python/print-statement.diffA-B.txt b/test/fixtures/python/print-statement.diffA-B.txt index 08522ada5..0ba7ad0ba 100644 --- a/test/fixtures/python/print-statement.diffA-B.txt +++ b/test/fixtures/python/print-statement.diffA-B.txt @@ -1,9 +1,9 @@ (Program (Call (Identifier) - { (Identifier) - ->(Identifier) } + {+(Identifier)+} (Identifier) + {-(Identifier)-} (Empty)) (Call (Identifier) diff --git a/test/fixtures/python/raise-statement.diffA-B.txt b/test/fixtures/python/raise-statement.diffA-B.txt index 1df340c41..f96cc4efc 100644 --- a/test/fixtures/python/raise-statement.diffA-B.txt +++ b/test/fixtures/python/raise-statement.diffA-B.txt @@ -1,19 +1,29 @@ (Program +{+(Throw + {+( + {+(Call + {+(Identifier)+} + {+(TextElement)+} + {+(Empty)+})+})+})+} +{+(Throw + {+( + {+(Call + {+(Identifier)+} + {+(TextElement)+} + {+(Empty)+})+} + {+(Identifier)+})+})+} (Throw - ( - (Call - (Identifier) - { (TextElement) - ->(TextElement) } - (Empty)))) - (Throw - ( - (Call - (Identifier) - { (TextElement) - ->(TextElement) } - (Empty)) - { (Identifier) - ->(Identifier) })) - (Throw - ([]))) + ([])) +{-(Throw + {-( + {-(Call + {-(Identifier)-} + {-(TextElement)-} + {-(Empty)-})-})-})-} +{-(Throw + {-( + {-(Call + {-(Identifier)-} + {-(TextElement)-} + {-(Empty)-})-} + {-(Identifier)-})-})-}) diff --git a/test/fixtures/python/return-statement.diffA-B.txt b/test/fixtures/python/return-statement.diffA-B.txt index c9e2603b7..0798a925c 100644 --- a/test/fixtures/python/return-statement.diffA-B.txt +++ b/test/fixtures/python/return-statement.diffA-B.txt @@ -1,18 +1,20 @@ (Program - (Return - ( - (Plus - { (Identifier) - ->(Identifier) } - { (Identifier) - ->(Identifier) }) - { (Identifier) - ->(Identifier) })) +{+(Return + {+( + {+(Plus + {+(Identifier)+} + {+(Identifier)+})+} + {+(Identifier)+})+})+} (Return (Empty)) -{+(Return - {+(Not - {+(Identifier)+})+})+} + (Return + { ( + {-(Plus + {-(Identifier)-} + {-(Identifier)-})-} + {-(Identifier)-}) + ->(Not + {+(Identifier)+}) }) {-(Return {-(Not {-(Identifier)-})-})-}) diff --git a/test/fixtures/python/string.diffA-B.txt b/test/fixtures/python/string.diffA-B.txt index 558b37e44..707379f23 100644 --- a/test/fixtures/python/string.diffA-B.txt +++ b/test/fixtures/python/string.diffA-B.txt @@ -1,10 +1,10 @@ (Program -{ (TextElement) -->(TextElement) } +{+(TextElement)+} (TextElement) {+(TextElement)+} {+(TextElement)+} -{+(TextElement)+} +{ (TextElement) +->(TextElement) } {+(TextElement)+} {+(TextElement)+} {-(TextElement)-} diff --git a/test/fixtures/python/string.diffB-A.txt b/test/fixtures/python/string.diffB-A.txt index 55414460b..f96350334 100644 --- a/test/fixtures/python/string.diffB-A.txt +++ b/test/fixtures/python/string.diffB-A.txt @@ -6,9 +6,9 @@ ->(TextElement) } {+(TextElement)+} {+(TextElement)+} -{+(TextElement)+} { (TextElement) ->(TextElement) } +{+(TextElement)+} {-(TextElement)-} {-(TextElement)-} {-(TextElement)-} diff --git a/test/fixtures/python/tuple.diffB-A.txt b/test/fixtures/python/tuple.diffB-A.txt index 718d484cc..eeaeb3a3e 100644 --- a/test/fixtures/python/tuple.diffB-A.txt +++ b/test/fixtures/python/tuple.diffB-A.txt @@ -3,9 +3,10 @@ {+(Identifier)+} {+(Identifier)+})+} (Tuple + {+(Identifier)+} (Identifier) (Identifier) - (Identifier)) + {-(Identifier)-}) {-(Tuple {-(Identifier)-} {-(Identifier)-})-} diff --git a/test/fixtures/python/while-statement.diffB-A.txt b/test/fixtures/python/while-statement.diffB-A.txt index 43bf133ba..bce43a580 100644 --- a/test/fixtures/python/while-statement.diffB-A.txt +++ b/test/fixtures/python/while-statement.diffB-A.txt @@ -3,15 +3,11 @@ { (Identifier) ->(Identifier) } ( - { (Break - {-(Empty)-}) - ->(NoOp - {+(Empty)+}) } - { (Continue - {-(Empty)-}) - ->(Break - {+(Empty)+}) } - { (NoOp - {-(Empty)-}) - ->(Continue - {+(Empty)+}) }))) + {+(NoOp + {+(Empty)+})+} + (Break + (Empty)) + (Continue + (Empty)) + {-(NoOp + {-(Empty)-})-}))) diff --git a/test/fixtures/typescript/ambient-declarations.diffA-B.txt b/test/fixtures/typescript/ambient-declarations.diffA-B.txt index ae97d45c3..fc49501f9 100644 --- a/test/fixtures/typescript/ambient-declarations.diffA-B.txt +++ b/test/fixtures/typescript/ambient-declarations.diffA-B.txt @@ -1,10 +1,4 @@ (Program -{+(AmbientDeclaration - {+(InternalModule - {+(Identifier)+})+})+} -{+(AmbientDeclaration - {+(Class - {+(Identifier)+})+})+} (AmbientDeclaration { (Class {-(Identifier)-} @@ -15,18 +9,23 @@ {-(TypeIdentifier)-})-} {-(Identifier)-} {-(Empty)-})-}) - ->(InterfaceDeclaration - {+(Empty)+} - {+(Empty)+} - {+(Identifier)+} - {+(ObjectType)+}) }) -{-(AmbientDeclaration - {-(VariableDeclaration + ->(InternalModule + {+(Identifier)+}) }) + (AmbientDeclaration + { (VariableDeclaration {-(Assignment {-(Annotation {-(PredefinedType)-})-} {-(Identifier)-} - {-(Empty)-})-})-})-} + {-(Empty)-})-}) + ->(Class + {+(Identifier)+}) }) +{+(AmbientDeclaration + {+(InterfaceDeclaration + {+(Empty)+} + {+(Empty)+} + {+(Identifier)+} + {+(ObjectType)+})+})+} {-(AmbientDeclaration {-(AmbientFunction {-(Empty)-} diff --git a/test/fixtures/typescript/ambient-declarations.diffB-A.txt b/test/fixtures/typescript/ambient-declarations.diffB-A.txt index 8d2ace9cf..81c8abbbd 100644 --- a/test/fixtures/typescript/ambient-declarations.diffB-A.txt +++ b/test/fixtures/typescript/ambient-declarations.diffB-A.txt @@ -1,6 +1,8 @@ (Program -{+(AmbientDeclaration - {+(Class + (AmbientDeclaration + { (InternalModule + {-(Identifier)-}) + ->(Class {+(Identifier)+} {+(PublicFieldDefinition {+(Empty)+} @@ -8,14 +10,16 @@ {+(Annotation {+(TypeIdentifier)+})+} {+(Identifier)+} - {+(Empty)+})+})+})+} -{+(AmbientDeclaration - {+(VariableDeclaration + {+(Empty)+})+}) }) + (AmbientDeclaration + { (Class + {-(Identifier)-}) + ->(VariableDeclaration {+(Assignment {+(Annotation {+(PredefinedType)+})+} {+(Identifier)+} - {+(Empty)+})+})+})+} + {+(Empty)+})+}) }) {+(AmbientDeclaration {+(AmbientFunction {+(Empty)+} @@ -30,8 +34,13 @@ {+(Assignment {+(Identifier)+} {+(Empty)+})+})+})+})+} -{+(AmbientDeclaration - {+(InternalModule + (AmbientDeclaration + { (InterfaceDeclaration + {-(Empty)-} + {-(Empty)-} + {-(Identifier)-} + {-(ObjectType)-}) + ->(InternalModule {+(Identifier)+} {+(AmbientFunction {+(Empty)+} @@ -85,19 +94,7 @@ {+(Empty)+} {+(Annotation {+(PredefinedType)+})+} - {+(Identifier)+})+})+})+})+})+} -{-(AmbientDeclaration - {-(InternalModule - {-(Identifier)-})-})-} -{-(AmbientDeclaration - {-(Class - {-(Identifier)-})-})-} -{-(AmbientDeclaration - {-(InterfaceDeclaration - {-(Empty)-} - {-(Empty)-} - {-(Identifier)-} - {-(ObjectType)-})-})-} + {+(Identifier)+})+})+})+}) }) (AmbientDeclaration (Class (Identifier) diff --git a/test/fixtures/typescript/anonymous-function.diffB-A.txt b/test/fixtures/typescript/anonymous-function.diffB-A.txt index 43ee1632e..378fb36fa 100644 --- a/test/fixtures/typescript/anonymous-function.diffB-A.txt +++ b/test/fixtures/typescript/anonymous-function.diffB-A.txt @@ -3,14 +3,13 @@ (Empty) (Empty) (Empty) - (RequiredParameter - (Empty) - (Empty) - (Empty) - (Assignment - { (Identifier) - ->(Identifier) } - (Empty))) + {+(RequiredParameter + {+(Empty)+} + {+(Empty)+} + {+(Empty)+} + {+(Assignment + {+(Identifier)+} + {+(Empty)+})+})+} (RequiredParameter (Empty) (Empty) @@ -18,6 +17,13 @@ (Assignment (Identifier) (Empty))) + {-(RequiredParameter + {-(Empty)-} + {-(Empty)-} + {-(Empty)-} + {-(Assignment + {-(Identifier)-} + {-(Empty)-})-})-} ( (Return { (Times diff --git a/test/fixtures/typescript/export.diffA-B.txt b/test/fixtures/typescript/export.diffA-B.txt index d3c55c9dd..d50078834 100644 --- a/test/fixtures/typescript/export.diffA-B.txt +++ b/test/fixtures/typescript/export.diffA-B.txt @@ -107,37 +107,36 @@ {-(Empty)-} {-(Identifier)-} {-([])-})-}) -{+(Export - {+(TextElement)+})+} -{+(Export - {+(ExportClause - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+})+} - {+(TextElement)+})+} -{+(Export - {+(ExportClause - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Identifier)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Identifier)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+})+} - {+(TextElement)+})+} -{-(Export + (Export + {+(TextElement)+} {-(ExportClause {-(ImportExportSpecifier {-(Identifier)-} - {-(Identifier)-})-})-})-} + {-(Identifier)-})-})-}) +{+(Export + {+(ExportClause + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Empty)+})+} + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Empty)+})+} + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Empty)+})+})+} + {+(TextElement)+})+} +{+(Export + {+(ExportClause + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Identifier)+})+} + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Identifier)+})+} + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Empty)+})+})+} + {+(TextElement)+})+} {-(Export {-(TextElement)-})-} {-(Export diff --git a/test/fixtures/typescript/export.diffB-A.txt b/test/fixtures/typescript/export.diffB-A.txt index 6629857ba..1009f3a02 100644 --- a/test/fixtures/typescript/export.diffB-A.txt +++ b/test/fixtures/typescript/export.diffB-A.txt @@ -25,17 +25,17 @@ {-(Empty)-})-})) (Export (ExportClause + {+(ImportExportSpecifier + {+(Identifier)+} + {+(Identifier)+})+} + (ImportExportSpecifier + (Identifier) + (Identifier)) (ImportExportSpecifier { (Identifier) ->(Identifier) } { (Identifier) - ->(Identifier) }) - (ImportExportSpecifier - (Identifier) - (Identifier)) - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+} + ->(Empty) }) {-(ImportExportSpecifier {-(Identifier)-} {-(Empty)-})-})) @@ -105,14 +105,14 @@ {-(ImportExportSpecifier {-(Identifier)-} {-(Identifier)-})-})-}) -{+(Export + (Export {+(ExportClause {+(ImportExportSpecifier {+(Identifier)+} - {+(Identifier)+})+})+})+} - (Export - { (TextElement) - ->(TextElement) }) + {+(Identifier)+})+})+} + {-(TextElement)-}) +{+(Export + {+(TextElement)+})+} (Export (ExportClause (ImportExportSpecifier @@ -133,27 +133,21 @@ {-(Empty)-})-}) { (TextElement) ->(TextElement) }) -{+(Export - {+(ExportClause - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Identifier)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Identifier)+})+} - {+(ImportExportSpecifier - {+(Identifier)+} - {+(Empty)+})+})+} - {+(TextElement)+})+} -{-(Export - {-(ExportClause - {-(ImportExportSpecifier - {-(Identifier)-} - {-(Identifier)-})-} - {-(ImportExportSpecifier - {-(Identifier)-} - {-(Identifier)-})-} - {-(ImportExportSpecifier - {-(Identifier)-} - {-(Empty)-})-})-} - {-(TextElement)-})-}) + (Export + (ExportClause + (ImportExportSpecifier + { (Identifier) + ->(Identifier) } + { (Identifier) + ->(Identifier) }) + (ImportExportSpecifier + { (Identifier) + ->(Identifier) } + { (Identifier) + ->(Identifier) }) + (ImportExportSpecifier + { (Identifier) + ->(Identifier) } + (Empty))) + { (TextElement) + ->(TextElement) })) diff --git a/test/fixtures/typescript/function-call-args.diffB-A.txt b/test/fixtures/typescript/function-call-args.diffB-A.txt index a4b82533e..194e964af 100644 --- a/test/fixtures/typescript/function-call-args.diffB-A.txt +++ b/test/fixtures/typescript/function-call-args.diffB-A.txt @@ -8,14 +8,13 @@ (Empty) (Empty) (Empty) - (RequiredParameter - (Empty) - (Empty) - (Empty) - (Assignment - { (Identifier) - ->(Identifier) } - (Empty))) + {+(RequiredParameter + {+(Empty)+} + {+(Empty)+} + {+(Empty)+} + {+(Assignment + {+(Identifier)+} + {+(Empty)+})+})+} (RequiredParameter (Empty) (Empty) @@ -23,6 +22,13 @@ (Assignment (Identifier) (Empty))) + {-(RequiredParameter + {-(Empty)-} + {-(Empty)-} + {-(Empty)-} + {-(Assignment + {-(Identifier)-} + {-(Empty)-})-})-} ( (Call (MemberAccess diff --git a/test/fixtures/typescript/public-field-definition.diffA-B.txt b/test/fixtures/typescript/public-field-definition.diffA-B.txt index f76955af6..c56e8c9cc 100644 --- a/test/fixtures/typescript/public-field-definition.diffA-B.txt +++ b/test/fixtures/typescript/public-field-definition.diffA-B.txt @@ -19,33 +19,23 @@ (Empty) (Identifier) (Empty)) - {+(PublicFieldDefinition - {+(Identifier)+} - {+(Readonly)+} + (PublicFieldDefinition + (Identifier) + (Readonly) {+(Annotation {+(TypeIdentifier)+})+} - {+(Identifier)+} - {+(Float)+})+} - {+(PublicFieldDefinition - {+(Identifier)+} - {+(Empty)+} - {+(Annotation - {+(TypeIdentifier)+})+} - {+(Identifier)+} - {+(Float)+})+} - {-(PublicFieldDefinition - {-(Identifier)-} - {-(Readonly)-} {-(Empty)-} - {-(Identifier)-} - {-(Float)-})-} - {-(PublicFieldDefinition - {-(Identifier)-} + (Identifier) + (Float)) + (PublicFieldDefinition + { (Identifier) + ->(Identifier) } + {+(Empty)+} {-(Readonly)-} - {-(Annotation - {-(TypeIdentifier)-})-} - {-(Identifier)-} - {-(Float)-})-} + (Annotation + (TypeIdentifier)) + (Identifier) + (Float)) (PublicFieldDefinition (Empty) (Empty) diff --git a/test/fixtures/typescript/public-field-definition.diffB-A.txt b/test/fixtures/typescript/public-field-definition.diffB-A.txt index 5744d59a4..44f959890 100644 --- a/test/fixtures/typescript/public-field-definition.diffB-A.txt +++ b/test/fixtures/typescript/public-field-definition.diffB-A.txt @@ -19,27 +19,23 @@ (Empty) (Identifier) (Empty)) - {+(PublicFieldDefinition - {+(Identifier)+} - {+(Readonly)+} - {+(Empty)+} - {+(Identifier)+} - {+(Float)+})+} (PublicFieldDefinition (Identifier) (Readonly) - (Annotation - (TypeIdentifier)) - { (Identifier) - ->(Identifier) } - (Float)) - {-(PublicFieldDefinition - {-(Identifier)-} - {-(Empty)-} + {+(Empty)+} {-(Annotation {-(TypeIdentifier)-})-} - {-(Identifier)-} - {-(Float)-})-} + (Identifier) + (Float)) + (PublicFieldDefinition + { (Identifier) + ->(Identifier) } + {+(Readonly)+} + {-(Empty)-} + (Annotation + (TypeIdentifier)) + (Identifier) + (Float)) (PublicFieldDefinition (Empty) (Empty) From 8b281bc3cf00b06d9d7fa4a08151005252279ea3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 24 Oct 2017 15:15:54 -0400 Subject: [PATCH 120/120] Update a couple of ToC fixtures. --- test/TOCSpec.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/TOCSpec.hs b/test/TOCSpec.hs index d0fcad3e0..6e2894c1b 100644 --- a/test/TOCSpec.hs +++ b/test/TOCSpec.hs @@ -66,8 +66,10 @@ spec = parallel $ do sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.B.rb") diff <- runTask $ diffWithParser rubyParser sourceBlobs diffTOC diff `shouldBe` - [ JSONSummary "Method" "self.foo" (sourceSpanBetween (1, 1) (2, 4)) "modified" - , JSONSummary "Method" "bar" (sourceSpanBetween (4, 1) (6, 4)) "modified" ] + [ JSONSummary "Method" "self.foo" (sourceSpanBetween (1, 1) (2, 4)) "added" + , JSONSummary "Method" "bar" (sourceSpanBetween (4, 1) (6, 4)) "modified" + , JSONSummary "Method" "baz" (sourceSpanBetween (4, 1) (5, 4)) "removed" + ] it "dedupes changes in same parent method" $ do sourceBlobs <- blobsForPaths (both "javascript/duplicate-parent.A.js" "javascript/duplicate-parent.B.js") @@ -147,7 +149,7 @@ spec = parallel $ do it "produces JSON output" $ do blobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.B.rb") output <- runTask (diffBlobPair ToCDiffRenderer blobs) - toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"}]},\"errors\":{}}\n" :: ByteString) + toOutput output `shouldBe` ("{\"changes\":{\"test/fixtures/toc/ruby/methods.A.rb -> test/fixtures/toc/ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}\n" :: ByteString) it "produces JSON output if there are parse errors" $ do blobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.X.rb")