From 3eb2e9728491c66e1f3026a89380e07e888c0116 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 16:50:21 -0400 Subject: [PATCH 001/208] =?UTF-8?q?breakDownLeavesByWord=20doesn=E2=80=99t?= =?UTF-8?q?=20rewrite=20the=20cost.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Diffing.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 311d87ae8..ce814de29 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -53,10 +53,10 @@ parserForFilepath :: FilePath -> Parser '[Range, Category, Cost] parserForFilepath = parserForType . T.pack . takeExtension -- | Replace every string leaf with leaves of the words in the string. -breakDownLeavesByWord :: (HasField fields Cost, HasField fields Range) => Source Char -> Term T.Text (Record fields) -> Term T.Text (Record fields) +breakDownLeavesByWord :: HasField fields Range => Source Char -> Term T.Text (Record fields) -> Term T.Text (Record fields) breakDownLeavesByWord source = cata replaceIn where - replaceIn (info :< syntax) = let cost' = 1 + sum (cost . extract <$> syntax') in cofree $ setCost info cost' :< syntax' + replaceIn (info :< syntax) = cofree $ info :< syntax' where syntax' = case (ranges, syntax) of (_:_:_, Leaf _) -> Indexed (makeLeaf info <$> ranges) _ -> syntax From 30ef58c6fdafd0af6dc94420883ca3ac5a2f1462 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 17:24:02 -0400 Subject: [PATCH 002/208] Add a function to compute the cost of a term. --- src/Diffing.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Diffing.hs b/src/Diffing.hs index ce814de29..d79ad2354 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -101,6 +101,9 @@ diffFiles parser renderer sourceBlobs = do Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch))) shouldCompareTerms = (==) `on` category . extract +termCost :: (Prologue.Foldable f, Functor f) => Cofree f (Record a) -> Cost +termCost = cata $ \ c -> 1 + sum (tailF c) + -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer diffCostWithCachedTermCosts diff = unCost $ case runFree diff of From 96c728a9ca139c78b4bea837a2dfd251fbb0098b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 17:31:19 -0400 Subject: [PATCH 003/208] Add a function to decorate a term with the assistance of a decorating function. --- src/Diffing.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Diffing.hs b/src/Diffing.hs index d79ad2354..afc49c5d8 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -77,6 +77,9 @@ readAndTranscodeFile path = do text <- B1.readFile path transcode text +decorateTerm :: Functor f => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) +decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c) + -- | Given a parser and renderer, diff two sources and return the rendered -- | result. -- | Returns the rendered result strictly, so it's always fully evaluated From b975559c6d6b9bc4184870a31bf2b2c5803c1080 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 17:43:07 -0400 Subject: [PATCH 004/208] Parameterize the Parser type synonym by functor and annotation. --- src/DiffOutput.hs | 5 +++-- src/Diffing.hs | 8 ++++---- src/Parser.hs | 2 +- src/TreeSitter.hs | 5 +++-- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/DiffOutput.hs b/src/DiffOutput.hs index 79e053fed..facd398ee 100644 --- a/src/DiffOutput.hs +++ b/src/DiffOutput.hs @@ -15,12 +15,13 @@ import qualified Renderer.Summary as S import Renderer import Renderer.Split import Source +import Syntax import System.Directory import System.FilePath import qualified System.IO as IO -- | Returns a rendered diff given a parser, diff arguments and two source blobs. -textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser fields -> DiffArguments -> Both SourceBlob -> IO Text +textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Text textDiff parser arguments sources = case format arguments of Split -> diffFiles parser split sources Patch -> diffFiles parser P.patch sources @@ -36,7 +37,7 @@ truncatedDiff arguments sources = case format arguments of Summary -> pure "" -- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs. -printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser fields -> DiffArguments -> Both SourceBlob -> IO () +printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO () printDiff parser arguments sources = case format arguments of Split -> put (output arguments) =<< diffFiles parser split sources where diff --git a/src/Diffing.hs b/src/Diffing.hs index afc49c5d8..aa790e69d 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -27,7 +27,7 @@ import TreeSitter import Text.Parser.TreeSitter.Language -- | Return a parser based on the file extension (including the "."). -parserForType :: T.Text -> Parser '[Range, Category, Cost] +parserForType :: T.Text -> Parser (Syntax Text) (Record '[Range, Category]) parserForType mediaType = case languageForType mediaType of Just C -> treeSitterParser C ts_language_c Just JavaScript -> treeSitterParser JavaScript ts_language_javascript @@ -35,7 +35,7 @@ parserForType mediaType = case languageForType mediaType of _ -> lineByLineParser -- | A fallback parser that treats a file simply as rows of strings. -lineByLineParser :: Parser '[Range, Category, Cost] +lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category]) lineByLineParser input = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of (leaves, _) -> cofree <$> leaves where @@ -49,7 +49,7 @@ lineByLineParser input = pure . cofree . root $ case foldl' annotateLeaves ([], toText = T.pack . Source.toString -- | Return the parser that should be used for a given path. -parserForFilepath :: FilePath -> Parser '[Range, Category, Cost] +parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Range, Category]) parserForFilepath = parserForType . T.pack . takeExtension -- | Replace every string leaf with leaves of the words in the string. @@ -84,7 +84,7 @@ decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: hea -- | result. -- | Returns the rendered result strictly, so it's always fully evaluated -- | with respect to other IO actions. -diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser fields -> Renderer (Record fields) -> Both SourceBlob -> IO T.Text +diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO T.Text diffFiles parser renderer sourceBlobs = do let sources = source <$> sourceBlobs terms <- sequence $ parser <$> sources diff --git a/src/Parser.hs b/src/Parser.hs index f131f101f..4c46aa650 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -14,7 +14,7 @@ import Source -- | A function that takes a source file and returns an annotated AST. -- | The return is in the IO monad because some of the parsers are written in C -- | and aren't pure. -type Parser fields = Source Char -> IO (Term Text (Record fields)) +type Parser f a = Source Char -> IO (Cofree f a) -- | Categories that are treated as fixed nodes. fixedCategories :: Set.Set Category diff --git a/src/TreeSitter.hs b/src/TreeSitter.hs index 3bc8ed59e..2e35de85f 100644 --- a/src/TreeSitter.hs +++ b/src/TreeSitter.hs @@ -10,13 +10,14 @@ import Language import Parser import Range import Source +import Syntax import Foreign import Foreign.C.String import Text.Parser.TreeSitter hiding (Language(..)) import qualified Text.Parser.TreeSitter as TS -- | Returns a TreeSitter parser for the given language and TreeSitter grammar. -treeSitterParser :: Language -> Ptr TS.Language -> Parser '[Range, Category, Cost] +treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax Text) (Record '[Range, Category, Cost]) treeSitterParser language grammar contents = do document <- ts_document_make ts_document_set_language document grammar @@ -50,7 +51,7 @@ defaultCategoryForNodeName name = case name of _ -> Other name -- | Return a parser for a tree sitter language & document. -documentToTerm :: Language -> Ptr Document -> Parser '[Range, Category, Cost] +documentToTerm :: Language -> Ptr Document -> Parser (Syntax Text) (Record '[Range, Category, Cost]) documentToTerm language document contents = alloca $ \ root -> do ts_document_root_node_p document root toTerm root From 554b04f61190ce7b032757148162c1a520a3f191 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 17:43:59 -0400 Subject: [PATCH 005/208] Placate hlint. --- test/CorpusSpec.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 1b10f015d..892be55f5 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -26,7 +26,7 @@ import Test.Hspec spec :: Spec spec = parallel $ do - describe "crashers crash" $ runTestsIn "test/crashers-todo/" $ \ a b -> a `deepseq` pure (a == b) `shouldThrow` anyException + describe "crashers crash" . runTestsIn "test/crashers-todo/" $ \ a b -> a `deepseq` pure (a == b) `shouldThrow` anyException describe "crashers should not crash" $ runTestsIn "test/crashers/" shouldBe describe "todos are incorrect" $ runTestsIn "test/diffs-todo/" shouldNotBe describe "should produce the correct diff" $ runTestsIn "test/diffs/" shouldBe @@ -40,7 +40,7 @@ spec = parallel $ do runTestsIn directory matcher = do paths <- runIO $ examples directory let tests = correctTests =<< paths - traverse_ (\ (formatName, renderer, paths, output) -> it (normalizeName (fst paths) ++ " (" ++ formatName ++ ")") $ testDiff renderer paths output matcher) tests + traverse_ (\ (formatName, renderer, paths, output) -> it (normalizeName (fst paths) <> " (" <> formatName <> ")") $ testDiff renderer paths output matcher) tests correctTests paths@(_, Nothing, Nothing, Nothing) = testsForPaths paths correctTests paths = List.filter (\(_, _, _, output) -> isJust output) $ testsForPaths paths From 1b0e1c566508766cc7e4e4b81cca306cf3a9f957 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 17:44:14 -0400 Subject: [PATCH 006/208] =?UTF-8?q?Add=20a=20function=20to=20decorate=20a?= =?UTF-8?q?=20parser=E2=80=99s=20output.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Diffing.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Diffing.hs b/src/Diffing.hs index aa790e69d..7c5138851 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -77,6 +77,9 @@ readAndTranscodeFile path = do text <- B1.readFile path transcode text +decorateParser :: Functor f => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Parser f (Record fields) -> Parser f (Record (field ': fields)) +decorateParser decorator = (fmap (decorateTerm decorator) .) + decorateTerm :: Functor f => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c) From ac3ddea7c171965f43da83ac9af57afa981d25f5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 17:45:26 -0400 Subject: [PATCH 007/208] Add the missing TypeOperators pragma. --- src/Diffing.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 7c5138851..77a6294a4 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DataKinds, TypeOperators #-} module Diffing where import Prologue hiding (fst, snd) From 2f72c6f011224e2a6ad0b833fc3dbe790c119655 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 17:45:48 -0400 Subject: [PATCH 008/208] Redefine termCost as a decorator. --- src/Diffing.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 77a6294a4..b214f37ad 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -107,8 +107,8 @@ diffFiles parser renderer sourceBlobs = do Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch))) shouldCompareTerms = (==) `on` category . extract -termCost :: (Prologue.Foldable f, Functor f) => Cofree f (Record a) -> Cost -termCost = cata $ \ c -> 1 + sum (tailF c) +termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Cost ': a)) -> Cost +termCost c = 1 + sum (cost <$> tailF c) -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer From 9d495a2936d065e61454eb4c1c436d24cf1fc472 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 17:49:09 -0400 Subject: [PATCH 009/208] =?UTF-8?q?Decorate=20the=20spec=E2=80=99s=20parse?= =?UTF-8?q?r.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CorpusSpec.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 892be55f5..bb56d7432 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -70,10 +70,10 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte -- | Given file paths for A, B, and, optionally, a diff, return whether diffing -- | the files will produce the diff. If no diff is provided, then the result -- | is true, but the diff will still be calculated. -testDiff :: Renderer (Record '[Range, Category, Cost]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation +testDiff :: Renderer (Record '[Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation testDiff renderer paths diff matcher = do sources <- sequence $ readAndTranscodeFile <$> paths - actual <- Verbatim <$> diffFiles parser renderer (sourceBlobs sources) + actual <- Verbatim <$> diffFiles (decorateParser termCost parser) renderer (sourceBlobs sources) case diff of Nothing -> matcher actual actual Just file -> do From 1127fd2672fc48e6ed6bd2d3ccf34b83b921530e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 17:49:27 -0400 Subject: [PATCH 010/208] :fire: the ad hoc computation of term costs. --- src/Diffing.hs | 9 ++++----- src/TreeSitter.hs | 8 +++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index b214f37ad..8cedc4bdb 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -40,9 +40,8 @@ lineByLineParser input = pure . cofree . root $ case foldl' annotateLeaves ([], (leaves, _) -> cofree <$> leaves where lines = actualLines input - root children = let cost = 1 + fromIntegral (length children) in - ((Range 0 $ length input) .: Other "program" .: cost .: RNil) :< Indexed children - leaf charIndex line = ((Range charIndex $ charIndex + T.length line) .: Other "program" .: 1 .: RNil) :< Leaf line + root children = ((Range 0 $ length input) .: Other "program" .: RNil) :< Indexed children + leaf charIndex line = ((Range charIndex $ charIndex + T.length line) .: Other "program" .: RNil) :< Leaf line annotateLeaves (accum, charIndex) line = (accum <> [ leaf charIndex (toText line) ] , charIndex + length line) @@ -92,12 +91,12 @@ diffFiles parser renderer sourceBlobs = do let sources = source <$> sourceBlobs terms <- sequence $ parser <$> sources - let replaceLeaves = breakDownLeavesByWord <$> sources + let preprocessed = breakDownLeavesByWord <$> sources <*> terms let areNullOids = runJoin $ (== nullOid) . oid <$> sourceBlobs let textDiff = case areNullOids of (True, False) -> pure $ Insert (snd terms) (False, True) -> pure $ Delete (fst terms) - (_, _) -> runBothWith (diffTerms construct shouldCompareTerms diffCostWithCachedTermCosts) $ replaceLeaves <*> terms + _ -> runBothWith (diffTerms construct shouldCompareTerms diffCostWithCachedTermCosts) preprocessed pure $! renderer textDiff sourceBlobs where construct (info :< syntax) = free (Free ((setCost <$> info <*> sumCost syntax) :< syntax)) diff --git a/src/TreeSitter.hs b/src/TreeSitter.hs index 2e35de85f..02fed42bc 100644 --- a/src/TreeSitter.hs +++ b/src/TreeSitter.hs @@ -5,7 +5,6 @@ import Prologue hiding (Constructor) import Data.Record import Data.String import Category -import Info import Language import Parser import Range @@ -17,7 +16,7 @@ import Text.Parser.TreeSitter hiding (Language(..)) import qualified Text.Parser.TreeSitter as TS -- | Returns a TreeSitter parser for the given language and TreeSitter grammar. -treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax Text) (Record '[Range, Category, Cost]) +treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax Text) (Record '[Range, Category]) treeSitterParser language grammar contents = do document <- ts_document_make ts_document_set_language document grammar @@ -51,7 +50,7 @@ defaultCategoryForNodeName name = case name of _ -> Other name -- | Return a parser for a tree sitter language & document. -documentToTerm :: Language -> Ptr Document -> Parser (Syntax Text) (Record '[Range, Category, Cost]) +documentToTerm :: Language -> Ptr Document -> Parser (Syntax Text) (Record '[Range, Category]) documentToTerm language document contents = alloca $ \ root -> do ts_document_root_node_p document root toTerm root @@ -63,8 +62,7 @@ documentToTerm language document contents = alloca $ \ root -> do -- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after we’ve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it. range <- pure $! Range { start = fromIntegral $ ts_node_p_start_char node, end = fromIntegral $ ts_node_p_end_char node } - let cost' = 1 + sum (cost . extract <$> children) - let info = range .: (categoriesForLanguage language name) .: cost' .: RNil + let info = range .: (categoriesForLanguage language name) .: RNil pure $! termConstructor contents info children getChild node n out = do _ <- ts_node_p_named_child node n out From e8316dece879484846d52e84e88b638aa0f9397a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 15 Jul 2016 18:16:17 -0400 Subject: [PATCH 011/208] Break down leaves by word in the parser. --- src/Diffing.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 8cedc4bdb..c37dee91a 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -49,7 +49,9 @@ lineByLineParser input = pure . cofree . root $ case foldl' annotateLeaves ([], -- | Return the parser that should be used for a given path. parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Range, Category]) -parserForFilepath = parserForType . T.pack . takeExtension +parserForFilepath path source = do + parsed <- parserForType (T.pack (takeExtension path)) source + pure $! breakDownLeavesByWord source parsed -- | Replace every string leaf with leaves of the words in the string. breakDownLeavesByWord :: HasField fields Range => Source Char -> Term T.Text (Record fields) -> Term T.Text (Record fields) @@ -91,12 +93,11 @@ diffFiles parser renderer sourceBlobs = do let sources = source <$> sourceBlobs terms <- sequence $ parser <$> sources - let preprocessed = breakDownLeavesByWord <$> sources <*> terms let areNullOids = runJoin $ (== nullOid) . oid <$> sourceBlobs let textDiff = case areNullOids of (True, False) -> pure $ Insert (snd terms) (False, True) -> pure $ Delete (fst terms) - _ -> runBothWith (diffTerms construct shouldCompareTerms diffCostWithCachedTermCosts) preprocessed + _ -> runBothWith (diffTerms construct shouldCompareTerms diffCostWithCachedTermCosts) terms pure $! renderer textDiff sourceBlobs where construct (info :< syntax) = free (Free ((setCost <$> info <*> sumCost syntax) :< syntax)) From 864e8a4539081fb1ac7229942d4949f3b7002417 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 15:34:14 -0400 Subject: [PATCH 012/208] DRY up textDiff. --- src/Diffing.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index d201e70a6..007b84fa6 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -126,11 +126,12 @@ diffCostWithCachedTermCosts diff = unCost $ case runFree diff of -- | Returns a rendered diff given a parser, diff arguments and two source blobs. textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Text -textDiff parser arguments sources = case format arguments of - Split -> diffFiles parser split sources - Patch -> diffFiles parser patch sources - JSON -> diffFiles parser json sources - Summary -> diffFiles parser summary sources +textDiff parser arguments sources = diffFiles parser renderer sources + where renderer = case format arguments of + Split -> split + Patch -> patch + JSON -> json + Summary -> summary -- | Returns a truncated diff given diff arguments and two source blobs. truncatedDiff :: DiffArguments -> Both SourceBlob -> IO Text From 2ed1c4152dd1cc8e292ae64f0c4b33b11354a53b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 17:17:35 -0400 Subject: [PATCH 013/208] Derive some Typeable instances. --- src/Category.hs | 2 +- src/Data/Record.hs | 1 + src/Info.hs | 2 +- src/Range.hs | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Category.hs b/src/Category.hs index 5dd35d817..26aceea4a 100644 --- a/src/Category.hs +++ b/src/Category.hs @@ -30,7 +30,7 @@ data Category | ArrayLiteral -- | A non-standard category, which can be used for comparability. | Other String - deriving (Eq, Generic, Ord, Show) + deriving (Eq, Generic, Ord, Show, Typeable) -- Instances diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 70646facc..c5c2ce65a 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -11,6 +11,7 @@ import Test.QuickCheck data Record :: [*] -> * where RNil :: Record '[] RCons :: h -> Record t -> Record (h ': t) + deriving Typeable infixr 0 .: diff --git a/src/Info.hs b/src/Info.hs index 5c7b8223d..309944bf4 100644 --- a/src/Info.hs +++ b/src/Info.hs @@ -8,7 +8,7 @@ import Range import Test.QuickCheck newtype Cost = Cost { unCost :: Integer } - deriving (Eq, Num, Ord, Show) + deriving (Eq, Num, Ord, Show, Typeable) characterRange :: HasField fields Range => Record fields -> Range characterRange = getField diff --git a/src/Range.hs b/src/Range.hs index 4479253b4..504357edc 100644 --- a/src/Range.hs +++ b/src/Range.hs @@ -9,7 +9,7 @@ import Test.QuickCheck -- | A half-open interval of integers, defined by start & end indices. data Range = Range { start :: !Int, end :: !Int } - deriving (Eq, Show) + deriving (Eq, Show, Typeable) -- | Make a range at a given index. rangeAt :: Int -> Range From a71f9f05cc676c5767a51ff6f0eb1076c941898e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 17:21:13 -0400 Subject: [PATCH 014/208] Simplify the PatchOutputSpec annotations. Also applies some HLint advice. --- test/PatchOutputSpec.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/PatchOutputSpec.hs b/test/PatchOutputSpec.hs index 429d18e70..b03eddf92 100644 --- a/test/PatchOutputSpec.hs +++ b/test/PatchOutputSpec.hs @@ -7,11 +7,10 @@ import Range import Renderer.Patch import Source import Syntax -import Category import Test.Hspec spec :: Spec -spec = parallel $ - describe "hunks" $ +spec = parallel $ do + describe "hunks" $ do it "empty diffs have empty hunks" $ - hunks (free . Free $ pure (Range 0 0 .: StringLiteral .: 1 .: 0 .: RNil) :< Leaf "") (both (SourceBlob (fromList "") "abcde" "path2.txt" (Just defaultPlainBlob)) (SourceBlob (fromList "") "xyz" "path2.txt" (Just defaultPlainBlob))) `shouldBe` [Hunk {offset = pure 0, changes = [], trailingContext = []}] + hunks (wrap $ pure (Range 0 0 .: RNil) :< Leaf ("" :: Text)) (both (SourceBlob (fromList "") "abcde" "path2.txt" (Just defaultPlainBlob)) (SourceBlob (fromList "") "xyz" "path2.txt" (Just defaultPlainBlob))) `shouldBe` [Hunk {offset = pure 0, changes = [], trailingContext = []}] From e50b1a5f61c58ca2f5e744034ea0969f531c6920 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 17:25:54 -0400 Subject: [PATCH 015/208] Constrain record fields to be Typeable. --- src/Data/Record.hs | 6 +++--- src/Diffing.hs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index c5c2ce65a..15aa0ccf4 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -10,13 +10,13 @@ import Test.QuickCheck -- | This is heavily inspired by Aaron Levin’s [Extensible Effects in the van Laarhoven Free Monad](http://aaronlevin.ca/post/136494428283/extensible-effects-in-the-van-laarhoven-free-monad). data Record :: [*] -> * where RNil :: Record '[] - RCons :: h -> Record t -> Record (h ': t) + RCons :: Typeable h => h -> Record t -> Record (h ': t) deriving Typeable infixr 0 .: -- | Infix synonym for `RCons`: `a .: b .: RNil == RCons a (RCons b RNil)`. -(.:) :: h -> Record t -> Record (h ': t) +(.:) :: Typeable h => h -> Record t -> Record (h ': t) (.:) = RCons @@ -63,7 +63,7 @@ instance Ord (Record '[]) where _ `compare` _ = EQ -instance (Arbitrary field, Arbitrary (Record fields)) => Arbitrary (Record (field ': fields)) where +instance (Typeable field, Arbitrary field, Arbitrary (Record fields)) => Arbitrary (Record (field ': fields)) where arbitrary = RCons <$> arbitrary <*> arbitrary shrink (RCons h t) = RCons <$> shrink h <*> shrink t diff --git a/src/Diffing.hs b/src/Diffing.hs index 007b84fa6..38081502c 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -85,10 +85,10 @@ readAndTranscodeFile path = do text <- B1.readFile path transcode text -decorateParser :: Functor f => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Parser f (Record fields) -> Parser f (Record (field ': fields)) +decorateParser :: (Typeable field, Functor f) => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Parser f (Record fields) -> Parser f (Record (field ': fields)) decorateParser decorator = (fmap (decorateTerm decorator) .) -decorateTerm :: Functor f => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) +decorateTerm :: (Typeable field, Functor f) => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c) -- | Given a parser and renderer, diff two sources and return the rendered From 107077c493884d33ecef23be37d61e589077a4c7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 17:26:03 -0400 Subject: [PATCH 016/208] Add a maybeGetField function over Records. --- src/Data/Record.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 15aa0ccf4..e5babffcc 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -19,6 +19,10 @@ infixr 0 .: (.:) :: Typeable h => h -> Record t -> Record (h ': t) (.:) = RCons +maybeGetField :: Typeable field => Record fields -> Maybe field +maybeGetField (RCons h t) = cast h <|> maybeGetField t +maybeGetField RNil = Nothing + -- Classes From e3eeb617a635698d310ddec4674a1dfcfb6c8e69 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 17:26:51 -0400 Subject: [PATCH 017/208] Annotate with costs when any are provided. --- src/Renderer/Split.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Renderer/Split.hs b/src/Renderer/Split.hs index 7ccf3085b..f33b76387 100644 --- a/src/Renderer/Split.hs +++ b/src/Renderer/Split.hs @@ -118,10 +118,12 @@ instance (HasField fields Category, HasField fields Cost, HasField fields Range) instance (HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (SplitDiff leaf (Record fields))) where toMarkup (Renderable source diff) = Prologue.fst . iter (\ t -> (toMarkup $ Renderable source t, characterRange (headF t))) $ toMarkupAndRange <$> diff where toMarkupAndRange patch = let term@(info :< _) = runCofree $ getSplitTerm patch in - ((div ! patchAttribute patch `withCostAttribute` cost info) . toMarkup $ Renderable source (cofree term), characterRange info) + ((div ! patchAttribute patch `withCostAttribute` maybeCost info) . toMarkup $ Renderable source (cofree term), characterRange info) patchAttribute patch = A.class_ (splitPatchToClassName patch) - withCostAttribute a (Cost c) | c > 0 = a ! A.data_ (stringValue (show c)) - | otherwise = identity + withCostAttribute a c | Just (Cost c) <- c, c > 0 = a ! A.data_ (stringValue (show c)) + | otherwise = identity + maybeCost :: Record fields -> Maybe Cost + maybeCost = maybeGetField instance ToMarkup a => ToMarkup (Cell a) where toMarkup (Cell hasChanges num line) = From 590509aaf892bdc7921691225126ef18b2b428b8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 17:27:16 -0400 Subject: [PATCH 018/208] :fire: the Cost constraints. --- src/Renderer/Split.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Renderer/Split.hs b/src/Renderer/Split.hs index f33b76387..c0d8377bc 100644 --- a/src/Renderer/Split.hs +++ b/src/Renderer/Split.hs @@ -53,7 +53,7 @@ splitPatchToClassName patch = stringValue $ "patch " <> case patch of SplitReplace _ -> "replace" -- | Render a diff as an HTML split diff. -split :: (HasField fields Category, HasField fields Cost, HasField fields Range) => Renderer (Record fields) +split :: (HasField fields Category, HasField fields Range) => Renderer (Record fields) split diff blobs = TL.toStrict . renderHtml . docTypeHtml . ((head $ link ! A.rel "stylesheet" ! A.href "style.css") <>) @@ -107,15 +107,15 @@ wrapIn f p = f p -- Instances -instance (ToMarkup f, HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (CofreeF (Syntax leaf) (Record fields) (f, Range))) where +instance (ToMarkup f, HasField fields Category, HasField fields Range) => ToMarkup (Renderable (CofreeF (Syntax leaf) (Record fields) (f, Range))) where toMarkup (Renderable source (info :< syntax)) = classifyMarkup (category info) $ case syntax of Leaf _ -> span . string . toString $ slice (characterRange info) source _ -> ul . mconcat $ wrapIn li <$> contentElements source (characterRange info) (toList syntax) -instance (HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (Term leaf (Record fields))) where +instance (HasField fields Category, HasField fields Range) => ToMarkup (Renderable (Term leaf (Record fields))) where toMarkup (Renderable source term) = Prologue.fst $ cata (\ t -> (toMarkup $ Renderable source t, characterRange (headF t))) term -instance (HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (SplitDiff leaf (Record fields))) where +instance (HasField fields Category, HasField fields Range) => ToMarkup (Renderable (SplitDiff leaf (Record fields))) where toMarkup (Renderable source diff) = Prologue.fst . iter (\ t -> (toMarkup $ Renderable source t, characterRange (headF t))) $ toMarkupAndRange <$> diff where toMarkupAndRange patch = let term@(info :< _) = runCofree $ getSplitTerm patch in ((div ! patchAttribute patch `withCostAttribute` maybeCost info) . toMarkup $ Renderable source (cofree term), characterRange info) From 318d0015120df4838ea100c2bf1e7704d97fc4a1 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 18:53:56 -0400 Subject: [PATCH 019/208] Add a function to update cons records. --- src/Data/Record.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index e5babffcc..613ddabff 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, TypeOperators #-} +{-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, ScopedTypeVariables, TypeOperators #-} module Data.Record where import GHC.Show @@ -23,6 +23,11 @@ maybeGetField :: Typeable field => Record fields -> Maybe field maybeGetField (RCons h t) = cast h <|> maybeGetField t maybeGetField RNil = Nothing +updateRCons :: forall h t field. (Typeable h, Typeable field) => Record (h ': t) -> field -> Record (h ': t) +updateRCons (RCons h t) a = case eqT :: Maybe (h :~: field) of + Just Refl -> RCons a t + Nothing -> RCons h (updateField t a) + -- Classes From 8bc7c4564d759b080013a9bd07dc3ca6c30e723c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 18:54:02 -0400 Subject: [PATCH 020/208] Add a function to update fields. --- src/Data/Record.hs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 613ddabff..421476c6b 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -23,6 +23,11 @@ maybeGetField :: Typeable field => Record fields -> Maybe field maybeGetField (RCons h t) = cast h <|> maybeGetField t maybeGetField RNil = Nothing +updateField :: forall field fields. Typeable field => Record fields -> field -> Record fields +updateField record a = case record of + RNil -> RNil + cons@(RCons _ _) -> updateRCons cons a + updateRCons :: forall h t field. (Typeable h, Typeable field) => Record (h ': t) -> field -> Record (h ': t) updateRCons (RCons h t) a = case eqT :: Maybe (h :~: field) of Just Refl -> RCons a t From fa0bcecdd7ebffe42099906720c6ac6c22ce0437 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 18:59:57 -0400 Subject: [PATCH 021/208] Update costs. --- src/Diffing.hs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 38081502c..85e4984ce 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -107,11 +107,13 @@ diffFiles parser renderer sourceBlobs = do _ -> runBothWith (diffTerms construct shouldCompareTerms diffCostWithCachedTermCosts) terms pure $! renderer textDiff sourceBlobs - where construct (info :< syntax) = free (Free ((setCost <$> info <*> sumCost syntax) :< syntax)) + where construct (info :< syntax) = free (Free ((updateField <$> info <*> sumCost syntax) :< syntax)) sumCost = fmap getSum . foldMap (fmap Sum . getCost) - getCost diff = case runFree diff of - Free (info :< _) -> cost <$> info - Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch))) + getCost diff = fromMaybe 0 <$> case runFree diff of + Free (info :< _) -> maybeCost <$> info + Pure patch -> uncurry both (fromThese Nothing Nothing (unPatch (maybeCost . extract <$> patch))) + maybeCost :: Record fields -> Maybe Cost + maybeCost = maybeGetField shouldCompareTerms = (==) `on` category . extract termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Cost ': a)) -> Cost From a96a52055758fb954e23d4acb36526196f2b8f9b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 19:05:27 -0400 Subject: [PATCH 022/208] Define maybeCost in Info. --- src/Info.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Info.hs b/src/Info.hs index 309944bf4..8c952b436 100644 --- a/src/Info.hs +++ b/src/Info.hs @@ -25,6 +25,9 @@ setCategory = setField cost :: HasField fields Cost => Record fields -> Cost cost = getField +maybeCost :: Record fields -> Maybe Cost +maybeCost = maybeGetField + setCost :: HasField fields Cost => Record fields -> Cost -> Record fields setCost = setField From bd69c14c77e215409fc131e5241233945ed969ba Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 19:06:03 -0400 Subject: [PATCH 023/208] =?UTF-8?q?Use=20Info=E2=80=99s=20maybeCost=20defi?= =?UTF-8?q?nition.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Diffing.hs | 2 -- src/Renderer/Split.hs | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 85e4984ce..926e39f32 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -112,8 +112,6 @@ diffFiles parser renderer sourceBlobs = do getCost diff = fromMaybe 0 <$> case runFree diff of Free (info :< _) -> maybeCost <$> info Pure patch -> uncurry both (fromThese Nothing Nothing (unPatch (maybeCost . extract <$> patch))) - maybeCost :: Record fields -> Maybe Cost - maybeCost = maybeGetField shouldCompareTerms = (==) `on` category . extract termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Cost ': a)) -> Cost diff --git a/src/Renderer/Split.hs b/src/Renderer/Split.hs index c0d8377bc..65ed13fae 100644 --- a/src/Renderer/Split.hs +++ b/src/Renderer/Split.hs @@ -122,8 +122,6 @@ instance (HasField fields Category, HasField fields Range) => ToMarkup (Renderab patchAttribute patch = A.class_ (splitPatchToClassName patch) withCostAttribute a c | Just (Cost c) <- c, c > 0 = a ! A.data_ (stringValue (show c)) | otherwise = identity - maybeCost :: Record fields -> Maybe Cost - maybeCost = maybeGetField instance ToMarkup a => ToMarkup (Cell a) where toMarkup (Cell hasChanges num line) = From be5748bde1e2456f5633078dbd615d6effd3fd19 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 19:06:17 -0400 Subject: [PATCH 024/208] Use toValue. :tophat: @joshvera. --- src/Renderer/Split.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Renderer/Split.hs b/src/Renderer/Split.hs index 65ed13fae..11e188206 100644 --- a/src/Renderer/Split.hs +++ b/src/Renderer/Split.hs @@ -120,7 +120,7 @@ instance (HasField fields Category, HasField fields Range) => ToMarkup (Renderab where toMarkupAndRange patch = let term@(info :< _) = runCofree $ getSplitTerm patch in ((div ! patchAttribute patch `withCostAttribute` maybeCost info) . toMarkup $ Renderable source (cofree term), characterRange info) patchAttribute patch = A.class_ (splitPatchToClassName patch) - withCostAttribute a c | Just (Cost c) <- c, c > 0 = a ! A.data_ (stringValue (show c)) + withCostAttribute a c | Just (Cost c) <- c, c > 0 = a ! A.data_ (toValue c) | otherwise = identity instance ToMarkup a => ToMarkup (Cell a) where From 39e4b26bb9d679e956e4628a7095f0af7444fa06 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 19:06:48 -0400 Subject: [PATCH 025/208] Generalize diffCostWithCachedTermCosts to work without caches. --- src/Diffing.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 926e39f32..73bd502e1 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -118,10 +118,10 @@ termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Co termCost c = 1 + sum (cost <$> tailF c) -- | The sum of the node count of the diff’s patches. -diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer -diffCostWithCachedTermCosts diff = unCost $ case runFree diff of - Free (info :< _) -> sum (cost <$> info) - Pure patch -> sum (cost . extract <$> patch) +diffCostWithCachedTermCosts :: Diff leaf (Record fields) -> Integer +diffCostWithCachedTermCosts diff = maybe 0 (unCost . getSum) $ case runFree diff of + Free (info :< _) -> foldMap (fmap Sum . maybeCost) info + Pure patch -> foldMap (fmap Sum . maybeCost . extract) patch -- | Returns a rendered diff given a parser, diff arguments and two source blobs. From d7939a54f34ab0626d036f329696933dbf1521c5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 21 Jul 2016 19:07:55 -0400 Subject: [PATCH 026/208] Generalize diffFiles, textDiff, and printDiff to not require Cost fields. --- src/Diffing.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 73bd502e1..0ee7ac326 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -95,7 +95,7 @@ decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: hea -- | result. -- | Returns the rendered result strictly, so it's always fully evaluated -- | with respect to other IO actions. -diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO T.Text +diffFiles :: (HasField fields Category, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO T.Text diffFiles parser renderer sourceBlobs = do let sources = source <$> sourceBlobs terms <- sequence $ parser <$> sources @@ -125,7 +125,7 @@ diffCostWithCachedTermCosts diff = maybe 0 (unCost . getSum) $ case runFree diff -- | Returns a rendered diff given a parser, diff arguments and two source blobs. -textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Text +textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Text textDiff parser arguments sources = diffFiles parser renderer sources where renderer = case format arguments of Split -> split @@ -142,7 +142,7 @@ truncatedDiff arguments sources = case format arguments of Summary -> pure "" -- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs. -printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO () +printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO () printDiff parser arguments sources = do rendered <- textDiff parser arguments sources case (output arguments) of From 7be312197d0dc351123c0172761166b83607db13 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:02:12 -0400 Subject: [PATCH 027/208] Revert "Generalize diffFiles, textDiff, and printDiff to not require Cost fields." This reverts commit 9e9e368520263ab6c7c00dd79ed10001a6886f95. --- src/Diffing.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 0ee7ac326..73bd502e1 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -95,7 +95,7 @@ decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: hea -- | result. -- | Returns the rendered result strictly, so it's always fully evaluated -- | with respect to other IO actions. -diffFiles :: (HasField fields Category, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO T.Text +diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO T.Text diffFiles parser renderer sourceBlobs = do let sources = source <$> sourceBlobs terms <- sequence $ parser <$> sources @@ -125,7 +125,7 @@ diffCostWithCachedTermCosts diff = maybe 0 (unCost . getSum) $ case runFree diff -- | Returns a rendered diff given a parser, diff arguments and two source blobs. -textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Text +textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Text textDiff parser arguments sources = diffFiles parser renderer sources where renderer = case format arguments of Split -> split @@ -142,7 +142,7 @@ truncatedDiff arguments sources = case format arguments of Summary -> pure "" -- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs. -printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO () +printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO () printDiff parser arguments sources = do rendered <- textDiff parser arguments sources case (output arguments) of From 8c7ba2dadb2cb5e7122462b3a50c375e8fb8bb03 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:02:15 -0400 Subject: [PATCH 028/208] Revert "Generalize diffCostWithCachedTermCosts to work without caches." This reverts commit 793ebe0059dadc656c880ef82cd4808ec1a75cc3. --- src/Diffing.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 73bd502e1..926e39f32 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -118,10 +118,10 @@ termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Co termCost c = 1 + sum (cost <$> tailF c) -- | The sum of the node count of the diff’s patches. -diffCostWithCachedTermCosts :: Diff leaf (Record fields) -> Integer -diffCostWithCachedTermCosts diff = maybe 0 (unCost . getSum) $ case runFree diff of - Free (info :< _) -> foldMap (fmap Sum . maybeCost) info - Pure patch -> foldMap (fmap Sum . maybeCost . extract) patch +diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer +diffCostWithCachedTermCosts diff = unCost $ case runFree diff of + Free (info :< _) -> sum (cost <$> info) + Pure patch -> sum (cost . extract <$> patch) -- | Returns a rendered diff given a parser, diff arguments and two source blobs. From 7a4585d621cb2f13e8c78893858daaeb8267ba12 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:02:24 -0400 Subject: [PATCH 029/208] =?UTF-8?q?Revert=20"Use=20Info=E2=80=99s=20maybeC?= =?UTF-8?q?ost=20definition."?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 881e56b48bf186e996ff740412290f4786852e44. --- src/Diffing.hs | 2 ++ src/Renderer/Split.hs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/Diffing.hs b/src/Diffing.hs index 926e39f32..85e4984ce 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -112,6 +112,8 @@ diffFiles parser renderer sourceBlobs = do getCost diff = fromMaybe 0 <$> case runFree diff of Free (info :< _) -> maybeCost <$> info Pure patch -> uncurry both (fromThese Nothing Nothing (unPatch (maybeCost . extract <$> patch))) + maybeCost :: Record fields -> Maybe Cost + maybeCost = maybeGetField shouldCompareTerms = (==) `on` category . extract termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Cost ': a)) -> Cost diff --git a/src/Renderer/Split.hs b/src/Renderer/Split.hs index 11e188206..a7ac1683e 100644 --- a/src/Renderer/Split.hs +++ b/src/Renderer/Split.hs @@ -122,6 +122,8 @@ instance (HasField fields Category, HasField fields Range) => ToMarkup (Renderab patchAttribute patch = A.class_ (splitPatchToClassName patch) withCostAttribute a c | Just (Cost c) <- c, c > 0 = a ! A.data_ (toValue c) | otherwise = identity + maybeCost :: Record fields -> Maybe Cost + maybeCost = maybeGetField instance ToMarkup a => ToMarkup (Cell a) where toMarkup (Cell hasChanges num line) = From 880b660f6f7017e5d9aa4fdd09190928c992b854 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:02:27 -0400 Subject: [PATCH 030/208] Revert "Define maybeCost in Info." This reverts commit 28b097f2f37abe6ee155f19b89dad86009997b6e. --- src/Info.hs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Info.hs b/src/Info.hs index 8c952b436..309944bf4 100644 --- a/src/Info.hs +++ b/src/Info.hs @@ -25,9 +25,6 @@ setCategory = setField cost :: HasField fields Cost => Record fields -> Cost cost = getField -maybeCost :: Record fields -> Maybe Cost -maybeCost = maybeGetField - setCost :: HasField fields Cost => Record fields -> Cost -> Record fields setCost = setField From 405850a4a260ec8496b23e52b6d9d901dc3ab55d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:02:37 -0400 Subject: [PATCH 031/208] Revert "Update costs." This reverts commit 1905bda8deebed96a502ea8e8bd97e5f78eba943. --- src/Diffing.hs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 85e4984ce..38081502c 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -107,13 +107,11 @@ diffFiles parser renderer sourceBlobs = do _ -> runBothWith (diffTerms construct shouldCompareTerms diffCostWithCachedTermCosts) terms pure $! renderer textDiff sourceBlobs - where construct (info :< syntax) = free (Free ((updateField <$> info <*> sumCost syntax) :< syntax)) + where construct (info :< syntax) = free (Free ((setCost <$> info <*> sumCost syntax) :< syntax)) sumCost = fmap getSum . foldMap (fmap Sum . getCost) - getCost diff = fromMaybe 0 <$> case runFree diff of - Free (info :< _) -> maybeCost <$> info - Pure patch -> uncurry both (fromThese Nothing Nothing (unPatch (maybeCost . extract <$> patch))) - maybeCost :: Record fields -> Maybe Cost - maybeCost = maybeGetField + getCost diff = case runFree diff of + Free (info :< _) -> cost <$> info + Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch))) shouldCompareTerms = (==) `on` category . extract termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Cost ': a)) -> Cost From 831dad15d4108423f0af0d012474a0ea168e2665 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:02:52 -0400 Subject: [PATCH 032/208] Revert ":fire: the Cost constraints." This reverts commit 555418568439514a83b98444840d3df038d82750. --- src/Renderer/Split.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Renderer/Split.hs b/src/Renderer/Split.hs index a7ac1683e..414915b09 100644 --- a/src/Renderer/Split.hs +++ b/src/Renderer/Split.hs @@ -53,7 +53,7 @@ splitPatchToClassName patch = stringValue $ "patch " <> case patch of SplitReplace _ -> "replace" -- | Render a diff as an HTML split diff. -split :: (HasField fields Category, HasField fields Range) => Renderer (Record fields) +split :: (HasField fields Category, HasField fields Cost, HasField fields Range) => Renderer (Record fields) split diff blobs = TL.toStrict . renderHtml . docTypeHtml . ((head $ link ! A.rel "stylesheet" ! A.href "style.css") <>) @@ -107,15 +107,15 @@ wrapIn f p = f p -- Instances -instance (ToMarkup f, HasField fields Category, HasField fields Range) => ToMarkup (Renderable (CofreeF (Syntax leaf) (Record fields) (f, Range))) where +instance (ToMarkup f, HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (CofreeF (Syntax leaf) (Record fields) (f, Range))) where toMarkup (Renderable source (info :< syntax)) = classifyMarkup (category info) $ case syntax of Leaf _ -> span . string . toString $ slice (characterRange info) source _ -> ul . mconcat $ wrapIn li <$> contentElements source (characterRange info) (toList syntax) -instance (HasField fields Category, HasField fields Range) => ToMarkup (Renderable (Term leaf (Record fields))) where +instance (HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (Term leaf (Record fields))) where toMarkup (Renderable source term) = Prologue.fst $ cata (\ t -> (toMarkup $ Renderable source t, characterRange (headF t))) term -instance (HasField fields Category, HasField fields Range) => ToMarkup (Renderable (SplitDiff leaf (Record fields))) where +instance (HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (SplitDiff leaf (Record fields))) where toMarkup (Renderable source diff) = Prologue.fst . iter (\ t -> (toMarkup $ Renderable source t, characterRange (headF t))) $ toMarkupAndRange <$> diff where toMarkupAndRange patch = let term@(info :< _) = runCofree $ getSplitTerm patch in ((div ! patchAttribute patch `withCostAttribute` maybeCost info) . toMarkup $ Renderable source (cofree term), characterRange info) From ab4b0d4563c0f26828ceb4e0d58b3b59815c3a59 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:03:43 -0400 Subject: [PATCH 033/208] Revert "Annotate with costs when any are provided." This reverts commit 214fdc3b25ad04e2feeb90e088861b2dcf30fe62. --- src/Renderer/Split.hs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Renderer/Split.hs b/src/Renderer/Split.hs index 414915b09..7ccf3085b 100644 --- a/src/Renderer/Split.hs +++ b/src/Renderer/Split.hs @@ -118,12 +118,10 @@ instance (HasField fields Category, HasField fields Cost, HasField fields Range) instance (HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (SplitDiff leaf (Record fields))) where toMarkup (Renderable source diff) = Prologue.fst . iter (\ t -> (toMarkup $ Renderable source t, characterRange (headF t))) $ toMarkupAndRange <$> diff where toMarkupAndRange patch = let term@(info :< _) = runCofree $ getSplitTerm patch in - ((div ! patchAttribute patch `withCostAttribute` maybeCost info) . toMarkup $ Renderable source (cofree term), characterRange info) + ((div ! patchAttribute patch `withCostAttribute` cost info) . toMarkup $ Renderable source (cofree term), characterRange info) patchAttribute patch = A.class_ (splitPatchToClassName patch) - withCostAttribute a c | Just (Cost c) <- c, c > 0 = a ! A.data_ (toValue c) - | otherwise = identity - maybeCost :: Record fields -> Maybe Cost - maybeCost = maybeGetField + withCostAttribute a (Cost c) | c > 0 = a ! A.data_ (stringValue (show c)) + | otherwise = identity instance ToMarkup a => ToMarkup (Cell a) where toMarkup (Cell hasChanges num line) = From d37b9be8d2a4311e538ccb3b6f2e5157abb94cb1 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:07:04 -0400 Subject: [PATCH 034/208] Specify the export list for the record module. --- src/Data/Record.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 421476c6b..25c8283a9 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -1,5 +1,11 @@ {-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, ScopedTypeVariables, TypeOperators #-} -module Data.Record where +module Data.Record +( Record(RNil) +, (.:) +, HasField(..) +, maybeGetField +, updateField +) where import GHC.Show import Prologue From ca15d9db939072f11970b4f9a726577c8cd9fda7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:08:02 -0400 Subject: [PATCH 035/208] :fire: a redundant constraint. --- src/Data/Record.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 25c8283a9..f98284045 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -34,7 +34,7 @@ updateField record a = case record of RNil -> RNil cons@(RCons _ _) -> updateRCons cons a -updateRCons :: forall h t field. (Typeable h, Typeable field) => Record (h ': t) -> field -> Record (h ': t) +updateRCons :: forall h t field. Typeable field => Record (h ': t) -> field -> Record (h ': t) updateRCons (RCons h t) a = case eqT :: Maybe (h :~: field) of Just Refl -> RCons a t Nothing -> RCons h (updateField t a) From 23690a28f038728690b71c1f00e4ae8576ac28af Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 10:18:43 -0400 Subject: [PATCH 036/208] :memo: updateRCons, updateField, and maybeGetField. --- src/Data/Record.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index f98284045..ce0e7607d 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -25,15 +25,18 @@ infixr 0 .: (.:) :: Typeable h => h -> Record t -> Record (h ': t) (.:) = RCons +-- | Return 'Just' a 'field', if it exists in a record. Otherwise, return 'Nothing'. maybeGetField :: Typeable field => Record fields -> Maybe field maybeGetField (RCons h t) = cast h <|> maybeGetField t maybeGetField RNil = Nothing +-- | Update (replace the value of) 'field' in a record, if it exists. Otherwise, return the record unchanged. updateField :: forall field fields. Typeable field => Record fields -> field -> Record fields updateField record a = case record of RNil -> RNil cons@(RCons _ _) -> updateRCons cons a +-- | Update (replace the value of) 'field' in a non-empty record, if it exists. Otherwise return the record unchanged. updateRCons :: forall h t field. Typeable field => Record (h ': t) -> field -> Record (h ': t) updateRCons (RCons h t) a = case eqT :: Maybe (h :~: field) of Just Refl -> RCons a t From 163f94b87e36950b3114ccc9d7c9d87f92db0639 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 12:36:49 -0400 Subject: [PATCH 037/208] :memo: termCost. --- src/Diffing.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Diffing.hs b/src/Diffing.hs index 38081502c..3d4625a95 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -114,6 +114,7 @@ diffFiles parser renderer sourceBlobs = do Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch))) shouldCompareTerms = (==) `on` category . extract +-- | Compute the cost of an unpacked term. termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Cost ': a)) -> Cost termCost c = 1 + sum (cost <$> tailF c) From b64b9658932e85c53701da3884c6f804b9d626ab Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 12:38:59 -0400 Subject: [PATCH 038/208] :memo: decorateTerm. --- src/Diffing.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Diffing.hs b/src/Diffing.hs index 3d4625a95..349d373c8 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -88,6 +88,7 @@ readAndTranscodeFile path = do decorateParser :: (Typeable field, Functor f) => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Parser f (Record fields) -> Parser f (Record (field ': fields)) decorateParser decorator = (fmap (decorateTerm decorator) .) +-- | Decorate a 'Term' using a function to compute the annotation values at every node. decorateTerm :: (Typeable field, Functor f) => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c) From 1b229e32b925d29c059ea009f25747a4ab0b186b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 22 Jul 2016 12:39:03 -0400 Subject: [PATCH 039/208] :memo: decorateParser. --- src/Diffing.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Diffing.hs b/src/Diffing.hs index 349d373c8..e77af6723 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -85,6 +85,7 @@ readAndTranscodeFile path = do text <- B1.readFile path transcode text +-- | Decorate the 'Term's produced by a 'Parser' using a function to compute the annotation values at every node. decorateParser :: (Typeable field, Functor f) => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Parser f (Record fields) -> Parser f (Record (field ': fields)) decorateParser decorator = (fmap (decorateTerm decorator) .) From 893c1d9067e8e39be61966bd4f8b6d768256d98a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:01:54 -0400 Subject: [PATCH 040/208] Eta reduce textDiff. --- src/Diffing.hs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index b192f7e02..db52afeab 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -131,12 +131,11 @@ diffCostWithCachedTermCosts diff = unCost $ case runFree diff of -- | Returns a rendered diff given a parser, diff arguments and two source blobs. textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Text -textDiff parser arguments sources = diffFiles parser renderer sources - where renderer = case format arguments of - Split -> split - Patch -> patch - JSON -> json - Summary -> summary +textDiff parser arguments = diffFiles parser $ case format arguments of + Split -> split + Patch -> patch + JSON -> json + Summary -> summary -- | Returns a truncated diff given diff arguments and two source blobs. truncatedDiff :: DiffArguments -> Both SourceBlob -> IO Text From af88fad1024a35baffb75899b0511edadc273c8d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:03:00 -0400 Subject: [PATCH 041/208] Explicitly note that termCost is a decorator. --- src/Diffing.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index db52afeab..5e5882223 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -118,7 +118,7 @@ diffFiles parser renderer sourceBlobs = do compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Term leaf (Record fields) -> Bool compareCategoryEq = (==) `on` category . extract --- | Compute the cost of an unpacked term. +-- | Term decorator computing the cost of an unpacked term. termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Cost ': a)) -> Cost termCost c = 1 + sum (cost <$> tailF c) From 120c77ba294de8f1e075643cbdd8a0f296092859 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:04:40 -0400 Subject: [PATCH 042/208] Add & use a TermDecorator type synonym. --- src/Diffing.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 5e5882223..9e5107856 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -84,12 +84,14 @@ readAndTranscodeFile path = do text <- B1.readFile path transcode text +type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field + -- | Decorate the 'Term's produced by a 'Parser' using a function to compute the annotation values at every node. -decorateParser :: (Typeable field, Functor f) => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Parser f (Record fields) -> Parser f (Record (field ': fields)) +decorateParser :: (Typeable field, Functor f) => TermDecorator f fields field -> Parser f (Record fields) -> Parser f (Record (field ': fields)) decorateParser decorator = (fmap (decorateTerm decorator) .) -- | Decorate a 'Term' using a function to compute the annotation values at every node. -decorateTerm :: (Typeable field, Functor f) => (CofreeF f (Record fields) (Record (field ': fields)) -> field) -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) +decorateTerm :: (Typeable field, Functor f) => TermDecorator f fields field -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c) -- | Given a parser and renderer, diff two sources and return the rendered @@ -119,7 +121,7 @@ compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Te compareCategoryEq = (==) `on` category . extract -- | Term decorator computing the cost of an unpacked term. -termCost :: (Prologue.Foldable f, Functor f) => CofreeF f (Record a) (Record (Cost ': a)) -> Cost +termCost :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCost c = 1 + sum (cost <$> tailF c) -- | The sum of the node count of the diff’s patches. From afc0738c08ec1be1b8d53ec5b41e8ca6d5caf5a5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:05:47 -0400 Subject: [PATCH 043/208] :memo: TermDecorator. --- src/Diffing.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Diffing.hs b/src/Diffing.hs index 9e5107856..854f888ae 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -84,6 +84,7 @@ readAndTranscodeFile path = do text <- B1.readFile path transcode text +-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms. type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field -- | Decorate the 'Term's produced by a 'Parser' using a function to compute the annotation values at every node. From 08f7d717919572ebb6df379e679c0031dc2158ff Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:08:42 -0400 Subject: [PATCH 044/208] Rename termCost to termCostDecorator. --- src/Diffing.hs | 4 ++-- test/CorpusSpec.hs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 854f888ae..1e172d51c 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -122,8 +122,8 @@ compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Te compareCategoryEq = (==) `on` category . extract -- | Term decorator computing the cost of an unpacked term. -termCost :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost -termCost c = 1 + sum (cost <$> tailF c) +termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost +termCostDecorator c = 1 + sum (cost <$> tailF c) -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index d15c0454d..a9c196c27 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -74,7 +74,7 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte testDiff :: Renderer (Record '[Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation testDiff renderer paths diff matcher = do sources <- sequence $ readAndTranscodeFile <$> paths - actual <- Verbatim <$> diffFiles (decorateParser termCost parser) renderer (sourceBlobs sources) + actual <- Verbatim <$> diffFiles (decorateParser termCostDecorator parser) renderer (sourceBlobs sources) case diff of Nothing -> matcher actual actual Just file -> do From 00cf4bc008c837264dcff227db9dabfd198b331c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:12:56 -0400 Subject: [PATCH 045/208] Traverse instead of separately sequencing and fmapping. --- src/Diffing.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 1e172d51c..9d1e1eff2 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -102,7 +102,7 @@ decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: hea diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO Text diffFiles parser renderer sourceBlobs = do let sources = source <$> sourceBlobs - terms <- sequence $ parser <$> sources + terms <- traverse parser sources let areNullOids = runJoin $ (== nullOid) . oid <$> sourceBlobs let textDiff = case areNullOids of From 94a00db41af12107c2d3dab30bfc253e4cec2fb9 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:14:23 -0400 Subject: [PATCH 046/208] Eliminate a redundant binding. --- src/Diffing.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 9d1e1eff2..0fa5b4565 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -101,8 +101,7 @@ decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: hea -- | with respect to other IO actions. diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO Text diffFiles parser renderer sourceBlobs = do - let sources = source <$> sourceBlobs - terms <- traverse parser sources + terms <- traverse (parser . source) sourceBlobs let areNullOids = runJoin $ (== nullOid) . oid <$> sourceBlobs let textDiff = case areNullOids of From bc1e8f8dee5c2861b066b1fac3b23fb6c4b86092 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:20:25 -0400 Subject: [PATCH 047/208] Stub in a decorator for p,q-grams. --- src/Diffing.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 0fa5b4565..5702389cf 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -1,10 +1,12 @@ -{-# LANGUAGE DataKinds, TypeOperators #-} +{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-} module Diffing where import Prologue hiding (fst, snd) import qualified Data.ByteString.Char8 as B1 +import qualified Data.DList as DList import Data.Functor.Both import Data.Functor.Foldable +import Data.RandomWalkSimilarity import Data.Record import qualified Data.Text.IO as TextIO import qualified Data.Text.ICU.Detect as Detect @@ -124,6 +126,9 @@ compareCategoryEq = (==) `on` category . extract termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator c = 1 + sum (cost <$> tailF c) +pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> TermDecorator f a (DList.DList (Gram label)) +pqGramDecorator getLabel (a :< s) = empty + -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer diffCostWithCachedTermCosts diff = unCost $ case runFree diff of From 755d9877b4fbbcaab00975baa251ebcf04f2b32a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:22:22 -0400 Subject: [PATCH 048/208] pqGrams takes the label function before p and q. --- src/Data/RandomWalkSimilarity.hs | 6 +++--- test/Data/RandomWalkSimilarity/Spec.hs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 6f95a90bf..74d6fc718 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -39,7 +39,7 @@ rws compare getLabel as bs fas = zipWith featurize [0..] as fbs = zipWith featurize [0..] bs kdas = KdTree.build (Vector.toList . feature) fas - featurize index term = UnmappedTerm index (featureVector d (pqGrams p q getLabel term)) term + featurize index term = UnmappedTerm index (featureVector d (pqGrams getLabel p q term)) term findNearestNeighbourTo kv@(UnmappedTerm _ _ v) = do (previous, unmapped) <- get let (UnmappedTerm i _ _) = KdTree.nearest kdas kv @@ -62,8 +62,8 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } deriving (Eq, Show) -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. -pqGrams :: (Prologue.Foldable f, Functor f) => Int -> Int -> (forall b. CofreeF f annotation b -> label) -> Cofree f annotation -> DList.DList (Gram label) -pqGrams p q getLabel = uncurry DList.cons . cata merge . setRootBase . setRootStem . cata go +pqGrams :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f annotation b -> label) -> Int -> Int -> Cofree f annotation -> DList.DList (Gram label) +pqGrams getLabel p q = uncurry DList.cons . cata merge . setRootBase . setRootStem . cata go where go c = cofree (Gram [] [ Just (getLabel c) ] :< (assignParent (Just (getLabel c)) p <$> tailF c)) merge (head :< tail) = let tail' = toList tail in (head, DList.fromList (windowed q setBases [] (fst <$> tail')) <> foldMap snd tail') assignParent parentLabel n tree diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index 97074107b..145d65a84 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -17,10 +17,10 @@ spec :: Spec spec = parallel $ do describe "pqGrams" $ do prop "produces grams with stems of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGrams p q headF (toTerm term :: Term Text Text) `shouldSatisfy` all ((== p) . length . stem) + \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text Text) `shouldSatisfy` all ((== p) . length . stem) prop "produces grams with bases of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGrams p q headF (toTerm term :: Term Text Text) `shouldSatisfy` all ((== q) . length . base) + \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text Text) `shouldSatisfy` all ((== q) . length . base) describe "featureVector" $ do prop "produces a vector of the specified dimension" . forAll (arbitrary `suchThat` ((> 0) . Prologue.snd)) $ From 91e25e811ea2a4a9725fdbed881b7850d98a8473 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 27 Jul 2016 00:22:31 -0400 Subject: [PATCH 049/208] pqGramDecorator takes p and q parameters. --- src/Diffing.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 5702389cf..e01a17389 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -126,8 +126,8 @@ compareCategoryEq = (==) `on` category . extract termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator c = 1 + sum (cost <$> tailF c) -pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> TermDecorator f a (DList.DList (Gram label)) -pqGramDecorator getLabel (a :< s) = empty +pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (DList.DList (Gram label)) +pqGramDecorator getLabel p q (a :< s) = empty -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer From a5dce511fa69a9a9f590bdfeda63f1ff2242da83 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 12:08:26 -0400 Subject: [PATCH 050/208] Fix a missing import. --- src/TreeSitter.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/TreeSitter.hs b/src/TreeSitter.hs index 8962b3748..ed81b4222 100644 --- a/src/TreeSitter.hs +++ b/src/TreeSitter.hs @@ -6,6 +6,7 @@ import Data.Record import Category import Language import Parser +import Range import Source import qualified Syntax import Foreign From 3c93b41f25354dce22652b20de68bedf19a6f69f Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 12:10:54 -0400 Subject: [PATCH 051/208] :fire: a redundant import. --- src/Data/Record.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index eee4da541..ce0e7607d 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -10,7 +10,6 @@ module Data.Record import GHC.Show import Prologue import Test.QuickCheck -import GHC.Show (Show(..)) -- | A type-safe, extensible record structure. -- | From 82aae9b37afec3527fadc3ead9c1b59c2cba6890 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 12:11:05 -0400 Subject: [PATCH 052/208] Parsers take blobs now. --- src/Diffing.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index a8a0a4e48..8c068c7b9 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -41,7 +41,7 @@ import qualified Data.Text as T diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO Text diffFiles parser renderer sourceBlobs = do let sources = source <$> sourceBlobs - terms <- traverse (parser . source) sourceBlobs + terms <- traverse parser sourceBlobs let areNullOids = runBothWith (\a b -> (oid a == nullOid || null (source a), oid b == nullOid || null (source b))) sourceBlobs let textDiff = case areNullOids of From 78dd3b6b6db6279b3873facae7fced5f53e18b3b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 12:11:19 -0400 Subject: [PATCH 053/208] :fire: a redundant import in the prologue. --- src/Prologue.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Prologue.hs b/src/Prologue.hs index a37f4d92b..31f4be197 100644 --- a/src/Prologue.hs +++ b/src/Prologue.hs @@ -7,7 +7,6 @@ module Prologue import Protolude as X import Data.List (lookup) -import System.IO (FilePath) import Control.Comonad.Trans.Cofree as X import Control.Monad.Trans.Free as X From fc51f7ec909685ad33c29e3febad71bb45fe48a1 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 12:13:35 -0400 Subject: [PATCH 054/208] These are blobs. --- src/Diffing.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 8c068c7b9..306ebce64 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -81,9 +81,9 @@ lineByLineParser blob = pure . cofree . root $ case foldl' annotateLeaves ([], 0 -- | Return the parser that should be used for a given path. parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Range, Category]) -parserForFilepath path source = do - parsed <- parserForType (toS (takeExtension path)) source - pure $! breakDownLeavesByWord source parsed +parserForFilepath path blob = do + parsed <- parserForType (toS (takeExtension path)) blob + pure $! breakDownLeavesByWord (source blob) parsed -- | Replace every string leaf with leaves of the words in the string. breakDownLeavesByWord :: (HasField fields Category, HasField fields Range) => Source Char -> Term Text (Record fields) -> Term Text (Record fields) From a0b41083ab3090efdc8c94cf6b7bf65312961d78 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 12:13:52 -0400 Subject: [PATCH 055/208] Apparently this got swapped again or something? --- src/Diffing.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 306ebce64..0c3d1fa70 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -50,7 +50,7 @@ diffFiles parser renderer sourceBlobs = do (_, _) -> runBothWith (diffTerms construct compareCategoryEq diffCostWithCachedTermCosts) terms - pure $! renderer textDiff sourceBlobs + pure $! renderer sourceBlobs textDiff where construct (info :< syntax) = free (Free ((setCost <$> info <*> sumCost syntax) :< syntax)) sumCost = fmap getSum . foldMap (fmap Sum . getCost) From a6aee9de71069db8bab3820d8680931b2bea5e57 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 12:14:09 -0400 Subject: [PATCH 056/208] :fire: a redundant binding. --- src/Diffing.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 0c3d1fa70..6ffafcdd3 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -40,7 +40,6 @@ import qualified Data.Text as T -- | with respect to other IO actions. diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO Text diffFiles parser renderer sourceBlobs = do - let sources = source <$> sourceBlobs terms <- traverse parser sourceBlobs let areNullOids = runBothWith (\a b -> (oid a == nullOid || null (source a), oid b == nullOid || null (source b))) sourceBlobs From bd81f9d4bfdce10725bbc9f6977c2a0e2311a9a3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 13:41:27 -0400 Subject: [PATCH 057/208] The p,q-gram decorator decorates with a pair of the gram for that node and the grams for all child nodes. --- src/Diffing.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 6ffafcdd3..a2f68f862 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -127,8 +127,8 @@ compareCategoryEq = (==) `on` category . extract termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator c = 1 + sum (cost <$> tailF c) -pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (DList.DList (Gram label)) -pqGramDecorator getLabel p q (a :< s) = empty +pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (Gram label, DList.DList (Gram label)) +pqGramDecorator getLabel p q (a :< s) = (Gram [] [], empty) -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer From cd2dfd53747f1aa57d25b395ad7972bc0d3cea53 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 13:45:58 -0400 Subject: [PATCH 058/208] Combine all the child grams together. --- src/Diffing.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index a2f68f862..13b0a25d3 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -1,6 +1,7 @@ {-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-} module Diffing where +import qualified Prologue import Prologue hiding (fst, snd) import qualified Data.ByteString.Char8 as B1 import qualified Data.DList as DList @@ -128,7 +129,9 @@ termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator c = 1 + sum (cost <$> tailF c) pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (Gram label, DList.DList (Gram label)) -pqGramDecorator getLabel p q (a :< s) = (Gram [] [], empty) +pqGramDecorator getLabel p q (a :< s) = (Gram [] [], foldMap (Prologue.snd . childGrams) s) + where childGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) + childGrams = getField -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer From fab4a3c829a550a84705cdf0541cd4754244c8a4 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 13:48:15 -0400 Subject: [PATCH 059/208] Build the label for the current node. --- src/Diffing.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 13b0a25d3..2fa284235 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -129,7 +129,7 @@ termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator c = 1 + sum (cost <$> tailF c) pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (Gram label, DList.DList (Gram label)) -pqGramDecorator getLabel p q (a :< s) = (Gram [] [], foldMap (Prologue.snd . childGrams) s) +pqGramDecorator getLabel p q c@(a :< s) = (Gram [] [ Just (getLabel c) ], foldMap (Prologue.snd . childGrams) s) where childGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) childGrams = getField From 192366d5ad88e8f7c4ef0a5245bb9499127f7569 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 13:56:29 -0400 Subject: [PATCH 060/208] This is already being exported. --- src/Prologue.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Prologue.hs b/src/Prologue.hs index 31f4be197..b73f2562a 100644 --- a/src/Prologue.hs +++ b/src/Prologue.hs @@ -2,7 +2,6 @@ module Prologue ( module X , lookup , traceShowId -, FilePath ) where import Protolude as X From 1b02940e26015aece8c2f0b41b0ba3018ddc9257 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 13:56:48 -0400 Subject: [PATCH 061/208] Combine the gram at each node in. --- src/Diffing.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 2fa284235..56a726870 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -129,9 +129,12 @@ termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator c = 1 + sum (cost <$> tailF c) pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (Gram label, DList.DList (Gram label)) -pqGramDecorator getLabel p q c@(a :< s) = (Gram [] [ Just (getLabel c) ], foldMap (Prologue.snd . childGrams) s) - where childGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) - childGrams = getField +pqGramDecorator getLabel p q c@(a :< s) = (Gram [] [ Just label ], foldMap (childGrams label) s) + where childGrams :: HasField fields (Gram label, DList.DList (Gram label)) => label -> Record fields -> DList.DList (Gram label) + childGrams label record = let (child, grandchildren) = getField record in + DList.singleton (prependParent label child) <> grandchildren + prependParent label gram = gram { stem = Just label : stem gram } + label = getLabel c -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer From 3d3874ee14d586ee8d35c95f6cf8ddf9c6a58265 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 15:07:57 -0400 Subject: [PATCH 062/208] List the RWS exports manually. --- src/Data/RandomWalkSimilarity.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 74d6fc718..e11a30f75 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -1,5 +1,10 @@ {-# LANGUAGE RankNTypes #-} -module Data.RandomWalkSimilarity where +module Data.RandomWalkSimilarity +( rws +, pqGrams +, featureVector +, Gram(..) +) where import Control.Arrow ((&&&)) import Control.Monad.Random From f546b21ea297499491a39573a907f58c48f789a7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 15:08:21 -0400 Subject: [PATCH 063/208] Move the p,q-gram decorator to the RWS module. --- src/Data/RandomWalkSimilarity.hs | 14 +++++++++++++- src/Diffing.hs | 9 --------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index e11a30f75..3aed799ef 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-} module Data.RandomWalkSimilarity ( rws , pqGrams @@ -15,6 +15,7 @@ import Data.Functor.Foldable as Foldable import Data.Hashable import qualified Data.KdTree.Static as KdTree import qualified Data.List as List +import Data.Record import qualified Data.Vector as Vector import Patch import Prologue @@ -80,6 +81,17 @@ pqGrams getLabel p q = uncurry DList.cons . cata merge . setRootBase . setRootSt setRootBase term = let (a :< f) = runCofree term in cofree (setBase a (base a) :< f) setRootStem = foldr (\ p rest -> assignParent Nothing p . rest) identity [0..p] +type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field + +pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (Gram label, DList.DList (Gram label)) +pqGramDecorator getLabel p q c@(a :< s) = (Gram [] [ Just label ], foldMap (childGrams label) s) + where childGrams :: HasField fields (Gram label, DList.DList (Gram label)) => label -> Record fields -> DList.DList (Gram label) + childGrams label record = let (child, grandchildren) = getField record in + DList.singleton (prependParent label child) <> grandchildren + prependParent label gram = gram { stem = Just label : stem gram } + label = getLabel c + + -- | A sliding-window fold over _n_ items of a list per iteration. windowed :: Int -> (a -> [a] -> b -> b) -> b -> [a] -> b windowed n f seed = para alg diff --git a/src/Diffing.hs b/src/Diffing.hs index 56a726870..8fea49e8d 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -4,10 +4,8 @@ module Diffing where import qualified Prologue import Prologue hiding (fst, snd) import qualified Data.ByteString.Char8 as B1 -import qualified Data.DList as DList import Data.Functor.Both import Data.Functor.Foldable -import Data.RandomWalkSimilarity import Data.Record import qualified Data.Text.IO as TextIO import qualified Data.Text.ICU.Detect as Detect @@ -128,13 +126,6 @@ compareCategoryEq = (==) `on` category . extract termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator c = 1 + sum (cost <$> tailF c) -pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (Gram label, DList.DList (Gram label)) -pqGramDecorator getLabel p q c@(a :< s) = (Gram [] [ Just label ], foldMap (childGrams label) s) - where childGrams :: HasField fields (Gram label, DList.DList (Gram label)) => label -> Record fields -> DList.DList (Gram label) - childGrams label record = let (child, grandchildren) = getField record in - DList.singleton (prependParent label child) <> grandchildren - prependParent label gram = gram { stem = Just label : stem gram } - label = getLabel c -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer From d866df0a3d39b03d50d0efa423aad0698a556c1f Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 2 Aug 2016 16:39:01 -0400 Subject: [PATCH 064/208] Set the bases on siblings using a foldMap. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s easier to reason about than the equivalent bind, IMO. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 3aed799ef..934927db0 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -76,7 +76,7 @@ pqGrams getLabel p q = uncurry DList.cons . cata merge . setRootBase . setRootSt | n > 0 = let gram :< functor = runCofree tree in cofree $ prependParent parentLabel gram :< (assignParent parentLabel (pred n) <$> functor) | otherwise = tree prependParent parentLabel gram = gram { stem = parentLabel : stem gram } - setBases gram siblings rest = setBase gram (siblings >>= base) : rest + setBases gram siblings rest = setBase gram (foldMap base siblings) : rest setBase gram newBase = gram { base = take q (newBase <> repeat Nothing) } setRootBase term = let (a :< f) = runCofree term in cofree (setBase a (base a) :< f) setRootStem = foldr (\ p rest -> assignParent Nothing p . rest) identity [0..p] From 8fc465c07cb24960ebf3762a5f8cf1846af6e7fc Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 3 Aug 2016 14:33:05 -0400 Subject: [PATCH 065/208] Stub in a decorator assigning feature vectors. --- src/Data/RandomWalkSimilarity.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 934927db0..6bbdcdd34 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -107,6 +107,10 @@ featureVector d bag = sumVectors $ unitDVector . hash <$> bag normalize vec = fmap (/ vmagnitude vec) vec sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) +featureVectorDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> Int -> TermDecorator f a (Vector.Vector Double) +featureVectorDecorator getLabel p q d (a :< s) = Vector.replicate d 0 + + -- | The magnitude of a Euclidean vector, i.e. its distance from the origin. vmagnitude :: Vector.Vector Double -> Double vmagnitude = sqrtDouble . Vector.sum . fmap (** 2) From bb6cbb753c642edac5a422b9d2a97845afb83471 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 15:27:14 -0400 Subject: [PATCH 066/208] Add a function to decorate a term with its label. --- src/Data/RandomWalkSimilarity.hs | 4 ++++ src/Data/Record.hs | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 6bbdcdd34..9c7e90a71 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -110,6 +110,10 @@ featureVector d bag = sumVectors $ unitDVector . hash <$> bag featureVectorDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> Int -> TermDecorator f a (Vector.Vector Double) featureVectorDecorator getLabel p q d (a :< s) = Vector.replicate d 0 +decorateTermWithLabel :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) +decorateTermWithLabel getLabel = cata $ \ c@(h :< t) -> + cofree ((getLabel c .: h) :< t) + -- | The magnitude of a Euclidean vector, i.e. its distance from the origin. vmagnitude :: Vector.Vector Double -> Double diff --git a/src/Data/Record.hs b/src/Data/Record.hs index ce0e7607d..5a2665428 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -1,6 +1,6 @@ {-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, ScopedTypeVariables, TypeOperators #-} module Data.Record -( Record(RNil) +( Record(..) , (.:) , HasField(..) , maybeGetField From 8e9cc43f498f52ddd8250bb4c4dc7ea91985f97a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 15:28:00 -0400 Subject: [PATCH 067/208] Add a function to decorate a term with its p,q-gram. --- src/Data/RandomWalkSimilarity.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 9c7e90a71..095c61585 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-} +{-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators #-} module Data.RandomWalkSimilarity ( rws , pqGrams @@ -7,6 +7,7 @@ module Data.RandomWalkSimilarity ) where import Control.Arrow ((&&&)) +import qualified Control.Monad.Free as Free (Free) import Control.Monad.Random import Control.Monad.State import qualified Data.DList as DList @@ -114,6 +115,11 @@ decorateTermWithLabel :: (Typeable label, Functor f) => (forall b. CofreeF f (Re decorateTermWithLabel getLabel = cata $ \ c@(h :< t) -> cofree ((getLabel c .: h) :< t) +decorateTermWithPQGram :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPQGram getLabel p q d = futu coalgebra . (,) [] . decorateTermWithLabel getLabel + where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) (Free.Free (CofreeF f (Record (Gram label ': fields))) ([Maybe label], Cofree f (Record (label ': fields)))) + coalgebra (parentLabels, c) = case extract c of + RCons label rest -> (Gram (take p (parentLabels <> repeat Nothing)) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) -- | The magnitude of a Euclidean vector, i.e. its distance from the origin. vmagnitude :: Vector.Vector Double -> Double From 9c3509f7d2220b03f03fb206da9c25e185a8bead Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 15:32:56 -0400 Subject: [PATCH 068/208] :fire: the unused parameter d. --- src/Data/RandomWalkSimilarity.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 095c61585..a0c17d472 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -115,8 +115,8 @@ decorateTermWithLabel :: (Typeable label, Functor f) => (forall b. CofreeF f (Re decorateTermWithLabel getLabel = cata $ \ c@(h :< t) -> cofree ((getLabel c .: h) :< t) -decorateTermWithPQGram :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPQGram getLabel p q d = futu coalgebra . (,) [] . decorateTermWithLabel getLabel +decorateTermWithPQGram :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPQGram getLabel p q = futu coalgebra . (,) [] . decorateTermWithLabel getLabel where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) (Free.Free (CofreeF f (Record (Gram label ': fields))) ([Maybe label], Cofree f (Record (label ': fields)))) coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (take p (parentLabels <> repeat Nothing)) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) From 3dc0a4f071da716f356dad9f102a17857261a9d1 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 15:35:47 -0400 Subject: [PATCH 069/208] Expect the incoming term to have a label at the head of the record already. --- src/Data/RandomWalkSimilarity.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index a0c17d472..f75beac8a 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -115,8 +115,8 @@ decorateTermWithLabel :: (Typeable label, Functor f) => (forall b. CofreeF f (Re decorateTermWithLabel getLabel = cata $ \ c@(h :< t) -> cofree ((getLabel c .: h) :< t) -decorateTermWithPQGram :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPQGram getLabel p q = futu coalgebra . (,) [] . decorateTermWithLabel getLabel +decorateTermWithPQGram :: (Typeable label, Functor f) => (forall b. CofreeF f (Record (label ': fields)) b -> label) -> Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPQGram getLabel p q = futu coalgebra . (,) [] where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) (Free.Free (CofreeF f (Record (Gram label ': fields))) ([Maybe label], Cofree f (Record (label ': fields)))) coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (take p (parentLabels <> repeat Nothing)) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) From 32e084971a26bddc83971b4e8b8b90d4b31ae35e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 15:37:49 -0400 Subject: [PATCH 070/208] Define decorateTermWithLabel inline. --- src/Data/RandomWalkSimilarity.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index f75beac8a..72b6a1f07 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -112,8 +112,7 @@ featureVectorDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF featureVectorDecorator getLabel p q d (a :< s) = Vector.replicate d 0 decorateTermWithLabel :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) -decorateTermWithLabel getLabel = cata $ \ c@(h :< t) -> - cofree ((getLabel c .: h) :< t) +decorateTermWithLabel getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) decorateTermWithPQGram :: (Typeable label, Functor f) => (forall b. CofreeF f (Record (label ': fields)) b -> label) -> Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram getLabel p q = futu coalgebra . (,) [] From b06194da38e1b67c04bc19588dc382c9820b04e4 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 15:43:50 -0400 Subject: [PATCH 071/208] Add a function decorating a term with bags of p,q-grams. --- src/Data/RandomWalkSimilarity.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 72b6a1f07..eda289e16 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -120,6 +120,9 @@ decorateTermWithPQGram getLabel p q = futu coalgebra . (,) [] coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (take p (parentLabels <> repeat Nothing)) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) +decorateTermWithBagsOfPQGrams :: (Prologue.Foldable f, Functor f) => Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) +decorateTermWithBagsOfPQGrams = cata $ \ (RCons gram rest :< functor) -> cofree ((DList.cons gram (foldMap (getField . extract) functor) .: rest) :< functor) + -- | The magnitude of a Euclidean vector, i.e. its distance from the origin. vmagnitude :: Vector.Vector Double -> Double vmagnitude = sqrtDouble . Vector.sum . fmap (** 2) From d573ca7940be2bf543a715ead452980ea3ad371a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 15:48:13 -0400 Subject: [PATCH 072/208] :fire: the getLabel parameter to decorateTermWithPQGram. --- src/Data/RandomWalkSimilarity.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index eda289e16..f55c6d997 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -114,8 +114,8 @@ featureVectorDecorator getLabel p q d (a :< s) = Vector.replicate d 0 decorateTermWithLabel :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) decorateTermWithLabel getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) -decorateTermWithPQGram :: (Typeable label, Functor f) => (forall b. CofreeF f (Record (label ': fields)) b -> label) -> Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPQGram getLabel p q = futu coalgebra . (,) [] +decorateTermWithPQGram :: (Typeable label, Functor f) => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPQGram p q = futu coalgebra . (,) [] where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) (Free.Free (CofreeF f (Record (Gram label ': fields))) ([Maybe label], Cofree f (Record (label ': fields)))) coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (take p (parentLabels <> repeat Nothing)) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) From b079bf1b5abc0bc7e4027b372297f817f11e7710 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 15:53:17 -0400 Subject: [PATCH 073/208] =?UTF-8?q?Define=20a=20function=20mapping=20p,q-g?= =?UTF-8?q?ram=20bag=E2=80=93decorated=20terms=20into=20feature=20vector?= =?UTF-8?q?=E2=80=93decorated=20terms.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Data/RandomWalkSimilarity.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index f55c6d997..d3cad0833 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -123,6 +123,9 @@ decorateTermWithPQGram p q = futu coalgebra . (,) [] decorateTermWithBagsOfPQGrams :: (Prologue.Foldable f, Functor f) => Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) decorateTermWithBagsOfPQGrams = cata $ \ (RCons gram rest :< functor) -> cofree ((DList.cons gram (foldMap (getField . extract) functor) .: rest) :< functor) +decorateTermWithFeatureVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (DList.DList (Gram label) ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) +decorateTermWithFeatureVector d = fmap $ \ (RCons grams rest) -> featureVector d grams .: rest + -- | The magnitude of a Euclidean vector, i.e. its distance from the origin. vmagnitude :: Vector.Vector Double -> Double vmagnitude = sqrtDouble . Vector.sum . fmap (** 2) From 0a047d77cbf070a366abb46476d45ec95fc6645d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 16:21:23 -0400 Subject: [PATCH 074/208] :fire: featureVectorDecorator. --- src/Data/RandomWalkSimilarity.hs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index d3cad0833..abc03962f 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -108,9 +108,6 @@ featureVector d bag = sumVectors $ unitDVector . hash <$> bag normalize vec = fmap (/ vmagnitude vec) vec sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) -featureVectorDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> Int -> TermDecorator f a (Vector.Vector Double) -featureVectorDecorator getLabel p q d (a :< s) = Vector.replicate d 0 - decorateTermWithLabel :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) decorateTermWithLabel getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) From 8c31fd9794743118deec489d8705c9811cd996f2 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 16:21:28 -0400 Subject: [PATCH 075/208] :fire: pqGramDecorator. --- src/Data/RandomWalkSimilarity.hs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index abc03962f..aa8a9f7e1 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -84,14 +84,6 @@ pqGrams getLabel p q = uncurry DList.cons . cata merge . setRootBase . setRootSt type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field -pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (Gram label, DList.DList (Gram label)) -pqGramDecorator getLabel p q c@(a :< s) = (Gram [] [ Just label ], foldMap (childGrams label) s) - where childGrams :: HasField fields (Gram label, DList.DList (Gram label)) => label -> Record fields -> DList.DList (Gram label) - childGrams label record = let (child, grandchildren) = getField record in - DList.singleton (prependParent label child) <> grandchildren - prependParent label gram = gram { stem = Just label : stem gram } - label = getLabel c - -- | A sliding-window fold over _n_ items of a list per iteration. windowed :: Int -> (a -> [a] -> b -> b) -> b -> [a] -> b From eb1ab50b00fdb41cb822e99439d7b4ffa8066771 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 16:50:14 -0400 Subject: [PATCH 076/208] :fire: the unused q parameter. --- src/Data/RandomWalkSimilarity.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index aa8a9f7e1..8b0333f42 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -103,8 +103,8 @@ featureVector d bag = sumVectors $ unitDVector . hash <$> bag decorateTermWithLabel :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) decorateTermWithLabel getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) -decorateTermWithPQGram :: (Typeable label, Functor f) => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPQGram p q = futu coalgebra . (,) [] +decorateTermWithPGram :: (Typeable label, Functor f) => Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPGram p = futu coalgebra . (,) [] where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) (Free.Free (CofreeF f (Record (Gram label ': fields))) ([Maybe label], Cofree f (Record (label ': fields)))) coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (take p (parentLabels <> repeat Nothing)) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) From d678f64ad1737a2bacd114b1ff0a7de5dc143147 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 17:21:17 -0400 Subject: [PATCH 077/208] Assign bases of width q. --- src/Data/RandomWalkSimilarity.hs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 8b0333f42..b0a673b7a 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -109,8 +109,15 @@ decorateTermWithPGram p = futu coalgebra . (,) [] coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (take p (parentLabels <> repeat Nothing)) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) -decorateTermWithBagsOfPQGrams :: (Prologue.Foldable f, Functor f) => Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) -decorateTermWithBagsOfPQGrams = cata $ \ (RCons gram rest :< functor) -> cofree ((DList.cons gram (foldMap (getField . extract) functor) .: rest) :< functor) +decorateTermWithPQGram :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) +decorateTermWithPQGram q = cata algebra + where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields))) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) + algebra (RCons gram rest :< functor) = cofree (((gram, DList.fromList (windowed q (\ gram siblings rest -> gram { base = take q (foldMap base siblings <> repeat Nothing) } : rest) [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) .: rest) :< functor) + getGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) + getGrams = getField + +decorateTermWithBagOfPQGrams :: (Typeable label, Functor f) => Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) +decorateTermWithBagOfPQGrams = fmap (\ (RCons (first, rest) t) -> DList.cons first rest .: t) decorateTermWithFeatureVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (DList.DList (Gram label) ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = fmap $ \ (RCons grams rest) -> featureVector d grams .: rest From 4b21c560c9ca96af6593655646001811f7104d75 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 17:22:18 -0400 Subject: [PATCH 078/208] Roll the grams together in a single function. --- src/Data/RandomWalkSimilarity.hs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index b0a673b7a..d502f6d56 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -109,16 +109,13 @@ decorateTermWithPGram p = futu coalgebra . (,) [] coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (take p (parentLabels <> repeat Nothing)) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) -decorateTermWithPQGram :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) -decorateTermWithPQGram q = cata algebra +decorateTermWithBagOfPQGrams :: (Typeable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) +decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons first rest .: t) . cata algebra where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields))) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) algebra (RCons gram rest :< functor) = cofree (((gram, DList.fromList (windowed q (\ gram siblings rest -> gram { base = take q (foldMap base siblings <> repeat Nothing) } : rest) [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) .: rest) :< functor) getGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) getGrams = getField -decorateTermWithBagOfPQGrams :: (Typeable label, Functor f) => Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) -decorateTermWithBagOfPQGrams = fmap (\ (RCons (first, rest) t) -> DList.cons first rest .: t) - decorateTermWithFeatureVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (DList.DList (Gram label) ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = fmap $ \ (RCons grams rest) -> featureVector d grams .: rest From 0a7d5fa12467b0823bee3d883f9cd5d5d6f99887 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 19:17:58 -0400 Subject: [PATCH 079/208] Add a function to retrieve the head element of a non-empty record. --- src/Data/Record.hs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 5a2665428..12f3d37bd 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -2,6 +2,7 @@ module Data.Record ( Record(..) , (.:) +, rhead , HasField(..) , maybeGetField , updateField @@ -25,6 +26,11 @@ infixr 0 .: (.:) :: Typeable h => h -> Record t -> Record (h ': t) (.:) = RCons +-- | Get the first element of a non-empty record. +rhead :: Record (head ': tail) -> head +rhead (RCons head _) = head + + -- | Return 'Just' a 'field', if it exists in a record. Otherwise, return 'Nothing'. maybeGetField :: Typeable field => Record fields -> Maybe field maybeGetField (RCons h t) = cast h <|> maybeGetField t From 46a3aee33e37b31d6e75e94d2595ae00b30ab32d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 19:19:03 -0400 Subject: [PATCH 080/208] Define pqGrams in terms of the decorator. --- src/Data/RandomWalkSimilarity.hs | 26 ++++++++------------------ src/Interpreter.hs | 6 +++--- test/Data/RandomWalkSimilarity/Spec.hs | 10 ++++++---- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index d502f6d56..2154409c7 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -25,16 +25,16 @@ import Test.QuickCheck hiding (Fixed) import Test.QuickCheck.Random -- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf). -rws :: (Hashable label, Eq annotation, Prologue.Foldable f, Functor f, Eq (f (Cofree f annotation))) => +rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label) => -- | A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared. - (Cofree f annotation -> Cofree f annotation -> Maybe (Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation)))) -> + (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -> -- | A function to compute a label for an unpacked term. - (forall b. CofreeF f annotation b -> label) -> + (forall b. CofreeF f (Record fields) b -> label) -> -- | The old list of terms. - [Cofree f annotation] -> + [Cofree f (Record fields)] -> -- | The new list of terms. - [Cofree f annotation] -> - [Free (CofreeF f (Both annotation)) (Patch (Cofree f annotation))] + [Cofree f (Record fields)] -> + [Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))] rws compare getLabel as bs | null as, null bs = [] | null as = insert <$> bs @@ -69,18 +69,8 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } deriving (Eq, Show) -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. -pqGrams :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f annotation b -> label) -> Int -> Int -> Cofree f annotation -> DList.DList (Gram label) -pqGrams getLabel p q = uncurry DList.cons . cata merge . setRootBase . setRootStem . cata go - where go c = cofree (Gram [] [ Just (getLabel c) ] :< (assignParent (Just (getLabel c)) p <$> tailF c)) - merge (head :< tail) = let tail' = toList tail in (head, DList.fromList (windowed q setBases [] (fst <$> tail')) <> foldMap snd tail') - assignParent parentLabel n tree - | n > 0 = let gram :< functor = runCofree tree in cofree $ prependParent parentLabel gram :< (assignParent parentLabel (pred n) <$> functor) - | otherwise = tree - prependParent parentLabel gram = gram { stem = parentLabel : stem gram } - setBases gram siblings rest = setBase gram (foldMap base siblings) : rest - setBase gram newBase = gram { base = take q (newBase <> repeat Nothing) } - setRootBase term = let (a :< f) = runCofree term in cofree (setBase a (base a) :< f) - setRootStem = foldr (\ p rest -> assignParent Nothing p . rest) identity [0..p] +pqGrams :: (Prologue.Foldable f, Functor f, Typeable label) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) +pqGrams getLabel p q = getField . extract . decorateTermWithBagOfPQGrams q . decorateTermWithPGram p . decorateTermWithLabel getLabel type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field diff --git a/src/Interpreter.hs b/src/Interpreter.hs index 16f396538..bf12b77d8 100644 --- a/src/Interpreter.hs +++ b/src/Interpreter.hs @@ -25,11 +25,11 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation - type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation -- | Diff two terms, given a function that determines whether two terms can be compared and a cost function. -diffTerms :: (Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields) +diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields) diffTerms construct comparable cost a b = fromMaybe (pure $ Replace a b) $ constructAndRun construct comparable cost a b -- | Constructs an algorithm and runs it -constructAndRun :: (Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields)) +constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields)) constructAndRun construct comparable cost t1 t2 | not $ comparable t1 t2 = Nothing | (category <$> t1) == (category <$> t2) = hylo construct runCofree <$> zipTerms t1 t2 @@ -42,7 +42,7 @@ constructAndRun construct comparable cost t1 t2 annotate = pure . construct . (both annotation1 annotation2 :<) -- | Runs the diff algorithm -run :: (Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields)) +run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields)) run construct comparable cost algorithm = case runFree algorithm of Pure diff -> Just diff Free (Recursive t1 t2 f) -> run construct comparable cost . f $ recur a b where diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index 145d65a84..b54bb3b65 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -1,8 +1,10 @@ +{-# LANGUAGE DataKinds #-} module Data.RandomWalkSimilarity.Spec where import Category import Data.DList as DList hiding (toList) import Data.RandomWalkSimilarity +import Data.Record import Diff import Patch import Prologue @@ -17,10 +19,10 @@ spec :: Spec spec = parallel $ do describe "pqGrams" $ do prop "produces grams with stems of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text Text) `shouldSatisfy` all ((== p) . length . stem) + \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== p) . length . stem) prop "produces grams with bases of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text Text) `shouldSatisfy` all ((== q) . length . base) + \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== q) . length . base) describe "featureVector" $ do prop "produces a vector of the specified dimension" . forAll (arbitrary `suchThat` ((> 0) . Prologue.snd)) $ @@ -31,5 +33,5 @@ spec = parallel $ do prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $ \ (as, bs) -> let tas = toTerm <$> as tbs = toTerm <$> bs - diff = free (Free (pure Program :< Indexed (rws compare headF tas tbs :: [Diff Text Category]))) in - (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree (Program :< Indexed tas)), Just (cofree (Program :< Indexed tbs))) + diff = free (Free (pure (Program .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category])]))) in + (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: RNil) :< Indexed tas)), Just (cofree ((Program .: RNil) :< Indexed tbs))) From 32f9152f225f5e1306dccf435582f7dfd1b80c9f Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 19:48:21 -0400 Subject: [PATCH 081/208] Rename a test. --- test/Data/RandomWalkSimilarity/Spec.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index b54bb3b65..f773be590 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -21,7 +21,7 @@ spec = parallel $ do prop "produces grams with stems of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== p) . length . stem) - prop "produces grams with bases of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ + prop "produces grams with bases of the specified width" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== q) . length . base) describe "featureVector" $ do From 70d1d9b3085ce5676112048fb025e587b55d23bb Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 19:48:59 -0400 Subject: [PATCH 082/208] :fire: the unused TermDecorator type synonym. --- src/Data/RandomWalkSimilarity.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 2154409c7..374b04ee3 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -72,8 +72,6 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } pqGrams :: (Prologue.Foldable f, Functor f, Typeable label) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) pqGrams getLabel p q = getField . extract . decorateTermWithBagOfPQGrams q . decorateTermWithPGram p . decorateTermWithLabel getLabel -type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field - -- | A sliding-window fold over _n_ items of a list per iteration. windowed :: Int -> (a -> [a] -> b -> b) -> b -> [a] -> b From 03438d2cb761f7531eb25cea443afdff03f972b4 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 19:57:40 -0400 Subject: [PATCH 083/208] Extract the setBases function. --- src/Data/RandomWalkSimilarity.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 374b04ee3..521b0fad8 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -100,7 +100,9 @@ decorateTermWithPGram p = futu coalgebra . (,) [] decorateTermWithBagOfPQGrams :: (Typeable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons first rest .: t) . cata algebra where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields))) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) - algebra (RCons gram rest :< functor) = cofree (((gram, DList.fromList (windowed q (\ gram siblings rest -> gram { base = take q (foldMap base siblings <> repeat Nothing) } : rest) [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) .: rest) :< functor) + algebra (RCons gram rest :< functor) = cofree (((gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) .: rest) :< functor) + setBases :: Gram label -> [Gram label] -> [Gram label] -> [Gram label] + setBases gram siblings rest = gram { base = take q (foldMap base siblings <> repeat Nothing) } : rest getGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) getGrams = getField From ea950d1953145ea8013cbb93ddc757639e33c633 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 20:00:36 -0400 Subject: [PATCH 084/208] Extract a function to pad a list to a given length. --- src/Data/RandomWalkSimilarity.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 521b0fad8..bccb56624 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -6,6 +6,7 @@ module Data.RandomWalkSimilarity , Gram(..) ) where +import Control.Applicative import Control.Arrow ((&&&)) import qualified Control.Monad.Free as Free (Free) import Control.Monad.Random @@ -95,20 +96,23 @@ decorateTermWithPGram :: (Typeable label, Functor f) => Int -> Cofree f (Record decorateTermWithPGram p = futu coalgebra . (,) [] where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) (Free.Free (CofreeF f (Record (Gram label ': fields))) ([Maybe label], Cofree f (Record (label ': fields)))) coalgebra (parentLabels, c) = case extract c of - RCons label rest -> (Gram (take p (parentLabels <> repeat Nothing)) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) + RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) decorateTermWithBagOfPQGrams :: (Typeable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons first rest .: t) . cata algebra where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields))) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) algebra (RCons gram rest :< functor) = cofree (((gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) .: rest) :< functor) setBases :: Gram label -> [Gram label] -> [Gram label] -> [Gram label] - setBases gram siblings rest = gram { base = take q (foldMap base siblings <> repeat Nothing) } : rest + setBases gram siblings rest = gram { base = padToSize q (foldMap base siblings) } : rest getGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) getGrams = getField decorateTermWithFeatureVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (DList.DList (Gram label) ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = fmap $ \ (RCons grams rest) -> featureVector d grams .: rest +padToSize :: Alternative f => Int -> [f a] -> [f a] +padToSize n list = take n (list <> repeat empty) + -- | The magnitude of a Euclidean vector, i.e. its distance from the origin. vmagnitude :: Vector.Vector Double -> Double vmagnitude = sqrtDouble . Vector.sum . fmap (** 2) From 46f4ca93c3420915364d762bbc79285ecbb760c7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 20:03:44 -0400 Subject: [PATCH 085/208] Pad the base at the root. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index bccb56624..83299259d 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -99,7 +99,7 @@ decorateTermWithPGram p = futu coalgebra . (,) [] RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) decorateTermWithBagOfPQGrams :: (Typeable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) -decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons first rest .: t) . cata algebra +decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons (first { base = padToSize q (base first) }) rest .: t) . cata algebra where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields))) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) algebra (RCons gram rest :< functor) = cofree (((gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) .: rest) :< functor) setBases :: Gram label -> [Gram label] -> [Gram label] -> [Gram label] From d26c7834e93e6fa28bbfe1ab6f6c34dad84e0cdc Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 20:37:34 -0400 Subject: [PATCH 086/208] Add a decorator for feature vectors which chains the other decorators together. --- src/Data/RandomWalkSimilarity.hs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 83299259d..161b0ad5e 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -3,6 +3,7 @@ module Data.RandomWalkSimilarity ( rws , pqGrams , featureVector +, featureVectorDecorator , Gram(..) ) where @@ -110,6 +111,13 @@ decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons ( decorateTermWithFeatureVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (DList.DList (Gram label) ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = fmap $ \ (RCons grams rest) -> featureVector d grams .: rest +featureVectorDecorator :: (Typeable label, Hashable label, Functor f, Prologue.Foldable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) +featureVectorDecorator getLabel p q d + = decorateTermWithFeatureVector d + . decorateTermWithBagOfPQGrams q + . decorateTermWithPGram p + . decorateTermWithLabel getLabel + padToSize :: Alternative f => Int -> [f a] -> [f a] padToSize n list = take n (list <> repeat empty) From ec548b56d43390e5c95de5abe80c3fb73e0b7be4 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 20:42:39 -0400 Subject: [PATCH 087/208] Sort the test dependencies alphabetically. --- semantic-diff.cabal | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/semantic-diff.cabal b/semantic-diff.cabal index 0ba91c38d..b90b6d02c 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -119,16 +119,16 @@ test-suite semantic-diff-test , deepseq , dlist , filepath + , free , Glob , hspec >= 2.1.10 , mtl , QuickCheck >= 2.8.1 , quickcheck-text + , recursion-schemes >= 4.1 , semantic-diff , text >= 1.2.1.3 , these - , free - , recursion-schemes >= 4.1 , wl-pprint-text if os(darwin) ghc-options: -threaded -rtsopts -with-rtsopts=-N -j From 74710eefcde9a8400b78a1429c509119006dfba1 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 20:42:50 -0400 Subject: [PATCH 088/208] :fire: redundant imports. --- test/CorpusSpec.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index a9c196c27..5c39249d1 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -1,7 +1,6 @@ {-# LANGUAGE DataKinds, FlexibleContexts, GeneralizedNewtypeDeriving #-} module CorpusSpec where -import Data.String import Diffing import Renderer import qualified Renderer.JSON as J @@ -18,7 +17,6 @@ import Data.Set as Set import qualified Data.Text as T import Info import Prologue hiding (fst, snd) -import Range import qualified Source as S import System.FilePath import System.FilePath.Glob From 60c4effabeb458a263205a629b24660f7d43eeb0 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 20:43:10 -0400 Subject: [PATCH 089/208] Diffing annotates terms with their feature vectors. --- semantic-diff.cabal | 1 + src/Diffing.hs | 10 ++++++++-- test/CorpusSpec.hs | 3 ++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/semantic-diff.cabal b/semantic-diff.cabal index b90b6d02c..137f86a90 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -129,6 +129,7 @@ test-suite semantic-diff-test , semantic-diff , text >= 1.2.1.3 , these + , vector , wl-pprint-text if os(darwin) ghc-options: -threaded -rtsopts -with-rtsopts=-N -j diff --git a/src/Diffing.hs b/src/Diffing.hs index 8fea49e8d..8bfab8c0f 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -6,11 +6,13 @@ import Prologue hiding (fst, snd) import qualified Data.ByteString.Char8 as B1 import Data.Functor.Both import Data.Functor.Foldable +import Data.RandomWalkSimilarity import Data.Record import qualified Data.Text.IO as TextIO import qualified Data.Text.ICU.Detect as Detect import qualified Data.Text.ICU.Convert as Convert import Data.These +import qualified Data.Vector as Vector import Diff import Info import Interpreter @@ -37,9 +39,9 @@ import qualified Data.Text as T -- | result. -- | Returns the rendered result strictly, so it's always fully evaluated -- | with respect to other IO actions. -diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO Text +diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record (Vector.Vector Double ': fields)) -> Both SourceBlob -> IO Text diffFiles parser renderer sourceBlobs = do - terms <- traverse parser sourceBlobs + terms <- traverse (fmap (featureVectorDecorator getLabel p q d) . parser) sourceBlobs let areNullOids = runBothWith (\a b -> (oid a == nullOid || null (source a), oid b == nullOid || null (source b))) sourceBlobs let textDiff = case areNullOids of @@ -55,6 +57,10 @@ diffFiles parser renderer sourceBlobs = do getCost diff = case runFree diff of Free (info :< _) -> cost <$> info Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch))) + getLabel (h :< t) = (category h, case t of + Leaf s -> Just s + _ -> Nothing) + (p, q, d) = (2, 2, 15) -- | Return a parser based on the file extension (including the "."). parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category]) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 5c39249d1..81207a038 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -1,6 +1,7 @@ {-# LANGUAGE DataKinds, FlexibleContexts, GeneralizedNewtypeDeriving #-} module CorpusSpec where +import qualified Data.Vector as Vector import Diffing import Renderer import qualified Renderer.JSON as J @@ -69,7 +70,7 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte -- | Given file paths for A, B, and, optionally, a diff, return whether diffing -- | the files will produce the diff. If no diff is provided, then the result -- | is true, but the diff will still be calculated. -testDiff :: Renderer (Record '[Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation +testDiff :: Renderer (Record '[Vector.Vector Double, Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation testDiff renderer paths diff matcher = do sources <- sequence $ readAndTranscodeFile <$> paths actual <- Verbatim <$> diffFiles (decorateParser termCostDecorator parser) renderer (sourceBlobs sources) From 9fc9827b348c9f0a784a9bb6d145dfadba575943 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Thu, 4 Aug 2016 21:01:54 -0400 Subject: [PATCH 090/208] rws requires a feature vector. --- src/Data/RandomWalkSimilarity.hs | 2 +- src/Interpreter.hs | 7 ++++--- test/Data/RandomWalkSimilarity/Spec.hs | 10 ++++++++-- test/Diff/Spec.hs | 14 ++++++++++---- test/DiffSummarySpec.hs | 18 ++++++++++++------ test/InterpreterSpec.hs | 17 ++++++++++++----- 6 files changed, 47 insertions(+), 21 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 161b0ad5e..1236a13b3 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -27,7 +27,7 @@ import Test.QuickCheck hiding (Fixed) import Test.QuickCheck.Random -- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf). -rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label) => +rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label, HasField fields (Vector.Vector Double)) => -- | A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared. (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -> -- | A function to compute a label for an unpacked term. diff --git a/src/Interpreter.hs b/src/Interpreter.hs index bf12b77d8..f561ed6e2 100644 --- a/src/Interpreter.hs +++ b/src/Interpreter.hs @@ -9,6 +9,7 @@ import Data.Hashable import Data.RandomWalkSimilarity import Data.Record import Data.These +import qualified Data.Vector as Vector import Diff import Info import Operation @@ -25,11 +26,11 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation - type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation -- | Diff two terms, given a function that determines whether two terms can be compared and a cost function. -diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields) +diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields) diffTerms construct comparable cost a b = fromMaybe (pure $ Replace a b) $ constructAndRun construct comparable cost a b -- | Constructs an algorithm and runs it -constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields)) +constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields)) constructAndRun construct comparable cost t1 t2 | not $ comparable t1 t2 = Nothing | (category <$> t1) == (category <$> t2) = hylo construct runCofree <$> zipTerms t1 t2 @@ -42,7 +43,7 @@ constructAndRun construct comparable cost t1 t2 annotate = pure . construct . (both annotation1 annotation2 :<) -- | Runs the diff algorithm -run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields)) +run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields)) run construct comparable cost algorithm = case runFree algorithm of Pure diff -> Just diff Free (Recursive t1 t2 f) -> run construct comparable cost . f $ recur a b where diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index f773be590..b6b120956 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -5,6 +5,7 @@ import Category import Data.DList as DList hiding (toList) import Data.RandomWalkSimilarity import Data.Record +import qualified Data.Vector as Vector import Diff import Patch import Prologue @@ -33,5 +34,10 @@ spec = parallel $ do prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $ \ (as, bs) -> let tas = toTerm <$> as tbs = toTerm <$> bs - diff = free (Free (pure (Program .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category])]))) in - (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: RNil) :< Indexed tas)), Just (cofree ((Program .: RNil) :< Indexed tbs))) + diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in + (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tas)), Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tbs))) + + +instance Arbitrary a => Arbitrary (Vector.Vector a) where + arbitrary = Vector.fromList <$> arbitrary + shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/Diff/Spec.hs b/test/Diff/Spec.hs index 6673b7ba5..a4199cb89 100644 --- a/test/Diff/Spec.hs +++ b/test/Diff/Spec.hs @@ -4,6 +4,7 @@ module Diff.Spec where import Category import Data.Record import Data.Text.Arbitrary () +import qualified Data.Vector as Vector import Diff import Diff.Arbitrary import Interpreter @@ -16,23 +17,28 @@ import Test.QuickCheck spec :: Spec spec = parallel $ do prop "equality is reflexive" $ - \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in + \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in diff `shouldBe` diff prop "equal terms produce identity diffs" $ - \ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category])) in + \ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category])) in diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0 describe "beforeTerm" $ do prop "recovers the before term" $ - \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in + \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in beforeTerm diff `shouldBe` Just (toTerm a) describe "afterTerm" $ do prop "recovers the after term" $ - \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Category]))) in + \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in afterTerm diff `shouldBe` Just (toTerm b) describe "ArbitraryDiff" $ do prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $ \ (n, diff) -> arbitraryDiffSize (diff :: ArbitraryDiff Text ()) `shouldBe` n + + +instance Arbitrary a => Arbitrary (Vector.Vector a) where + arbitrary = Vector.fromList <$> arbitrary + shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/DiffSummarySpec.hs b/test/DiffSummarySpec.hs index dc648c4bf..ab5491cda 100644 --- a/test/DiffSummarySpec.hs +++ b/test/DiffSummarySpec.hs @@ -3,6 +3,7 @@ module DiffSummarySpec where import Prologue import Data.Record +import qualified Data.Vector as Vector import Test.Hspec import Test.Hspec.QuickCheck import Diff @@ -20,14 +21,15 @@ import Interpreter import Info import Source import Data.Functor.Both +import Test.QuickCheck hiding (Fixed) -arrayInfo :: Record '[Category, Range] -arrayInfo = ArrayLiteral .: Range 0 3 .: RNil +arrayInfo :: Record '[Category, Range, Vector.Vector Double] +arrayInfo = ArrayLiteral .: Range 0 3 .: Vector.singleton 0 .: RNil -literalInfo :: Record '[Category, Range] -literalInfo = StringLiteral .: Range 1 2 .: RNil +literalInfo :: Record '[Category, Range, Vector.Vector Double] +literalInfo = StringLiteral .: Range 1 2 .: Vector.singleton 0 .: RNil -testDiff :: Diff Text (Record '[Category, Range]) +testDiff :: Diff Text (Record '[Category, Range, Vector.Vector Double]) testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "a")) ]) testSummary :: DiffSummary DiffInfo @@ -46,7 +48,7 @@ spec = parallel $ do diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotations = [ ArrayLiteral ] } ] prop "equal terms produce identity diffs" $ - \ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range])) in + \ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range, Vector.Vector Double])) in diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` [] describe "annotatedSummaries" $ do @@ -103,3 +105,7 @@ isBranchInfo info = case info of isBranchNode :: Patch DiffInfo -> Bool isBranchNode = any isBranchInfo + +instance Arbitrary a => Arbitrary (Vector.Vector a) where + arbitrary = Vector.fromList <$> arbitrary + shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index 0b62d9790..39efd7933 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -4,6 +4,7 @@ module InterpreterSpec where import Category import Diff import Data.Record +import qualified Data.Vector as Vector import Interpreter import Patch import Prologue @@ -11,20 +12,26 @@ import Syntax import Term.Arbitrary import Test.Hspec import Test.Hspec.QuickCheck +import Test.QuickCheck spec :: Spec spec = parallel $ do describe "interpret" $ do it "returns a replacement when comparing two unicode equivalent terms" $ - let termA = cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text) - termB = cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in - diffTerms (free . Free) ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB)) + let termA = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf ("t\776" :: Text) + termB = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf "\7831" in + diffTerms wrap ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB)) prop "produces correct diffs" $ - \ a b -> let diff = diffTerms (free . Free) ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category]) in + \ a b -> let diff = diffTerms wrap ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category, Vector.Vector Double]) in (beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b)) prop "constructs zero-cost diffs of equal terms" $ \ a -> let term = toTerm a - diff = diffTerms (free . Free) ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category]) in + diff = diffTerms wrap ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category, Vector.Vector Double]) in diffCost diff `shouldBe` 0 + + +instance Arbitrary a => Arbitrary (Vector.Vector a) where + arbitrary = Vector.fromList <$> arbitrary + shrink a = Vector.fromList <$> shrink (Vector.toList a) From ed4de9140db822a03da038aa5b263604ef020762 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:01:00 -0400 Subject: [PATCH 091/208] Compute RWS by using the existing field. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 1236a13b3..300b894f4 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -48,7 +48,7 @@ rws compare getLabel as bs fas = zipWith featurize [0..] as fbs = zipWith featurize [0..] bs kdas = KdTree.build (Vector.toList . feature) fas - featurize index term = UnmappedTerm index (featureVector d (pqGrams getLabel p q term)) term + featurize index term = UnmappedTerm index (getField (extract term)) term findNearestNeighbourTo kv@(UnmappedTerm _ _ v) = do (previous, unmapped) <- get let (UnmappedTerm i _ _) = KdTree.nearest kdas kv From 399c13da78ae2e1a27d462aa21a28f94a824f371 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:01:04 -0400 Subject: [PATCH 092/208] :fire: p, q, d. --- src/Data/RandomWalkSimilarity.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 300b894f4..664edceee 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -44,7 +44,6 @@ rws compare getLabel as bs | otherwise = fmap snd . uncurry deleteRemaining . (`runState` (negate 1, fas)) $ traverse findNearestNeighbourTo fbs where insert = pure . Insert delete = pure . Delete - (p, q, d) = (2, 2, 15) fas = zipWith featurize [0..] as fbs = zipWith featurize [0..] bs kdas = KdTree.build (Vector.toList . feature) fas From eb815695999a6df968e7e948afb1a65fa5de3464 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:08:17 -0400 Subject: [PATCH 093/208] :fire: the getLabel parameter to rws. --- src/Data/RandomWalkSimilarity.hs | 6 ++---- src/Interpreter.hs | 5 +---- test/Data/RandomWalkSimilarity/Spec.hs | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 664edceee..2726e2777 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -27,17 +27,15 @@ import Test.QuickCheck hiding (Fixed) import Test.QuickCheck.Random -- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf). -rws :: (Hashable label, Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), Typeable label, HasField fields (Vector.Vector Double)) => +rws :: (Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), HasField fields (Vector.Vector Double)) => -- | A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared. (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -> - -- | A function to compute a label for an unpacked term. - (forall b. CofreeF f (Record fields) b -> label) -> -- | The old list of terms. [Cofree f (Record fields)] -> -- | The new list of terms. [Cofree f (Record fields)] -> [Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))] -rws compare getLabel as bs +rws compare as bs | null as, null bs = [] | null as = insert <$> bs | null bs = delete <$> as diff --git a/src/Interpreter.hs b/src/Interpreter.hs index f561ed6e2..b235061a3 100644 --- a/src/Interpreter.hs +++ b/src/Interpreter.hs @@ -56,7 +56,4 @@ run construct comparable cost algorithm = case runFree algorithm of Free (ByIndex a b f) -> run construct comparable cost . f $ ses (constructAndRun construct comparable cost) cost a b - Free (ByRandomWalkSimilarity a b f) -> run construct comparable cost . f $ rws (constructAndRun construct comparable cost) getLabel a b - where getLabel (h :< t) = (category h, case t of - Leaf s -> Just s - _ -> Nothing) + Free (ByRandomWalkSimilarity a b f) -> run construct comparable cost . f $ rws (constructAndRun construct comparable cost) a b diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index b6b120956..82fc52340 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -34,7 +34,7 @@ spec = parallel $ do prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $ \ (as, bs) -> let tas = toTerm <$> as tbs = toTerm <$> bs - diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare (rhead . headF) tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in + diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tas)), Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tbs))) From 76a3fea7c20b1e27f23a07f6e7a05f77be6bf82a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:09:08 -0400 Subject: [PATCH 094/208] :fire: the Typeable and Hashable constraints on leaf. --- src/Interpreter.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Interpreter.hs b/src/Interpreter.hs index b235061a3..57e2440be 100644 --- a/src/Interpreter.hs +++ b/src/Interpreter.hs @@ -5,7 +5,6 @@ import Category import Data.Align.Generic import Data.Functor.Foldable import Data.Functor.Both -import Data.Hashable import Data.RandomWalkSimilarity import Data.Record import Data.These @@ -26,11 +25,11 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation - type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation -- | Diff two terms, given a function that determines whether two terms can be compared and a cost function. -diffTerms :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields) +diffTerms :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Diff leaf (Record fields) diffTerms construct comparable cost a b = fromMaybe (pure $ Replace a b) $ constructAndRun construct comparable cost a b -- | Constructs an algorithm and runs it -constructAndRun :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields)) +constructAndRun :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Term leaf (Record fields) -> Term leaf (Record fields) -> Maybe (Diff leaf (Record fields)) constructAndRun construct comparable cost t1 t2 | not $ comparable t1 t2 = Nothing | (category <$> t1) == (category <$> t2) = hylo construct runCofree <$> zipTerms t1 t2 @@ -43,7 +42,7 @@ constructAndRun construct comparable cost t1 t2 annotate = pure . construct . (both annotation1 annotation2 :<) -- | Runs the diff algorithm -run :: (Typeable leaf, Eq leaf, Hashable leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields)) +run :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -> Comparable leaf (Record fields) -> SES.Cost (Diff leaf (Record fields)) -> Algorithm leaf (Record fields) (Diff leaf (Record fields)) -> Maybe (Diff leaf (Record fields)) run construct comparable cost algorithm = case runFree algorithm of Pure diff -> Just diff Free (Recursive t1 t2 f) -> run construct comparable cost . f $ recur a b where From 3e85be8740dad560674e2d7791f519a05fa9e349 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:26:21 -0400 Subject: [PATCH 095/208] Generate arbitrary non-empty vectors. --- test/Data/RandomWalkSimilarity/Spec.hs | 2 +- test/Diff/Spec.hs | 2 +- test/DiffSummarySpec.hs | 2 +- test/InterpreterSpec.hs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index 82fc52340..0798564de 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -39,5 +39,5 @@ spec = parallel $ do instance Arbitrary a => Arbitrary (Vector.Vector a) where - arbitrary = Vector.fromList <$> arbitrary + arbitrary = Vector.fromList <$> listOf1 arbitrary shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/Diff/Spec.hs b/test/Diff/Spec.hs index a4199cb89..f470717e3 100644 --- a/test/Diff/Spec.hs +++ b/test/Diff/Spec.hs @@ -40,5 +40,5 @@ spec = parallel $ do instance Arbitrary a => Arbitrary (Vector.Vector a) where - arbitrary = Vector.fromList <$> arbitrary + arbitrary = Vector.fromList <$> listOf1 arbitrary shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/DiffSummarySpec.hs b/test/DiffSummarySpec.hs index ab5491cda..5c77d23ae 100644 --- a/test/DiffSummarySpec.hs +++ b/test/DiffSummarySpec.hs @@ -107,5 +107,5 @@ isBranchNode :: Patch DiffInfo -> Bool isBranchNode = any isBranchInfo instance Arbitrary a => Arbitrary (Vector.Vector a) where - arbitrary = Vector.fromList <$> arbitrary + arbitrary = Vector.fromList <$> listOf1 arbitrary shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index 39efd7933..95f00df61 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -33,5 +33,5 @@ spec = parallel $ do instance Arbitrary a => Arbitrary (Vector.Vector a) where - arbitrary = Vector.fromList <$> arbitrary + arbitrary = Vector.fromList <$> listOf1 arbitrary shrink a = Vector.fromList <$> shrink (Vector.toList a) From 46ac7b563031c0e0f7e062188c66495c09bb4181 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:33:28 -0400 Subject: [PATCH 096/208] Revert "Add a function to retrieve the head element of a non-empty record." This reverts commit 91d55d0e6c7ee324e21943ec06eb3acf213975f1. --- src/Data/Record.hs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 12f3d37bd..5a2665428 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -2,7 +2,6 @@ module Data.Record ( Record(..) , (.:) -, rhead , HasField(..) , maybeGetField , updateField @@ -26,11 +25,6 @@ infixr 0 .: (.:) :: Typeable h => h -> Record t -> Record (h ': t) (.:) = RCons --- | Get the first element of a non-empty record. -rhead :: Record (head ': tail) -> head -rhead (RCons head _) = head - - -- | Return 'Just' a 'field', if it exists in a record. Otherwise, return 'Nothing'. maybeGetField :: Typeable field => Record fields -> Maybe field maybeGetField (RCons h t) = cast h <|> maybeGetField t From d1d0f41939eca2d0fc0c68b5c4c30293d7a27a37 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:43:12 -0400 Subject: [PATCH 097/208] Revert ":memo: updateRCons, updateField, and maybeGetField." This reverts commit 1ff75a1a12fa4a7b613b484ff9b7259db0662b82. --- src/Data/Record.hs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 5a2665428..fafbdf82c 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -25,18 +25,15 @@ infixr 0 .: (.:) :: Typeable h => h -> Record t -> Record (h ': t) (.:) = RCons --- | Return 'Just' a 'field', if it exists in a record. Otherwise, return 'Nothing'. maybeGetField :: Typeable field => Record fields -> Maybe field maybeGetField (RCons h t) = cast h <|> maybeGetField t maybeGetField RNil = Nothing --- | Update (replace the value of) 'field' in a record, if it exists. Otherwise, return the record unchanged. updateField :: forall field fields. Typeable field => Record fields -> field -> Record fields updateField record a = case record of RNil -> RNil cons@(RCons _ _) -> updateRCons cons a --- | Update (replace the value of) 'field' in a non-empty record, if it exists. Otherwise return the record unchanged. updateRCons :: forall h t field. Typeable field => Record (h ': t) -> field -> Record (h ': t) updateRCons (RCons h t) a = case eqT :: Maybe (h :~: field) of Just Refl -> RCons a t From a2e6a7c0cbd0a0a1fbd0dae686fb27b43c4c25a5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:49:04 -0400 Subject: [PATCH 098/208] Revert ":fire: a redundant constraint." This reverts commit b9da8f8c1cf6c7d8db51b56e724cfebb3587ac24. --- src/Data/Record.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index fafbdf82c..b31adf4fc 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -34,7 +34,7 @@ updateField record a = case record of RNil -> RNil cons@(RCons _ _) -> updateRCons cons a -updateRCons :: forall h t field. Typeable field => Record (h ': t) -> field -> Record (h ': t) +updateRCons :: forall h t field. (Typeable h, Typeable field) => Record (h ': t) -> field -> Record (h ': t) updateRCons (RCons h t) a = case eqT :: Maybe (h :~: field) of Just Refl -> RCons a t Nothing -> RCons h (updateField t a) From 7d94ffe26df8ba28d8db28e1433f333acb65af8e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:51:50 -0400 Subject: [PATCH 099/208] Revert "Add a function to update fields." This reverts commit 1ab7e06cd7b06c9d688c1cea9c0bec2d4d505593. --- src/Data/Record.hs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index b31adf4fc..8af2fcca8 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -29,11 +29,6 @@ maybeGetField :: Typeable field => Record fields -> Maybe field maybeGetField (RCons h t) = cast h <|> maybeGetField t maybeGetField RNil = Nothing -updateField :: forall field fields. Typeable field => Record fields -> field -> Record fields -updateField record a = case record of - RNil -> RNil - cons@(RCons _ _) -> updateRCons cons a - updateRCons :: forall h t field. (Typeable h, Typeable field) => Record (h ': t) -> field -> Record (h ': t) updateRCons (RCons h t) a = case eqT :: Maybe (h :~: field) of Just Refl -> RCons a t From db4ebefe1e8f46871bbb9e26bcb5ce9c63a3bfcb Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:52:33 -0400 Subject: [PATCH 100/208] Revert "Add a function to update cons records." This reverts commit 32e3ffe1915699dfc58de19bccfbf231a2e6dfe4. --- src/Data/Record.hs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 8af2fcca8..ed4aa1738 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -1,10 +1,9 @@ -{-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, ScopedTypeVariables, TypeOperators #-} +{-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, TypeOperators #-} module Data.Record ( Record(..) , (.:) , HasField(..) , maybeGetField -, updateField ) where import GHC.Show @@ -29,11 +28,6 @@ maybeGetField :: Typeable field => Record fields -> Maybe field maybeGetField (RCons h t) = cast h <|> maybeGetField t maybeGetField RNil = Nothing -updateRCons :: forall h t field. (Typeable h, Typeable field) => Record (h ': t) -> field -> Record (h ': t) -updateRCons (RCons h t) a = case eqT :: Maybe (h :~: field) of - Just Refl -> RCons a t - Nothing -> RCons h (updateField t a) - -- Classes From a34f9ae3a4f2962a8682ba97f647e740eb9ed6a7 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:52:52 -0400 Subject: [PATCH 101/208] Revert "Add a maybeGetField function over Records." This reverts commit 41dda71a26dfc2a92fecfdf88d6ab3ad7165d383. --- src/Data/Record.hs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index ed4aa1738..006b61cc2 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -3,7 +3,6 @@ module Data.Record ( Record(..) , (.:) , HasField(..) -, maybeGetField ) where import GHC.Show @@ -24,10 +23,6 @@ infixr 0 .: (.:) :: Typeable h => h -> Record t -> Record (h ': t) (.:) = RCons -maybeGetField :: Typeable field => Record fields -> Maybe field -maybeGetField (RCons h t) = cast h <|> maybeGetField t -maybeGetField RNil = Nothing - -- Classes From 995c839f3febe48673f868b432647eb0c2555e7e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 09:59:15 -0400 Subject: [PATCH 102/208] Revert "Constrain record fields to be Typeable." This reverts commit 95fc1cb0ad4c7546450007b09fd690b4c84de1ce. --- src/Data/Record.hs | 6 +++--- src/Diffing.hs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 006b61cc2..4106ae307 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -14,13 +14,13 @@ import Test.QuickCheck -- | This is heavily inspired by Aaron Levin’s [Extensible Effects in the van Laarhoven Free Monad](http://aaronlevin.ca/post/136494428283/extensible-effects-in-the-van-laarhoven-free-monad). data Record :: [*] -> * where RNil :: Record '[] - RCons :: Typeable h => h -> Record t -> Record (h ': t) + RCons :: h -> Record t -> Record (h ': t) deriving Typeable infixr 0 .: -- | Infix synonym for `RCons`: `a .: b .: RNil == RCons a (RCons b RNil)`. -(.:) :: Typeable h => h -> Record t -> Record (h ': t) +(.:) :: h -> Record t -> Record (h ': t) (.:) = RCons @@ -67,7 +67,7 @@ instance Ord (Record '[]) where _ `compare` _ = EQ -instance (Typeable field, Arbitrary field, Arbitrary (Record fields)) => Arbitrary (Record (field ': fields)) where +instance (Arbitrary field, Arbitrary (Record fields)) => Arbitrary (Record (field ': fields)) where arbitrary = RCons <$> arbitrary <*> arbitrary shrink (RCons h t) = RCons <$> shrink h <*> shrink t diff --git a/src/Diffing.hs b/src/Diffing.hs index 8bfab8c0f..052eabeca 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -118,11 +118,11 @@ readAndTranscodeFile path = do type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field -- | Decorate the 'Term's produced by a 'Parser' using a function to compute the annotation values at every node. -decorateParser :: (Typeable field, Functor f) => TermDecorator f fields field -> Parser f (Record fields) -> Parser f (Record (field ': fields)) +decorateParser :: Functor f => TermDecorator f fields field -> Parser f (Record fields) -> Parser f (Record (field ': fields)) decorateParser decorator = (fmap (decorateTerm decorator) .) -- | Decorate a 'Term' using a function to compute the annotation values at every node. -decorateTerm :: (Typeable field, Functor f) => TermDecorator f fields field -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) +decorateTerm :: Functor f => TermDecorator f fields field -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c) compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Term leaf (Record fields) -> Bool From 54d3479f484df5190dee9833ac76f60e4396d5b9 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:00:48 -0400 Subject: [PATCH 103/208] :fire: some Typeable constraints. --- src/Data/RandomWalkSimilarity.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 2726e2777..f58df9779 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -87,16 +87,16 @@ featureVector d bag = sumVectors $ unitDVector . hash <$> bag normalize vec = fmap (/ vmagnitude vec) vec sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) -decorateTermWithLabel :: (Typeable label, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) +decorateTermWithLabel :: Functor f => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) decorateTermWithLabel getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) -decorateTermWithPGram :: (Typeable label, Functor f) => Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPGram :: Functor f => Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPGram p = futu coalgebra . (,) [] where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) (Free.Free (CofreeF f (Record (Gram label ': fields))) ([Maybe label], Cofree f (Record (label ': fields)))) coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) -decorateTermWithBagOfPQGrams :: (Typeable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) +decorateTermWithBagOfPQGrams :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons (first { base = padToSize q (base first) }) rest .: t) . cata algebra where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields))) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) algebra (RCons gram rest :< functor) = cofree (((gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) .: rest) :< functor) @@ -108,7 +108,7 @@ decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons ( decorateTermWithFeatureVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (DList.DList (Gram label) ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = fmap $ \ (RCons grams rest) -> featureVector d grams .: rest -featureVectorDecorator :: (Typeable label, Hashable label, Functor f, Prologue.Foldable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) +featureVectorDecorator :: (Hashable label, Functor f, Prologue.Foldable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d = decorateTermWithFeatureVector d . decorateTermWithBagOfPQGrams q From 28320a64d516c8472adc2f003792f704c9535fd0 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:02:11 -0400 Subject: [PATCH 104/208] Revert "Derive some Typeable instances." This reverts commit 687667826cbcdf1337ec167ca01b8f92874bbdf1. --- src/Category.hs | 3 ++- src/Data/Record.hs | 1 - src/Info.hs | 2 +- src/Range.hs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Category.hs b/src/Category.hs index 3d812b3b6..fa5a9ee41 100644 --- a/src/Category.hs +++ b/src/Category.hs @@ -92,7 +92,8 @@ data Category | Method -- | A non-standard category, which can be used for comparability. | Other Text - deriving (Eq, Generic, Ord, Show, Typeable) + deriving (Eq, Generic, Ord, Show) + -- Instances diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 4106ae307..7452855ab 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -15,7 +15,6 @@ import Test.QuickCheck data Record :: [*] -> * where RNil :: Record '[] RCons :: h -> Record t -> Record (h ': t) - deriving Typeable infixr 0 .: diff --git a/src/Info.hs b/src/Info.hs index 84c4a1dd7..44ba5c7e9 100644 --- a/src/Info.hs +++ b/src/Info.hs @@ -8,7 +8,7 @@ import Range import Test.QuickCheck newtype Cost = Cost { unCost :: Integer } - deriving (Eq, Num, Ord, Show, Typeable) + deriving (Eq, Num, Ord, Show) characterRange :: HasField fields Range => Record fields -> Range characterRange = getField diff --git a/src/Range.hs b/src/Range.hs index 504357edc..4479253b4 100644 --- a/src/Range.hs +++ b/src/Range.hs @@ -9,7 +9,7 @@ import Test.QuickCheck -- | A half-open interval of integers, defined by start & end indices. data Range = Range { start :: !Int, end :: !Int } - deriving (Eq, Show, Typeable) + deriving (Eq, Show) -- | Make a range at a given index. rangeAt :: Int -> Range From 9d62a13fc55d5f44cd7ad8c8f675bbbc36e65381 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:02:38 -0400 Subject: [PATCH 105/208] :fire: another errant Typeable constraint. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index f58df9779..4065670a6 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -68,7 +68,7 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } deriving (Eq, Show) -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. -pqGrams :: (Prologue.Foldable f, Functor f, Typeable label) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) +pqGrams :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) pqGrams getLabel p q = getField . extract . decorateTermWithBagOfPQGrams q . decorateTermWithPGram p . decorateTermWithLabel getLabel From 267f20f04805dff24f77bba8b2a17941dafe8a03 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:03:45 -0400 Subject: [PATCH 106/208] :fire: the exports list. --- src/Data/Record.hs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 7452855ab..70646facc 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -1,9 +1,5 @@ {-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, TypeOperators #-} -module Data.Record -( Record(..) -, (.:) -, HasField(..) -) where +module Data.Record where import GHC.Show import Prologue From cd456d15684e90ab6837fcc5ef2acfde13aa8f0e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:04:23 -0400 Subject: [PATCH 107/208] :fire: a blank line. --- src/Category.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Category.hs b/src/Category.hs index fa5a9ee41..425647973 100644 --- a/src/Category.hs +++ b/src/Category.hs @@ -94,7 +94,6 @@ data Category | Other Text deriving (Eq, Generic, Ord, Show) - -- Instances instance Hashable Category From a06dd1dc1eac375402e8342c0e19db97c1faee13 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:14:53 -0400 Subject: [PATCH 108/208] Move the Arbitrary instance over Vector into its own module. --- semantic-diff.cabal | 1 + test/Data/RandomWalkSimilarity/Spec.hs | 7 +------ test/Data/Vector/Arbitrary.hs | 10 ++++++++++ test/Diff/Spec.hs | 7 +------ test/DiffSummarySpec.hs | 6 +----- test/InterpreterSpec.hs | 7 +------ 6 files changed, 15 insertions(+), 23 deletions(-) create mode 100644 test/Data/Vector/Arbitrary.hs diff --git a/semantic-diff.cabal b/semantic-diff.cabal index 137f86a90..109c2d0fa 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -107,6 +107,7 @@ test-suite semantic-diff-test , CorpusSpec , Data.Mergeable.Spec , Data.RandomWalkSimilarity.Spec + , Data.Vector.Arbitrary , Diff.Spec , DiffSummarySpec , InterpreterSpec diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index 0798564de..b3e45a7dc 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -5,7 +5,7 @@ import Category import Data.DList as DList hiding (toList) import Data.RandomWalkSimilarity import Data.Record -import qualified Data.Vector as Vector +import qualified Data.Vector.Arbitrary as Vector import Diff import Patch import Prologue @@ -36,8 +36,3 @@ spec = parallel $ do tbs = toTerm <$> bs diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tas)), Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tbs))) - - -instance Arbitrary a => Arbitrary (Vector.Vector a) where - arbitrary = Vector.fromList <$> listOf1 arbitrary - shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/Data/Vector/Arbitrary.hs b/test/Data/Vector/Arbitrary.hs new file mode 100644 index 000000000..06a09e3b8 --- /dev/null +++ b/test/Data/Vector/Arbitrary.hs @@ -0,0 +1,10 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +module Data.Vector.Arbitrary (module Vector) where + +import Data.Vector as Vector +import Prologue +import Test.QuickCheck + +instance Arbitrary a => Arbitrary (Vector.Vector a) where + arbitrary = Vector.fromList <$> listOf1 arbitrary + shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/Diff/Spec.hs b/test/Diff/Spec.hs index f470717e3..64a1356c9 100644 --- a/test/Diff/Spec.hs +++ b/test/Diff/Spec.hs @@ -4,7 +4,7 @@ module Diff.Spec where import Category import Data.Record import Data.Text.Arbitrary () -import qualified Data.Vector as Vector +import qualified Data.Vector.Arbitrary as Vector import Diff import Diff.Arbitrary import Interpreter @@ -37,8 +37,3 @@ spec = parallel $ do describe "ArbitraryDiff" $ do prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $ \ (n, diff) -> arbitraryDiffSize (diff :: ArbitraryDiff Text ()) `shouldBe` n - - -instance Arbitrary a => Arbitrary (Vector.Vector a) where - arbitrary = Vector.fromList <$> listOf1 arbitrary - shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/DiffSummarySpec.hs b/test/DiffSummarySpec.hs index 5c77d23ae..12384e0fd 100644 --- a/test/DiffSummarySpec.hs +++ b/test/DiffSummarySpec.hs @@ -3,7 +3,7 @@ module DiffSummarySpec where import Prologue import Data.Record -import qualified Data.Vector as Vector +import qualified Data.Vector.Arbitrary as Vector import Test.Hspec import Test.Hspec.QuickCheck import Diff @@ -105,7 +105,3 @@ isBranchInfo info = case info of isBranchNode :: Patch DiffInfo -> Bool isBranchNode = any isBranchInfo - -instance Arbitrary a => Arbitrary (Vector.Vector a) where - arbitrary = Vector.fromList <$> listOf1 arbitrary - shrink a = Vector.fromList <$> shrink (Vector.toList a) diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index 95f00df61..f95a81e35 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -4,7 +4,7 @@ module InterpreterSpec where import Category import Diff import Data.Record -import qualified Data.Vector as Vector +import qualified Data.Vector.Arbitrary as Vector import Interpreter import Patch import Prologue @@ -30,8 +30,3 @@ spec = parallel $ do \ a -> let term = toTerm a diff = diffTerms wrap ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category, Vector.Vector Double]) in diffCost diff `shouldBe` 0 - - -instance Arbitrary a => Arbitrary (Vector.Vector a) where - arbitrary = Vector.fromList <$> listOf1 arbitrary - shrink a = Vector.fromList <$> shrink (Vector.toList a) From b29988a9a55a1f9bc6b6f304a0789678690cdfa8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:16:18 -0400 Subject: [PATCH 109/208] Fix a non-exhaustive pattern. --- test/DiffSummarySpec.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/DiffSummarySpec.hs b/test/DiffSummarySpec.hs index 12384e0fd..514652c99 100644 --- a/test/DiffSummarySpec.hs +++ b/test/DiffSummarySpec.hs @@ -101,7 +101,7 @@ isIndexedOrFixed' syntax = case syntax of isBranchInfo :: DiffInfo -> Bool isBranchInfo info = case info of (BranchInfo _ _ _) -> True - (LeafInfo _ _) -> False + _ -> False isBranchNode :: Patch DiffInfo -> Bool isBranchNode = any isBranchInfo From 984822486b0d18beece3d2d5859d07153f43b37a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:16:35 -0400 Subject: [PATCH 110/208] :fire: some redundant imports. --- test/DiffSummarySpec.hs | 3 --- test/InterpreterSpec.hs | 1 - 2 files changed, 4 deletions(-) diff --git a/test/DiffSummarySpec.hs b/test/DiffSummarySpec.hs index 514652c99..4fd876866 100644 --- a/test/DiffSummarySpec.hs +++ b/test/DiffSummarySpec.hs @@ -12,8 +12,6 @@ import Term import Patch import Category import DiffSummary -import Text.PrettyPrint.Leijen.Text (pretty) -import Test.Hspec.QuickCheck import Diff.Arbitrary import Data.List (partition) import Term.Arbitrary @@ -21,7 +19,6 @@ import Interpreter import Info import Source import Data.Functor.Both -import Test.QuickCheck hiding (Fixed) arrayInfo :: Record '[Category, Range, Vector.Vector Double] arrayInfo = ArrayLiteral .: Range 0 3 .: Vector.singleton 0 .: RNil diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index f95a81e35..4e7c509f2 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -12,7 +12,6 @@ import Syntax import Term.Arbitrary import Test.Hspec import Test.Hspec.QuickCheck -import Test.QuickCheck spec :: Spec spec = parallel $ do From 41a32f5ab82ad6c5ed6ba937c32dbbcee7cae47c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:22:03 -0400 Subject: [PATCH 111/208] We can do this in ana just as easily. --- src/Data/RandomWalkSimilarity.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 4065670a6..f9d5aa7fe 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -9,7 +9,6 @@ module Data.RandomWalkSimilarity import Control.Applicative import Control.Arrow ((&&&)) -import qualified Control.Monad.Free as Free (Free) import Control.Monad.Random import Control.Monad.State import qualified Data.DList as DList @@ -91,10 +90,10 @@ decorateTermWithLabel :: Functor f => (forall b. CofreeF f (Record fields) b -> decorateTermWithLabel getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) decorateTermWithPGram :: Functor f => Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPGram p = futu coalgebra . (,) [] - where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) (Free.Free (CofreeF f (Record (Gram label ': fields))) ([Maybe label], Cofree f (Record (label ': fields)))) +decorateTermWithPGram p = ana coalgebra . (,) [] + where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) ([Maybe label], Cofree f (Record (label ': fields))) coalgebra (parentLabels, c) = case extract c of - RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap (pure . (,) (take p (Just label : parentLabels))) (unwrap c) + RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (take p (Just label : parentLabels))) (unwrap c) decorateTermWithBagOfPQGrams :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons (first { base = padToSize q (base first) }) rest .: t) . cata algebra From ebaca5e49a033435e2efea75e79061fa859b5e97 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:22:44 -0400 Subject: [PATCH 112/208] Pad the stems. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index f9d5aa7fe..e7a98436c 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -93,7 +93,7 @@ decorateTermWithPGram :: Functor f => Int -> Cofree f (Record (label ': fields)) decorateTermWithPGram p = ana coalgebra . (,) [] where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) ([Maybe label], Cofree f (Record (label ': fields))) coalgebra (parentLabels, c) = case extract c of - RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (take p (Just label : parentLabels))) (unwrap c) + RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (padToSize p (Just label : parentLabels))) (unwrap c) decorateTermWithBagOfPQGrams :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons (first { base = padToSize q (base first) }) rest .: t) . cata algebra From 4c729f5c77ab0ed32ddff4e7355dba0eac269282 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:24:11 -0400 Subject: [PATCH 113/208] :memo: padToSize. --- src/Data/RandomWalkSimilarity.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index e7a98436c..0cb9ab859 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -114,6 +114,7 @@ featureVectorDecorator getLabel p q d . decorateTermWithPGram p . decorateTermWithLabel getLabel +-- | Pads a list of Alternative values to exactly n elements. padToSize :: Alternative f => Int -> [f a] -> [f a] padToSize n list = take n (list <> repeat empty) From 3b1ce5850d2f60f8a33e1f0550b71e7931b723b4 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 10:26:16 -0400 Subject: [PATCH 114/208] :memo: the decorators. --- src/Data/RandomWalkSimilarity.hs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 0cb9ab859..9c3d2663e 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -86,15 +86,18 @@ featureVector d bag = sumVectors $ unitDVector . hash <$> bag normalize vec = fmap (/ vmagnitude vec) vec sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) +-- | Annotates a term with a label at each node. decorateTermWithLabel :: Functor f => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) decorateTermWithLabel getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) +-- | Replaces labels in a term’s annotations with corresponding p,1-grams. decorateTermWithPGram :: Functor f => Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPGram p = ana coalgebra . (,) [] where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) ([Maybe label], Cofree f (Record (label ': fields))) coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (padToSize p (Just label : parentLabels))) (unwrap c) +-- | Replaces p,1-grams in a term’s annotations with corresponding bags of p,q-grams. decorateTermWithBagOfPQGrams :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons (first { base = padToSize q (base first) }) rest .: t) . cata algebra where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields))) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) @@ -104,9 +107,11 @@ decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons ( getGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) getGrams = getField +-- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. decorateTermWithFeatureVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (DList.DList (Gram label) ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = fmap $ \ (RCons grams rest) -> featureVector d grams .: rest +-- | Annotates a term with a feature vector at each node. featureVectorDecorator :: (Hashable label, Functor f, Prologue.Foldable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d = decorateTermWithFeatureVector d From 0923a950c2763ffc085b4d1282b0be3fa8207c32 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 18:04:07 -0400 Subject: [PATCH 115/208] Compute unit vectors separately from feature vectors. --- src/Data/RandomWalkSimilarity.hs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 0f6d0433e..d629afaa1 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -63,7 +63,7 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. pqGrams :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) -pqGrams getLabel p q = getField . extract . decorateTermWithBagOfPQGrams q . decorateTermWithPGram p . decorateTermWithLabel getLabel +pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram q . decorateTermWithPGram p . decorateTermWithLabel getLabel -- | A sliding-window fold over _n_ items of a list per iteration. @@ -93,24 +93,30 @@ decorateTermWithPGram p = ana coalgebra . (,) [] RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (padToSize p (Just label : parentLabels))) (unwrap c) -- | Replaces p,1-grams in a term’s annotations with corresponding bags of p,q-grams. -decorateTermWithBagOfPQGrams :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (DList.DList (Gram label) ': fields)) -decorateTermWithBagOfPQGrams q = fmap (\ (RCons (first, rest) t) -> DList.cons (first { base = padToSize q (base first) }) rest .: t) . cata algebra - where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields))) -> Cofree f (Record ((Gram label, DList.DList (Gram label)) ': fields)) - algebra (RCons gram rest :< functor) = cofree (((gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) .: rest) :< functor) +decorateTermWithPQGram :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPQGram q = cata algebra + where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) + algebra (RCons gram rest :< functor) = cofree ((gram .: rest) :< functor) + -- (gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) setBases :: Gram label -> [Gram label] -> [Gram label] -> [Gram label] setBases gram siblings rest = gram { base = padToSize q (foldMap base siblings) } : rest getGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) getGrams = getField -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. -decorateTermWithFeatureVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (DList.DList (Gram label) ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) -decorateTermWithFeatureVector d = fmap $ \ (RCons grams rest) -> featureVector d grams .: rest +decorateTermWithFeatureVector :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Vector.Vector Double ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) +decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> cofree (RCons (foldr (\ each into -> Vector.zipWith (+) (getField (extract each)) into) (Vector.replicate d 0) functor) rest :< functor) + +decorateTermWithUnitVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) +decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRand` mkQCGen (hash gram)) (sequenceA (Vector.replicate d getRandom))) .: rest + where normalize vec = fmap (/ vmagnitude vec) vec -- | Annotates a term with a feature vector at each node. featureVectorDecorator :: (Hashable label, Functor f, Prologue.Foldable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d = decorateTermWithFeatureVector d - . decorateTermWithBagOfPQGrams q + . decorateTermWithUnitVector d + . decorateTermWithPQGram q . decorateTermWithPGram p . decorateTermWithLabel getLabel From e2a8db8487c8796d0f52a630018c8935dca6cf54 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 18:04:57 -0400 Subject: [PATCH 116/208] Start summing from the unit vector. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index d629afaa1..0a152ad86 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -105,7 +105,7 @@ decorateTermWithPQGram q = cata algebra -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. decorateTermWithFeatureVector :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Vector.Vector Double ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) -decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> cofree (RCons (foldr (\ each into -> Vector.zipWith (+) (getField (extract each)) into) (Vector.replicate d 0) functor) rest :< functor) +decorateTermWithFeatureVector d = cata $ \ (RCons unitVector rest :< functor) -> cofree (RCons (foldr (\ each into -> Vector.zipWith (+) (getField (extract each)) into) unitVector functor) rest :< functor) decorateTermWithUnitVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRand` mkQCGen (hash gram)) (sequenceA (Vector.replicate d getRandom))) .: rest From 65436657545971f4bb6f0c82e2e6e907895fb637 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 18:05:36 -0400 Subject: [PATCH 117/208] decorateTermWithFeatureVector does not rely on the d parameter. --- src/Data/RandomWalkSimilarity.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 0a152ad86..6259ee5b5 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -104,8 +104,8 @@ decorateTermWithPQGram q = cata algebra getGrams = getField -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. -decorateTermWithFeatureVector :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Vector.Vector Double ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) -decorateTermWithFeatureVector d = cata $ \ (RCons unitVector rest :< functor) -> cofree (RCons (foldr (\ each into -> Vector.zipWith (+) (getField (extract each)) into) unitVector functor) rest :< functor) +decorateTermWithFeatureVector :: (Prologue.Foldable f, Functor f) => Cofree f (Record (Vector.Vector Double ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) +decorateTermWithFeatureVector = cata $ \ (RCons unitVector rest :< functor) -> cofree (RCons (foldr (\ each into -> Vector.zipWith (+) (getField (extract each)) into) unitVector functor) rest :< functor) decorateTermWithUnitVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRand` mkQCGen (hash gram)) (sequenceA (Vector.replicate d getRandom))) .: rest @@ -114,7 +114,7 @@ decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRa -- | Annotates a term with a feature vector at each node. featureVectorDecorator :: (Hashable label, Functor f, Prologue.Foldable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d - = decorateTermWithFeatureVector d + = decorateTermWithFeatureVector . decorateTermWithUnitVector d . decorateTermWithPQGram q . decorateTermWithPGram p From 1e3225f5942588dc80cb505ba143aabe19927c8f Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Fri, 5 Aug 2016 18:11:34 -0400 Subject: [PATCH 118/208] Diff these branches by similarity. --- src/Interpreter.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreter.hs b/src/Interpreter.hs index 1a4829b11..7e77a037a 100644 --- a/src/Interpreter.hs +++ b/src/Interpreter.hs @@ -69,7 +69,7 @@ algorithmWithTerms construct t1 t2 = case (unwrap t1, unwrap t2) of annotate $! S.Method identifier params expressions _ -> recursively t1 t2 where annotate = pure . construct . (both (extract t1) (extract t2) :<) - byIndex constructor a b = Algorithm.byIndex a b >>= annotate . constructor + byIndex constructor a b = Algorithm.bySimilarity a b >>= annotate . constructor -- | Run an algorithm, given functions characterizing the evaluation. runAlgorithm :: (Functor f, GAlign f, Eq a, Eq (Record fields), Eq (f (Cofree f (Record fields))), Prologue.Foldable f, Traversable f, HasField fields (Vector.Vector Double)) From fa90ab1da64677970d359b310cf0ed59862860cc Mon Sep 17 00:00:00 2001 From: Rick Winfrey Date: Mon, 8 Aug 2016 10:26:35 -0500 Subject: [PATCH 119/208] Add 08-08-2016 weekly notes --- weekly/2016-08-08.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 weekly/2016-08-08.md diff --git a/weekly/2016-08-08.md b/weekly/2016-08-08.md new file mode 100644 index 000000000..f0fcc0c4d --- /dev/null +++ b/weekly/2016-08-08.md @@ -0,0 +1,37 @@ +### What went well? + +@robrix: Interpreter stuff was effective in cleaning up / clarifying some previously messy code. diffing algorithm is now a little scripting language. + +@joshvera: Javascript syntaxes went well. Self assessments were more streamlined. + +@rewinfrey: Self assessments were simple. Pairing with Rob went well (first time really driving). + +### What didn't go well? + +@robrix: Ancillary tasks not going so well (patent process). + +@joshvera: Self assessments took time away from working on minor diff summary tasks. Getting C into syntax is going to be difficult. + +@rewinfrey: Recognizing that there is a better way to handle the effects in the test generator in the types, but not quite there skillwise to implement it (but also recognize I will get there :) ). + +### What did you learn? + +@robrix: Cemented understanding of free monad and interpreters. learned about type families and type classes for derivative parsers. keep things in single module and break out as needed to avoid orphan instances. + +@joshvera: Learned about tradeoffs of performance of free monads we currently use, and learned more about effects. over the weekend read about comonads as pointed spaces. + +@rewinfrey: Learned about how to model effects in the type system (via free and cofree), and learned foundational Yesod (routing, controllers, models / migrations, etc.). + +### Other things? + +@robrix: Re: Staff shipping in two weeks -- how do we feel? Our performance issues are still a concern, but responsiveness is being looked at outside the team. + +@joshvera: Feeling okay with staff shipping, but performance should be addressed before staff shipping. This seems possible. + +@robrix: Please keep in mind the following: + - If we can't meet the deadline, can we punt? + - No death marches please. + +@joshvera: Let's reconvene later this week and update status of deliverables. + +@robrix: Goal is to get to smaller, more regular releases. Right now that is hard, because the initial staffship of Diff Summaries frontloads a lot of functionality that future releases will benefit from. From c191312ece6ce4c17ca74850d6b5cfafe6ad5465 Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 12:21:56 -0400 Subject: [PATCH 120/208] Move instances down and exported types up --- src/DiffSummary.hs | 184 +++++++++++++++++++++++---------------------- 1 file changed, 93 insertions(+), 91 deletions(-) diff --git a/src/DiffSummary.hs b/src/DiffSummary.hs index 001cd79d3..90b828b6c 100644 --- a/src/DiffSummary.hs +++ b/src/DiffSummary.hs @@ -26,6 +26,67 @@ data DiffInfo = LeafInfo { categoryName :: Text, termName :: Text } | ErrorInfo { errorSpan :: SourceSpan, categoryName :: Text } deriving (Eq, Show) +data Branch = BIndexed | BFixed | BCommented deriving (Show, Eq, Generic) +data DiffSummary a = DiffSummary { + patch :: Patch a, + parentAnnotations :: [Category] +} deriving (Eq, Functor, Show, Generic) + +annotatedSummaries :: DiffSummary DiffInfo -> [Text] +annotatedSummaries DiffSummary{..} = show . (P.<> maybeParentContext parentAnnotations) <$> summaries patch + +diffSummaries :: (HasCategory leaf, HasField fields Category, HasField fields Range) => Both (Source Char) -> Diff leaf (Record fields) -> [DiffSummary DiffInfo] +diffSummaries sources = cata $ \case + -- Skip comments and leaves since they don't have any changes + (Free (_ :< Leaf _)) -> [] + Free (_ :< (S.Comment _)) -> [] + (Free (infos :< S.Indexed children)) -> annotateWithCategory infos <$> join children + (Free (infos :< S.Fixed children)) -> annotateWithCategory infos <$> join children + (Free (infos :< S.FunctionCall identifier children)) -> annotateWithCategory infos <$> join (Prologue.toList (identifier : children)) + (Free (infos :< S.Function id ps body)) -> annotateWithCategory infos <$> (fromMaybe [] id) <> (fromMaybe [] ps) <> body + (Free (infos :< S.Assignment id value)) -> annotateWithCategory infos <$> id <> value + (Free (infos :< S.MemberAccess base property)) -> annotateWithCategory infos <$> base <> property + (Free (infos :< S.SubscriptAccess base property)) -> annotateWithCategory infos <$> base <> property + (Free (infos :< S.MethodCall targetId methodId ps)) -> annotateWithCategory infos <$> targetId <> methodId <> ps + (Free (infos :< S.VarAssignment varId value)) -> annotateWithCategory infos <$> varId <> value + (Free (infos :< S.VarDecl decl)) -> annotateWithCategory infos <$> decl + (Free (infos :< S.Args args)) -> annotateWithCategory infos <$> join args + (Free (infos :< S.Switch expr cases)) -> annotateWithCategory infos <$> expr <> join cases + (Free (infos :< S.Case expr body)) -> annotateWithCategory infos <$> expr <> body + Free (infos :< (S.Ternary expr cases)) -> annotateWithCategory infos <$> expr <> join cases + Free (infos :< (S.MathAssignment id value)) -> annotateWithCategory infos <$> id <> value + Free (infos :< (S.Operator syntaxes)) -> annotateWithCategory infos <$> join syntaxes + Free (infos :< (S.Object kvs)) -> annotateWithCategory infos <$> join kvs + Free (infos :< (S.Return expr)) -> annotateWithCategory infos <$> fromMaybe [] expr + Free (infos :< (S.Pair a b)) -> annotateWithCategory infos <$> a <> b + Free (infos :< (S.Commented cs leaf)) -> annotateWithCategory infos <$> join cs <> fromMaybe [] leaf + Free (infos :< (S.Error _ children)) -> annotateWithCategory infos <$> join children + (Free (infos :< S.For exprs body)) -> annotateWithCategory infos <$> join exprs <> body + (Free (infos :< S.While expr body)) -> annotateWithCategory infos <$> expr <> body + (Free (infos :< S.DoWhile expr body)) -> annotateWithCategory infos <$> expr <> body + (Free (infos :< S.Throw expr)) -> annotateWithCategory infos <$> expr + (Free (infos :< S.Constructor expr)) -> annotateWithCategory infos <$> expr + (Free (infos :< S.Try expr catch finally)) -> annotateWithCategory infos <$> expr <> fromMaybe [] catch <> fromMaybe [] finally + (Free (infos :< S.Array children)) -> annotateWithCategory infos <$> join children + (Free (infos :< S.Class identifier superclass definitions)) -> annotateWithCategory infos <$> identifier <> fromMaybe [] superclass <> join definitions + (Free (infos :< S.Method identifier params definitions)) -> annotateWithCategory infos <$> identifier <> join params <> join definitions + (Pure (Insert term)) -> [ DiffSummary (Insert $ termToDiffInfo afterSource term) [] ] + (Pure (Delete term)) -> [ DiffSummary (Delete $ termToDiffInfo beforeSource term) [] ] + (Pure (Replace t1 t2)) -> [ DiffSummary (Replace (termToDiffInfo beforeSource t1) (termToDiffInfo afterSource t2)) [] ] + where + (beforeSource, afterSource) = runJoin sources + annotateWithCategory infos = prependSummary (category $ snd infos) + +summaries :: Patch DiffInfo -> [P.Doc] +summaries (Insert info) = (("Added" <+> "the") <+>) <$> toLeafInfos info +summaries (Delete info) = (("Deleted" <+> "the") <+>) <$> toLeafInfos info +summaries (Replace i1 i2) = zipWith (\a b -> "Replaced" <+> "the" <+> a <+> "with the" <+> b) (toLeafInfos i1) (toLeafInfos i2) + +toLeafInfos :: DiffInfo -> [Doc] +toLeafInfos LeafInfo{..} = pure $ squotes (toDoc termName) <+> (toDoc categoryName) +toLeafInfos BranchInfo{..} = pretty <$> branches +toLeafInfos err@ErrorInfo{} = pure $ pretty err + toTermName :: (HasCategory leaf, HasField fields Category, HasField fields Range) => Source Char -> Term leaf (Record fields) -> Text toTermName source term = case unwrap term of S.Fixed children -> fromMaybe "branch" $ (toCategoryName . category) . extract <$> head children @@ -81,9 +142,41 @@ toTermName source term = case unwrap term of termNameFromRange range = toText $ Source.slice range source range = characterRange . extract +maybeParentContext :: [Category] -> Doc +maybeParentContext annotations = if null annotations + then "" + else space <> "in the" <+> (toDoc . intercalate "/" $ toCategoryName <$> annotations) <+> "context" +toDoc :: Text -> Doc +toDoc = string . toS + +termToDiffInfo :: (HasCategory leaf, HasField fields Category, HasField fields Range) => Source Char -> Term leaf (Record fields) -> DiffInfo +termToDiffInfo blob term = case unwrap term of + Leaf _ -> LeafInfo (toCategoryName term) (toTermName' term) + S.Indexed children -> BranchInfo (termToDiffInfo' <$> children) (toCategoryName term) BIndexed + S.Fixed children -> BranchInfo (termToDiffInfo' <$> children) (toCategoryName term) BFixed + S.FunctionCall identifier _ -> LeafInfo (toCategoryName term) (toTermName' identifier) + S.Ternary ternaryCondition _ -> LeafInfo (toCategoryName term) (toTermName' ternaryCondition) + S.Function identifier _ _ -> LeafInfo (toCategoryName term) (maybe "anonymous" toTermName' identifier) + S.Assignment identifier _ -> LeafInfo (toCategoryName term) (toTermName' identifier) + S.MathAssignment identifier _ -> LeafInfo (toCategoryName term) (toTermName' identifier) + -- Currently we cannot express the operator for an operator production from TreeSitter. Eventually we should be able to + -- use the term name of the operator identifier when we have that production value. Until then, I'm using a placeholder value + -- to indicate where that value should be when constructing DiffInfos. + Commented cs leaf -> BranchInfo (termToDiffInfo' <$> cs <> maybeToList leaf) (toCategoryName term) BCommented + S.Error sourceSpan _ -> ErrorInfo sourceSpan (toCategoryName term) + _ -> LeafInfo (toCategoryName term) (toTermName' term) + where toTermName' = toTermName blob + termToDiffInfo' = termToDiffInfo blob + +prependSummary :: Category -> DiffSummary DiffInfo -> DiffSummary DiffInfo +prependSummary annotation summary = summary { parentAnnotations = annotation : parentAnnotations summary } + +-- The user-facing category name of 'a'. class HasCategory a where toCategoryName :: a -> Text +-- Instances + instance HasCategory Text where toCategoryName = identity @@ -135,16 +228,10 @@ instance HasCategory Category where instance (HasCategory leaf, HasField fields Category) => HasCategory (Term leaf (Record fields)) where toCategoryName = toCategoryName . category . extract -data Branch = BIndexed | BFixed | BCommented deriving (Show, Eq, Generic) instance Arbitrary Branch where arbitrary = oneof [ pure BIndexed, pure BFixed ] shrink = genericShrink -data DiffSummary a = DiffSummary { - patch :: Patch a, - parentAnnotations :: [Category] -} deriving (Eq, Functor, Show, Generic) - instance (Eq a, Arbitrary a) => Arbitrary (DiffSummary a) where arbitrary = DiffSummary <$> arbitrary <*> arbitrary shrink = genericShrink @@ -153,88 +240,3 @@ instance P.Pretty DiffInfo where pretty LeafInfo{..} = squotes (string $ toSL termName) <+> (string $ toSL categoryName) pretty BranchInfo{..} = mconcat $ punctuate (string "," <> space) (pretty <$> branches) pretty ErrorInfo{..} = "syntax error at" <+> (string . toSL $ displayStartEndPos errorSpan) <+> "in" <+> (string . toSL $ spanName errorSpan) - -annotatedSummaries :: DiffSummary DiffInfo -> [Text] -annotatedSummaries DiffSummary{..} = show . (P.<> maybeParentContext parentAnnotations) <$> summaries patch - -summaries :: Patch DiffInfo -> [P.Doc] -summaries (Insert info) = (("Added" <+> "the") <+>) <$> toLeafInfos info -summaries (Delete info) = (("Deleted" <+> "the") <+>) <$> toLeafInfos info -summaries (Replace i1 i2) = zipWith (\a b -> "Replaced" <+> "the" <+> a <+> "with the" <+> b) (toLeafInfos i1) (toLeafInfos i2) - -toLeafInfos :: DiffInfo -> [Doc] -toLeafInfos LeafInfo{..} = pure $ squotes (toDoc termName) <+> (toDoc categoryName) -toLeafInfos BranchInfo{..} = pretty <$> branches -toLeafInfos err@ErrorInfo{} = pure $ pretty err - -maybeParentContext :: [Category] -> Doc -maybeParentContext annotations = if null annotations - then "" - else space <> "in the" <+> (toDoc . intercalate "/" $ toCategoryName <$> annotations) <+> "context" -toDoc :: Text -> Doc -toDoc = string . toS - -diffSummaries :: (HasCategory leaf, HasField fields Category, HasField fields Range) => Both (Source Char) -> Diff leaf (Record fields) -> [DiffSummary DiffInfo] -diffSummaries sources = cata $ \case - -- Skip comments and leaves since they don't have any changes - (Free (_ :< Leaf _)) -> [] - Free (_ :< (S.Comment _)) -> [] - (Free (infos :< S.Indexed children)) -> annotateWithCategory infos <$> join children - (Free (infos :< S.Fixed children)) -> annotateWithCategory infos <$> join children - (Free (infos :< S.FunctionCall identifier children)) -> annotateWithCategory infos <$> join (Prologue.toList (identifier : children)) - (Free (infos :< S.Function id ps body)) -> annotateWithCategory infos <$> (fromMaybe [] id) <> (fromMaybe [] ps) <> body - (Free (infos :< S.Assignment id value)) -> annotateWithCategory infos <$> id <> value - (Free (infos :< S.MemberAccess base property)) -> annotateWithCategory infos <$> base <> property - (Free (infos :< S.SubscriptAccess base property)) -> annotateWithCategory infos <$> base <> property - (Free (infos :< S.MethodCall targetId methodId ps)) -> annotateWithCategory infos <$> targetId <> methodId <> ps - (Free (infos :< S.VarAssignment varId value)) -> annotateWithCategory infos <$> varId <> value - (Free (infos :< S.VarDecl decl)) -> annotateWithCategory infos <$> decl - (Free (infos :< S.Args args)) -> annotateWithCategory infos <$> join args - (Free (infos :< S.Switch expr cases)) -> annotateWithCategory infos <$> expr <> join cases - (Free (infos :< S.Case expr body)) -> annotateWithCategory infos <$> expr <> body - Free (infos :< (S.Ternary expr cases)) -> annotateWithCategory infos <$> expr <> join cases - Free (infos :< (S.MathAssignment id value)) -> annotateWithCategory infos <$> id <> value - Free (infos :< (S.Operator syntaxes)) -> annotateWithCategory infos <$> join syntaxes - Free (infos :< (S.Object kvs)) -> annotateWithCategory infos <$> join kvs - Free (infos :< (S.Return expr)) -> annotateWithCategory infos <$> fromMaybe [] expr - Free (infos :< (S.Pair a b)) -> annotateWithCategory infos <$> a <> b - Free (infos :< (S.Commented cs leaf)) -> annotateWithCategory infos <$> join cs <> fromMaybe [] leaf - Free (infos :< (S.Error _ children)) -> annotateWithCategory infos <$> join children - (Free (infos :< S.For exprs body)) -> annotateWithCategory infos <$> join exprs <> body - (Free (infos :< S.While expr body)) -> annotateWithCategory infos <$> expr <> body - (Free (infos :< S.DoWhile expr body)) -> annotateWithCategory infos <$> expr <> body - (Free (infos :< S.Throw expr)) -> annotateWithCategory infos <$> expr - (Free (infos :< S.Constructor expr)) -> annotateWithCategory infos <$> expr - (Free (infos :< S.Try expr catch finally)) -> annotateWithCategory infos <$> expr <> fromMaybe [] catch <> fromMaybe [] finally - (Free (infos :< S.Array children)) -> annotateWithCategory infos <$> join children - (Free (infos :< S.Class identifier superclass definitions)) -> annotateWithCategory infos <$> identifier <> fromMaybe [] superclass <> join definitions - (Free (infos :< S.Method identifier params definitions)) -> annotateWithCategory infos <$> identifier <> join params <> join definitions - (Pure (Insert term)) -> [ DiffSummary (Insert $ termToDiffInfo afterSource term) [] ] - (Pure (Delete term)) -> [ DiffSummary (Delete $ termToDiffInfo beforeSource term) [] ] - (Pure (Replace t1 t2)) -> [ DiffSummary (Replace (termToDiffInfo beforeSource t1) (termToDiffInfo afterSource t2)) [] ] - where - (beforeSource, afterSource) = runJoin sources - annotateWithCategory infos = prependSummary (category $ snd infos) - - -termToDiffInfo :: (HasCategory leaf, HasField fields Category, HasField fields Range) => Source Char -> Term leaf (Record fields) -> DiffInfo -termToDiffInfo blob term = case unwrap term of - Leaf _ -> LeafInfo (toCategoryName term) (toTermName' term) - S.Indexed children -> BranchInfo (termToDiffInfo' <$> children) (toCategoryName term) BIndexed - S.Fixed children -> BranchInfo (termToDiffInfo' <$> children) (toCategoryName term) BFixed - S.FunctionCall identifier _ -> LeafInfo (toCategoryName term) (toTermName' identifier) - S.Ternary ternaryCondition _ -> LeafInfo (toCategoryName term) (toTermName' ternaryCondition) - S.Function identifier _ _ -> LeafInfo (toCategoryName term) (maybe "anonymous" toTermName' identifier) - S.Assignment identifier _ -> LeafInfo (toCategoryName term) (toTermName' identifier) - S.MathAssignment identifier _ -> LeafInfo (toCategoryName term) (toTermName' identifier) - -- Currently we cannot express the operator for an operator production from TreeSitter. Eventually we should be able to - -- use the term name of the operator identifier when we have that production value. Until then, I'm using a placeholder value - -- to indicate where that value should be when constructing DiffInfos. - Commented cs leaf -> BranchInfo (termToDiffInfo' <$> cs <> maybeToList leaf) (toCategoryName term) BCommented - S.Error sourceSpan _ -> ErrorInfo sourceSpan (toCategoryName term) - _ -> LeafInfo (toCategoryName term) (toTermName' term) - where toTermName' = toTermName blob - termToDiffInfo' = termToDiffInfo blob - -prependSummary :: Category -> DiffSummary DiffInfo -> DiffSummary DiffInfo -prependSummary annotation summary = summary { parentAnnotations = annotation : parentAnnotations summary } From 51cb8918a94c6f8a85a6e39516ed897a876c381d Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 14:06:07 -0400 Subject: [PATCH 121/208] Prepend (Category, TermName) to DiffSummary.parentAnnotations --- src/DiffSummary.hs | 71 ++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 47 deletions(-) diff --git a/src/DiffSummary.hs b/src/DiffSummary.hs index 90b828b6c..1b2f67d9d 100644 --- a/src/DiffSummary.hs +++ b/src/DiffSummary.hs @@ -2,7 +2,7 @@ module DiffSummary (DiffSummary(..), diffSummaries, DiffInfo(..), annotatedSummaries) where -import Prologue hiding (snd, intercalate) +import Prologue hiding (intercalate) import Diff import Patch import Term @@ -11,7 +11,8 @@ import Range import Syntax as S import Category as C import Data.Functor.Foldable as Foldable -import Data.Functor.Both +import Data.Functor.Both hiding (fst, snd) +import qualified Data.Functor.Both as Both import Data.Text as Text (intercalate) import Test.QuickCheck hiding (Fixed) import Patch.Arbitrary() @@ -27,55 +28,31 @@ data DiffInfo = LeafInfo { categoryName :: Text, termName :: Text } deriving (Eq, Show) data Branch = BIndexed | BFixed | BCommented deriving (Show, Eq, Generic) + data DiffSummary a = DiffSummary { patch :: Patch a, - parentAnnotations :: [Category] + parentAnnotations :: [(Category, Text)] } deriving (Eq, Functor, Show, Generic) annotatedSummaries :: DiffSummary DiffInfo -> [Text] annotatedSummaries DiffSummary{..} = show . (P.<> maybeParentContext parentAnnotations) <$> summaries patch diffSummaries :: (HasCategory leaf, HasField fields Category, HasField fields Range) => Both (Source Char) -> Diff leaf (Record fields) -> [DiffSummary DiffInfo] -diffSummaries sources = cata $ \case - -- Skip comments and leaves since they don't have any changes - (Free (_ :< Leaf _)) -> [] - Free (_ :< (S.Comment _)) -> [] - (Free (infos :< S.Indexed children)) -> annotateWithCategory infos <$> join children - (Free (infos :< S.Fixed children)) -> annotateWithCategory infos <$> join children - (Free (infos :< S.FunctionCall identifier children)) -> annotateWithCategory infos <$> join (Prologue.toList (identifier : children)) - (Free (infos :< S.Function id ps body)) -> annotateWithCategory infos <$> (fromMaybe [] id) <> (fromMaybe [] ps) <> body - (Free (infos :< S.Assignment id value)) -> annotateWithCategory infos <$> id <> value - (Free (infos :< S.MemberAccess base property)) -> annotateWithCategory infos <$> base <> property - (Free (infos :< S.SubscriptAccess base property)) -> annotateWithCategory infos <$> base <> property - (Free (infos :< S.MethodCall targetId methodId ps)) -> annotateWithCategory infos <$> targetId <> methodId <> ps - (Free (infos :< S.VarAssignment varId value)) -> annotateWithCategory infos <$> varId <> value - (Free (infos :< S.VarDecl decl)) -> annotateWithCategory infos <$> decl - (Free (infos :< S.Args args)) -> annotateWithCategory infos <$> join args - (Free (infos :< S.Switch expr cases)) -> annotateWithCategory infos <$> expr <> join cases - (Free (infos :< S.Case expr body)) -> annotateWithCategory infos <$> expr <> body - Free (infos :< (S.Ternary expr cases)) -> annotateWithCategory infos <$> expr <> join cases - Free (infos :< (S.MathAssignment id value)) -> annotateWithCategory infos <$> id <> value - Free (infos :< (S.Operator syntaxes)) -> annotateWithCategory infos <$> join syntaxes - Free (infos :< (S.Object kvs)) -> annotateWithCategory infos <$> join kvs - Free (infos :< (S.Return expr)) -> annotateWithCategory infos <$> fromMaybe [] expr - Free (infos :< (S.Pair a b)) -> annotateWithCategory infos <$> a <> b - Free (infos :< (S.Commented cs leaf)) -> annotateWithCategory infos <$> join cs <> fromMaybe [] leaf - Free (infos :< (S.Error _ children)) -> annotateWithCategory infos <$> join children - (Free (infos :< S.For exprs body)) -> annotateWithCategory infos <$> join exprs <> body - (Free (infos :< S.While expr body)) -> annotateWithCategory infos <$> expr <> body - (Free (infos :< S.DoWhile expr body)) -> annotateWithCategory infos <$> expr <> body - (Free (infos :< S.Throw expr)) -> annotateWithCategory infos <$> expr - (Free (infos :< S.Constructor expr)) -> annotateWithCategory infos <$> expr - (Free (infos :< S.Try expr catch finally)) -> annotateWithCategory infos <$> expr <> fromMaybe [] catch <> fromMaybe [] finally - (Free (infos :< S.Array children)) -> annotateWithCategory infos <$> join children - (Free (infos :< S.Class identifier superclass definitions)) -> annotateWithCategory infos <$> identifier <> fromMaybe [] superclass <> join definitions - (Free (infos :< S.Method identifier params definitions)) -> annotateWithCategory infos <$> identifier <> join params <> join definitions - (Pure (Insert term)) -> [ DiffSummary (Insert $ termToDiffInfo afterSource term) [] ] - (Pure (Delete term)) -> [ DiffSummary (Delete $ termToDiffInfo beforeSource term) [] ] - (Pure (Replace t1 t2)) -> [ DiffSummary (Replace (termToDiffInfo beforeSource t1) (termToDiffInfo afterSource t2)) [] ] +diffSummaries sources = para $ \diff -> + let diff' = free (Prologue.fst <$> diff) + annotateWithCategory :: [(Diff leaf (Record fields), [DiffSummary DiffInfo])] -> [DiffSummary DiffInfo] + annotateWithCategory children = maybeToList (prependSummary (Both.snd sources) <$> (afterTerm diff')) <*> (children >>= snd) in + case diff of + -- Skip comments and leaves since they don't have any changes + Free (_ :< Leaf _) -> [] + Free (_ :< (S.Comment _)) -> [] + (Free (_ :< syntax)) -> annotateWithCategory (toList syntax) + (Pure (Insert term)) -> [ DiffSummary (Insert $ termToDiffInfo afterSource term) [] ] + (Pure (Delete term)) -> [ DiffSummary (Delete $ termToDiffInfo beforeSource term) [] ] + (Pure (Replace t1 t2)) -> [ DiffSummary (Replace (termToDiffInfo beforeSource t1) (termToDiffInfo afterSource t2)) [] ] where (beforeSource, afterSource) = runJoin sources - annotateWithCategory infos = prependSummary (category $ snd infos) + summaries :: Patch DiffInfo -> [P.Doc] summaries (Insert info) = (("Added" <+> "the") <+>) <$> toLeafInfos info @@ -142,10 +119,10 @@ toTermName source term = case unwrap term of termNameFromRange range = toText $ Source.slice range source range = characterRange . extract -maybeParentContext :: [Category] -> Doc -maybeParentContext annotations = if null annotations - then "" - else space <> "in the" <+> (toDoc . intercalate "/" $ toCategoryName <$> annotations) <+> "context" +maybeParentContext :: [(Category, Text)] -> Doc +maybeParentContext annotations = case annotations of + [] -> "" + (annotation:xs) -> space <> "in the" <+> (toDoc $ snd annotation) <+> toDoc (toCategoryName $ fst annotation) toDoc :: Text -> Doc toDoc = string . toS @@ -168,8 +145,8 @@ termToDiffInfo blob term = case unwrap term of where toTermName' = toTermName blob termToDiffInfo' = termToDiffInfo blob -prependSummary :: Category -> DiffSummary DiffInfo -> DiffSummary DiffInfo -prependSummary annotation summary = summary { parentAnnotations = annotation : parentAnnotations summary } +prependSummary :: (HasCategory leaf, HasField fields Range, HasField fields Category) => Source Char -> Term leaf (Record fields) -> DiffSummary DiffInfo -> DiffSummary DiffInfo +prependSummary source term summary = summary { parentAnnotations = (category $ extract term, toTermName source term) : parentAnnotations summary } -- The user-facing category name of 'a'. class HasCategory a where From d8ff761f4053e29668c9dc76dca1aef4d24f1ff5 Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 14:19:47 -0400 Subject: [PATCH 122/208] Add mapPatch --- src/DiffSummary.hs | 8 ++------ src/Patch.hs | 6 ++++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/DiffSummary.hs b/src/DiffSummary.hs index 1b2f67d9d..1393c9cdf 100644 --- a/src/DiffSummary.hs +++ b/src/DiffSummary.hs @@ -44,12 +44,8 @@ diffSummaries sources = para $ \diff -> annotateWithCategory children = maybeToList (prependSummary (Both.snd sources) <$> (afterTerm diff')) <*> (children >>= snd) in case diff of -- Skip comments and leaves since they don't have any changes - Free (_ :< Leaf _) -> [] - Free (_ :< (S.Comment _)) -> [] (Free (_ :< syntax)) -> annotateWithCategory (toList syntax) - (Pure (Insert term)) -> [ DiffSummary (Insert $ termToDiffInfo afterSource term) [] ] - (Pure (Delete term)) -> [ DiffSummary (Delete $ termToDiffInfo beforeSource term) [] ] - (Pure (Replace t1 t2)) -> [ DiffSummary (Replace (termToDiffInfo beforeSource t1) (termToDiffInfo afterSource t2)) [] ] + (Pure patch) -> [ DiffSummary (mapPatch (termToDiffInfo beforeSource) (termToDiffInfo afterSource) patch) [] ] where (beforeSource, afterSource) = runJoin sources @@ -122,7 +118,7 @@ toTermName source term = case unwrap term of maybeParentContext :: [(Category, Text)] -> Doc maybeParentContext annotations = case annotations of [] -> "" - (annotation:xs) -> space <> "in the" <+> (toDoc $ snd annotation) <+> toDoc (toCategoryName $ fst annotation) + (annotation:_) -> space <> "in the" <+> (toDoc $ snd annotation) <+> toDoc (toCategoryName $ fst annotation) toDoc :: Text -> Doc toDoc = string . toS diff --git a/src/Patch.hs b/src/Patch.hs index c4dcc8e69..a156f01f2 100644 --- a/src/Patch.hs +++ b/src/Patch.hs @@ -9,6 +9,7 @@ module Patch , patchSum , maybeFst , maybeSnd +, mapPatch ) where import Data.These @@ -51,6 +52,11 @@ unPatch (Replace a b) = These a b unPatch (Insert b) = That b unPatch (Delete a) = This a +mapPatch :: (a -> b) -> (a -> b) -> Patch a -> Patch b +mapPatch f _ (Delete a ) = Delete (f a) +mapPatch _ g (Insert b) = Insert (g b) +mapPatch f g (Replace a b) = Replace (f a) (g b) + -- | Calculate the cost of the patch given a function to compute the cost of a item. patchSum :: (a -> Integer) -> Patch a -> Integer patchSum termCost patch = maybe 0 termCost (before patch) + maybe 0 termCost (after patch) From 11e35ac9c6578e7a7f292af982ef1537b904b3fe Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 14:30:55 -0400 Subject: [PATCH 123/208] Only prepend summaries if the parent term has an identifier --- src/DiffSummary.hs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/DiffSummary.hs b/src/DiffSummary.hs index 1393c9cdf..9068b44ad 100644 --- a/src/DiffSummary.hs +++ b/src/DiffSummary.hs @@ -142,7 +142,21 @@ termToDiffInfo blob term = case unwrap term of termToDiffInfo' = termToDiffInfo blob prependSummary :: (HasCategory leaf, HasField fields Range, HasField fields Category) => Source Char -> Term leaf (Record fields) -> DiffSummary DiffInfo -> DiffSummary DiffInfo -prependSummary source term summary = summary { parentAnnotations = (category $ extract term, toTermName source term) : parentAnnotations summary } +prependSummary source term summary = if hasIdentifier term + then summary { parentAnnotations = (category $ extract term, toTermName source term) : parentAnnotations summary } + else summary + where hasIdentifier term = case unwrap term of + S.FunctionCall{} -> True + S.Function id _ _ -> isJust id + S.Assignment{} -> True + S.MathAssignment{} -> True + S.MemberAccess{} -> True + S.MethodCall{} -> True + S.VarAssignment{} -> True + S.SubscriptAccess{} -> True + S.Class{} -> True + S.Method{} -> True + _ -> False -- The user-facing category name of 'a'. class HasCategory a where From 4efa6f6fa5d22a8487da4cb1bac9fed59d1bd445 Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 14:37:51 -0400 Subject: [PATCH 124/208] Add parent annotation to the end of the list --- src/DiffSummary.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DiffSummary.hs b/src/DiffSummary.hs index 9068b44ad..33693d3bd 100644 --- a/src/DiffSummary.hs +++ b/src/DiffSummary.hs @@ -143,7 +143,7 @@ termToDiffInfo blob term = case unwrap term of prependSummary :: (HasCategory leaf, HasField fields Range, HasField fields Category) => Source Char -> Term leaf (Record fields) -> DiffSummary DiffInfo -> DiffSummary DiffInfo prependSummary source term summary = if hasIdentifier term - then summary { parentAnnotations = (category $ extract term, toTermName source term) : parentAnnotations summary } + then summary { parentAnnotations = parentAnnotations summary <> [(category $ extract term, toTermName source term)] } else summary where hasIdentifier term = case unwrap term of S.FunctionCall{} -> True From 81e42933ce40cd12e582e59cd1656c29d3cb3223 Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 14:49:19 -0400 Subject: [PATCH 125/208] Just keep track of the immediate parent annotation --- src/DiffSummary.hs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/DiffSummary.hs b/src/DiffSummary.hs index 33693d3bd..766e85306 100644 --- a/src/DiffSummary.hs +++ b/src/DiffSummary.hs @@ -31,11 +31,11 @@ data Branch = BIndexed | BFixed | BCommented deriving (Show, Eq, Generic) data DiffSummary a = DiffSummary { patch :: Patch a, - parentAnnotations :: [(Category, Text)] + parentAnnotation :: Maybe (Category, Text) } deriving (Eq, Functor, Show, Generic) annotatedSummaries :: DiffSummary DiffInfo -> [Text] -annotatedSummaries DiffSummary{..} = show . (P.<> maybeParentContext parentAnnotations) <$> summaries patch +annotatedSummaries DiffSummary{..} = show . (P.<> maybeParentContext parentAnnotation) <$> summaries patch diffSummaries :: (HasCategory leaf, HasField fields Category, HasField fields Range) => Both (Source Char) -> Diff leaf (Record fields) -> [DiffSummary DiffInfo] diffSummaries sources = para $ \diff -> @@ -45,7 +45,7 @@ diffSummaries sources = para $ \diff -> case diff of -- Skip comments and leaves since they don't have any changes (Free (_ :< syntax)) -> annotateWithCategory (toList syntax) - (Pure patch) -> [ DiffSummary (mapPatch (termToDiffInfo beforeSource) (termToDiffInfo afterSource) patch) [] ] + (Pure patch) -> [ DiffSummary (mapPatch (termToDiffInfo beforeSource) (termToDiffInfo afterSource) patch) Nothing ] where (beforeSource, afterSource) = runJoin sources @@ -115,10 +115,10 @@ toTermName source term = case unwrap term of termNameFromRange range = toText $ Source.slice range source range = characterRange . extract -maybeParentContext :: [(Category, Text)] -> Doc -maybeParentContext annotations = case annotations of - [] -> "" - (annotation:_) -> space <> "in the" <+> (toDoc $ snd annotation) <+> toDoc (toCategoryName $ fst annotation) +maybeParentContext :: Maybe (Category, Text) -> Doc +maybeParentContext = maybe "" (\annotation -> + space <> "in the" <+> (toDoc $ snd annotation) <+> toDoc (toCategoryName $ fst annotation)) + toDoc :: Text -> Doc toDoc = string . toS @@ -142,8 +142,8 @@ termToDiffInfo blob term = case unwrap term of termToDiffInfo' = termToDiffInfo blob prependSummary :: (HasCategory leaf, HasField fields Range, HasField fields Category) => Source Char -> Term leaf (Record fields) -> DiffSummary DiffInfo -> DiffSummary DiffInfo -prependSummary source term summary = if hasIdentifier term - then summary { parentAnnotations = parentAnnotations summary <> [(category $ extract term, toTermName source term)] } +prependSummary source term summary = if (isNothing $ parentAnnotation summary) && hasIdentifier term + then summary { parentAnnotation = Just (category $ extract term, toTermName source term) } else summary where hasIdentifier term = case unwrap term of S.FunctionCall{} -> True From 0bee997724ecc65887d8d58b25503268c3c4fecb Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 14:59:51 -0400 Subject: [PATCH 126/208] fix tests --- test/DiffSummarySpec.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/DiffSummarySpec.hs b/test/DiffSummarySpec.hs index dc648c4bf..e3f0710e3 100644 --- a/test/DiffSummarySpec.hs +++ b/test/DiffSummarySpec.hs @@ -31,10 +31,10 @@ testDiff :: Diff Text (Record '[Category, Range]) testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "a")) ]) testSummary :: DiffSummary DiffInfo -testSummary = DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotations = [] } +testSummary = DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotation = Nothing } replacementSummary :: DiffSummary DiffInfo -replacementSummary = DiffSummary { patch = Replace (LeafInfo "string" "a") (LeafInfo "symbol" "b"), parentAnnotations = [ ArrayLiteral ] } +replacementSummary = DiffSummary { patch = Replace (LeafInfo "string" "a") (LeafInfo "symbol" "b"), parentAnnotation = Just (Info.FunctionCall, "foo") } sources :: Both (Source Char) sources = both (fromText "[]") (fromText "[a]") @@ -43,7 +43,7 @@ spec :: Spec spec = parallel $ do describe "diffSummaries" $ do it "outputs a diff summary" $ do - diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotations = [ ArrayLiteral ] } ] + diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotation = Nothing } ] prop "equal terms produce identity diffs" $ \ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range])) in @@ -53,7 +53,7 @@ spec = parallel $ do it "should print adds" $ annotatedSummaries testSummary `shouldBe` ["Added the 'a' string"] it "prints a replacement" $ do - annotatedSummaries replacementSummary `shouldBe` ["Replaced the 'a' string with the 'b' symbol in the array context"] + annotatedSummaries replacementSummary `shouldBe` ["Replaced the 'a' string with the 'b' symbol in the foo function call"] describe "DiffInfo" $ do prop "patches in summaries match the patches in diffs" $ \a -> let From 29536db80bdf627205463b2e6bf083523902c08b Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 15:18:22 -0400 Subject: [PATCH 127/208] Don't output filepaths when adding or deleting an empty file --- src/Renderer/Patch.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Renderer/Patch.hs b/src/Renderer/Patch.hs index 34b6f5631..1d0c82975 100644 --- a/src/Renderer/Patch.hs +++ b/src/Renderer/Patch.hs @@ -83,7 +83,7 @@ showLine source line | Just line <- line = Just . toString . (`slice` source) $ -- | Returns the header given two source blobs and a hunk. header :: Both SourceBlob -> String -header blobs = intercalate "\n" [filepathHeader, fileModeHeader, beforeFilepath, afterFilepath] <> "\n" +header blobs = intercalate "\n" ([filepathHeader, fileModeHeader] <> maybeFilepaths) <> "\n" where filepathHeader = "diff --git a/" <> pathA <> " b/" <> pathB fileModeHeader = case (modeA, modeB) of (Nothing, Just mode) -> intercalate "\n" [ "new file mode " <> modeToDigits mode, blobOidHeader ] @@ -100,8 +100,10 @@ header blobs = intercalate "\n" [filepathHeader, fileModeHeader, beforeFilepath, modeHeader ty maybeMode path = case maybeMode of Just _ -> ty <> "/" <> path Nothing -> "/dev/null" + maybeFilepaths = if (nullOid == oidA && null (snd sources)) || (nullOid == oidB && null (fst sources)) then [] else [ beforeFilepath, afterFilepath ] beforeFilepath = "--- " <> modeHeader "a" modeA pathA afterFilepath = "+++ " <> modeHeader "b" modeB pathB + sources = source <$> blobs (pathA, pathB) = runJoin $ path <$> blobs (oidA, oidB) = runJoin $ oid <$> blobs (modeA, modeB) = runJoin $ blobKind <$> blobs From 394d6186d95a9735b70b8148e58b06c7ba15eaa3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 15:22:16 -0400 Subject: [PATCH 128/208] Add a patch fixture for the dictionary case. --- test/diffs/dictionary.patch.js | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 test/diffs/dictionary.patch.js diff --git a/test/diffs/dictionary.patch.js b/test/diffs/dictionary.patch.js new file mode 100644 index 000000000..42eb7a4f9 --- /dev/null +++ b/test/diffs/dictionary.patch.js @@ -0,0 +1,10 @@ +diff --git a/test/diffs/dictionary.A.js b/test/diffs/dictionary.B.js +index .. 100644 +--- a/test/diffs/dictionary.A.js ++++ b/test/diffs/dictionary.B.js +@@ -1,5 +1,5 @@ + { +- "b": 4, ++ "b": 5, + "a": 5 + } From 7d4e6affc260fad5290be3dc0cf9713a82284417 Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 15:23:08 -0400 Subject: [PATCH 129/208] Output error text from source --- src/DiffSummary.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DiffSummary.hs b/src/DiffSummary.hs index 001cd79d3..f616349d3 100644 --- a/src/DiffSummary.hs +++ b/src/DiffSummary.hs @@ -63,7 +63,7 @@ toTermName source term = case unwrap term of S.Object kvs -> "{" <> intercalate ", " (toTermName' <$> kvs) <> "}" S.Pair a b -> toTermName' a <> ": " <> toTermName' b S.Return expr -> maybe "empty" toTermName' expr - S.Error span _ -> displayStartEndPos span + S.Error _ _ -> termNameFromSource term S.For _ _ -> termNameFromChildren term S.While expr _ -> toTermName' expr S.DoWhile _ expr -> toTermName' expr From 12f2300fa9803b8c1ae3fa895bce2e5c310589f2 Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 15:28:09 -0400 Subject: [PATCH 130/208] Print the error term when printing ErrorInfos --- src/DiffSummary.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DiffSummary.hs b/src/DiffSummary.hs index f616349d3..5c4b21239 100644 --- a/src/DiffSummary.hs +++ b/src/DiffSummary.hs @@ -23,7 +23,7 @@ import Source data DiffInfo = LeafInfo { categoryName :: Text, termName :: Text } | BranchInfo { branches :: [ DiffInfo ], categoryName :: Text, branchType :: Branch } - | ErrorInfo { errorSpan :: SourceSpan, categoryName :: Text } + | ErrorInfo { errorSpan :: SourceSpan, termName :: Text } deriving (Eq, Show) toTermName :: (HasCategory leaf, HasField fields Category, HasField fields Range) => Source Char -> Term leaf (Record fields) -> Text @@ -152,7 +152,7 @@ instance (Eq a, Arbitrary a) => Arbitrary (DiffSummary a) where instance P.Pretty DiffInfo where pretty LeafInfo{..} = squotes (string $ toSL termName) <+> (string $ toSL categoryName) pretty BranchInfo{..} = mconcat $ punctuate (string "," <> space) (pretty <$> branches) - pretty ErrorInfo{..} = "syntax error at" <+> (string . toSL $ displayStartEndPos errorSpan) <+> "in" <+> (string . toSL $ spanName errorSpan) + pretty ErrorInfo{..} = squotes (string $ toSL termName) <+> "at" <+> (string . toSL $ displayStartEndPos errorSpan) <+> "in" <+> (string . toSL $ spanName errorSpan) annotatedSummaries :: DiffSummary DiffInfo -> [Text] annotatedSummaries DiffSummary{..} = show . (P.<> maybeParentContext parentAnnotations) <$> summaries patch @@ -231,7 +231,7 @@ termToDiffInfo blob term = case unwrap term of -- use the term name of the operator identifier when we have that production value. Until then, I'm using a placeholder value -- to indicate where that value should be when constructing DiffInfos. Commented cs leaf -> BranchInfo (termToDiffInfo' <$> cs <> maybeToList leaf) (toCategoryName term) BCommented - S.Error sourceSpan _ -> ErrorInfo sourceSpan (toCategoryName term) + S.Error sourceSpan _ -> ErrorInfo sourceSpan (toTermName' term) _ -> LeafInfo (toCategoryName term) (toTermName' term) where toTermName' = toTermName blob termToDiffInfo' = termToDiffInfo blob From 7603390361519c771b9c0d32be21e4a999d3cf31 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 16:24:09 -0400 Subject: [PATCH 131/208] Constrain the term functor to be Traversable. --- src/Data/RandomWalkSimilarity.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 6259ee5b5..c16b0f918 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -62,7 +62,7 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } deriving (Eq, Show) -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. -pqGrams :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) +pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram q . decorateTermWithPGram p . decorateTermWithLabel getLabel @@ -93,9 +93,9 @@ decorateTermWithPGram p = ana coalgebra . (,) [] RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (padToSize p (Just label : parentLabels))) (unwrap c) -- | Replaces p,1-grams in a term’s annotations with corresponding bags of p,q-grams. -decorateTermWithPQGram :: (Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPQGram :: Traversable f => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram q = cata algebra - where algebra :: (Prologue.Foldable f, Functor f) => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) + where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) algebra (RCons gram rest :< functor) = cofree ((gram .: rest) :< functor) -- (gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) setBases :: Gram label -> [Gram label] -> [Gram label] -> [Gram label] @@ -112,7 +112,7 @@ decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRa where normalize vec = fmap (/ vmagnitude vec) vec -- | Annotates a term with a feature vector at each node. -featureVectorDecorator :: (Hashable label, Functor f, Prologue.Foldable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) +featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d = decorateTermWithFeatureVector . decorateTermWithUnitVector d From 17bba1b7620ab138a6a33295a72c987cd4762cab Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 16:46:34 -0400 Subject: [PATCH 132/208] Add a function to retrieve the head of a non-empty record. --- src/Data/Record.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Data/Record.hs b/src/Data/Record.hs index 70646facc..a2c740b4c 100644 --- a/src/Data/Record.hs +++ b/src/Data/Record.hs @@ -18,6 +18,10 @@ infixr 0 .: (.:) :: h -> Record t -> Record (h ': t) (.:) = RCons +-- | Get the first element of a non-empty record. +rhead :: Record (head ': tail) -> head +rhead (RCons head _) = head + -- Classes From 832d8606a6ba93e0c586d68dd48dcb6fb87e6790 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 16:46:51 -0400 Subject: [PATCH 133/208] :fire: setBases. --- src/Data/RandomWalkSimilarity.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index c16b0f918..ef108f09e 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -98,8 +98,6 @@ decorateTermWithPQGram q = cata algebra where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) algebra (RCons gram rest :< functor) = cofree ((gram .: rest) :< functor) -- (gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) - setBases :: Gram label -> [Gram label] -> [Gram label] -> [Gram label] - setBases gram siblings rest = gram { base = padToSize q (foldMap base siblings) } : rest getGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) getGrams = getField From 62e16b51cd758d8e415256a25d5082d5057f18b2 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 16:46:56 -0400 Subject: [PATCH 134/208] :fire: getGrams. --- src/Data/RandomWalkSimilarity.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index ef108f09e..413015c33 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -98,8 +98,6 @@ decorateTermWithPQGram q = cata algebra where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) algebra (RCons gram rest :< functor) = cofree ((gram .: rest) :< functor) -- (gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) - getGrams :: HasField fields (Gram label, DList.DList (Gram label)) => Record fields -> (Gram label, DList.DList (Gram label)) - getGrams = getField -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. decorateTermWithFeatureVector :: (Prologue.Foldable f, Functor f) => Cofree f (Record (Vector.Vector Double ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) From 23d4a0bd56e51793c679e71224b2794dc74ae4d3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 16:47:00 -0400 Subject: [PATCH 135/208] :fire: windowed. --- src/Data/RandomWalkSimilarity.hs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 413015c33..53d083be6 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -66,14 +66,6 @@ pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram q . decorateTermWithPGram p . decorateTermWithLabel getLabel --- | A sliding-window fold over _n_ items of a list per iteration. -windowed :: Int -> (a -> [a] -> b -> b) -> b -> [a] -> b -windowed n f seed = para alg - where alg xs = case xs of - Cons a (as, b) -> f a (take n $ a : as) b - Nil -> seed - - -- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree. featureVector :: Hashable label => Int -> DList.DList (Gram label) -> Vector.Vector Double featureVector d bag = sumVectors $ unitDVector . hash <$> bag From 3d4ea56ff3b426e89645e14f7e071b6ba550ecc1 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 16:47:03 -0400 Subject: [PATCH 136/208] Assign sibling labels to gram bases. --- src/Data/RandomWalkSimilarity.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 53d083be6..c7535d8bd 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -88,8 +88,10 @@ decorateTermWithPGram p = ana coalgebra . (,) [] decorateTermWithPQGram :: Traversable f => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram q = cata algebra where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) - algebra (RCons gram rest :< functor) = cofree ((gram .: rest) :< functor) - -- (gram, DList.fromList (windowed q setBases [] (fst . getGrams . extract <$> toList functor)) <> foldMap (snd . getGrams . extract) functor) + algebra (RCons gram rest :< functor) = cofree ((gram .: rest) :< (`evalState` (foldMap (base . rhead . extract) functor)) (for functor $ \ a -> case runCofree a of + RCons gram rest :< functor -> do labels <- get + put (drop 1 labels) + pure $! cofree ((gram { base = padToSize q labels } .: rest) :< functor))) -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. decorateTermWithFeatureVector :: (Prologue.Foldable f, Functor f) => Cofree f (Record (Vector.Vector Double ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) From 0cfb700c23a4d2d6c4c9647437b815f28cdee5b1 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 16:47:33 -0400 Subject: [PATCH 137/208] Pad root labels out. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index c7535d8bd..6f9ab7d92 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -88,7 +88,7 @@ decorateTermWithPGram p = ana coalgebra . (,) [] decorateTermWithPQGram :: Traversable f => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram q = cata algebra where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) - algebra (RCons gram rest :< functor) = cofree ((gram .: rest) :< (`evalState` (foldMap (base . rhead . extract) functor)) (for functor $ \ a -> case runCofree a of + algebra (RCons gram rest :< functor) = cofree ((gram { base = padToSize q (base gram) } .: rest) :< (`evalState` (foldMap (base . rhead . extract) functor)) (for functor $ \ a -> case runCofree a of RCons gram rest :< functor -> do labels <- get put (drop 1 labels) pure $! cofree ((gram { base = padToSize q labels } .: rest) :< functor))) From 076ad3622924fc1fa92a63ab14aa1abb10bbf751 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 16:56:06 -0400 Subject: [PATCH 138/208] :fire: some redundant parens. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 6f9ab7d92..10c309150 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -42,7 +42,7 @@ rws compare as bs featurize index term = UnmappedTerm index (getField (extract term)) term findNearestNeighbourTo kv@(UnmappedTerm _ _ v) = do (previous, unmapped) <- get - let (UnmappedTerm i _ _) = KdTree.nearest kdas kv + let UnmappedTerm i _ _ = KdTree.nearest kdas kv fromMaybe (pure (negate 1, inserting v)) $ do found <- find ((== i) . termIndex) unmapped guard (i >= previous) From dfffc15760b262d9387ed37faecb82a8d0d6c505 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 8 Aug 2016 16:56:15 -0400 Subject: [PATCH 139/208] Add a unit vector decorator. --- src/Data/RandomWalkSimilarity.hs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 10c309150..fc4130a67 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -4,6 +4,7 @@ module Data.RandomWalkSimilarity , pqGrams , featureVector , featureVectorDecorator +, unitVectorDecorator , Gram(..) ) where @@ -110,6 +111,14 @@ featureVectorDecorator getLabel p q d . decorateTermWithPGram p . decorateTermWithLabel getLabel +-- | Annotate a term with a unit vector at each node. +unitVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) +unitVectorDecorator getLabel p q d + = decorateTermWithUnitVector d + . decorateTermWithPQGram q + . decorateTermWithPGram p + . decorateTermWithLabel getLabel + -- | Pads a list of Alternative values to exactly n elements. padToSize :: Alternative f => Int -> [f a] -> [f a] padToSize n list = take n (list <> repeat empty) From 4009d547a146b41bfa083ca995dbfdf362ea2d77 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 10:51:17 -0400 Subject: [PATCH 140/208] Revert "Add a unit vector decorator." This reverts commit 94cdb43e577b4bf9381a34b1225c30163d3cac3f. --- src/Data/RandomWalkSimilarity.hs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index fc4130a67..10c309150 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -4,7 +4,6 @@ module Data.RandomWalkSimilarity , pqGrams , featureVector , featureVectorDecorator -, unitVectorDecorator , Gram(..) ) where @@ -111,14 +110,6 @@ featureVectorDecorator getLabel p q d . decorateTermWithPGram p . decorateTermWithLabel getLabel --- | Annotate a term with a unit vector at each node. -unitVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) -unitVectorDecorator getLabel p q d - = decorateTermWithUnitVector d - . decorateTermWithPQGram q - . decorateTermWithPGram p - . decorateTermWithLabel getLabel - -- | Pads a list of Alternative values to exactly n elements. padToSize :: Alternative f => Int -> [f a] -> [f a] padToSize n list = take n (list <> repeat empty) From e369b445c28ca632ecaea804045934eac64afab6 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 10:57:25 -0400 Subject: [PATCH 141/208] Simplify the unit vector summation. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 10c309150..7ca0d050e 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -95,7 +95,7 @@ decorateTermWithPQGram q = cata algebra -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. decorateTermWithFeatureVector :: (Prologue.Foldable f, Functor f) => Cofree f (Record (Vector.Vector Double ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) -decorateTermWithFeatureVector = cata $ \ (RCons unitVector rest :< functor) -> cofree (RCons (foldr (\ each into -> Vector.zipWith (+) (getField (extract each)) into) unitVector functor) rest :< functor) +decorateTermWithFeatureVector = cata $ \ (RCons unitVector rest :< functor) -> cofree (RCons (foldr (Vector.zipWith (+) . getField . extract) unitVector functor) rest :< functor) decorateTermWithUnitVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRand` mkQCGen (hash gram)) (sequenceA (Vector.replicate d getRandom))) .: rest From ccd734c117d5a81b5579f58986eaaf08c29f546e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:01:46 -0400 Subject: [PATCH 142/208] Roll decorateTermWithUnitVector back into decorateTermWithFeatureVector. --- src/Data/RandomWalkSimilarity.hs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 7ca0d050e..8f71816ca 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -94,8 +94,11 @@ decorateTermWithPQGram q = cata algebra pure $! cofree ((gram { base = padToSize q labels } .: rest) :< functor))) -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. -decorateTermWithFeatureVector :: (Prologue.Foldable f, Functor f) => Cofree f (Record (Vector.Vector Double ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) -decorateTermWithFeatureVector = cata $ \ (RCons unitVector rest :< functor) -> cofree (RCons (foldr (Vector.zipWith (+) . getField . extract) unitVector functor) rest :< functor) +decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) +decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> + let unitVector = normalize ((`evalRand` mkQCGen (hash gram)) (sequenceA (Vector.replicate d getRandom))) in + cofree (RCons (foldr (Vector.zipWith (+) . getField . extract) unitVector functor) rest :< functor) + where normalize vec = fmap (/ vmagnitude vec) vec decorateTermWithUnitVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRand` mkQCGen (hash gram)) (sequenceA (Vector.replicate d getRandom))) .: rest @@ -104,8 +107,7 @@ decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRa -- | Annotates a term with a feature vector at each node. featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d - = decorateTermWithFeatureVector - . decorateTermWithUnitVector d + = decorateTermWithFeatureVector d . decorateTermWithPQGram q . decorateTermWithPGram p . decorateTermWithLabel getLabel From dc4b0be491973ba0092e025d76af2cc4fa7c2a0f Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:03:02 -0400 Subject: [PATCH 143/208] Extract the computation of the unit vector to the where clause. --- src/Data/RandomWalkSimilarity.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 8f71816ca..4c8831bff 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -96,9 +96,9 @@ decorateTermWithPQGram q = cata algebra -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> - let unitVector = normalize ((`evalRand` mkQCGen (hash gram)) (sequenceA (Vector.replicate d getRandom))) in - cofree (RCons (foldr (Vector.zipWith (+) . getField . extract) unitVector functor) rest :< functor) + cofree (RCons (foldr (Vector.zipWith (+) . getField . extract) (unitVector (hash gram)) functor) rest :< functor) where normalize vec = fmap (/ vmagnitude vec) vec + unitVector hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.replicate d getRandom))) decorateTermWithUnitVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRand` mkQCGen (hash gram)) (sequenceA (Vector.replicate d getRandom))) .: rest From 5798a3b03dcd0b9ac111aa069c685aa27064719c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:03:14 -0400 Subject: [PATCH 144/208] :fire: decorateTermWithUnitVector. --- src/Data/RandomWalkSimilarity.hs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 4c8831bff..bd7235f3c 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -100,10 +100,6 @@ decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> where normalize vec = fmap (/ vmagnitude vec) vec unitVector hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.replicate d getRandom))) -decorateTermWithUnitVector :: (Hashable label, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) -decorateTermWithUnitVector d = fmap $ \ (RCons gram rest) -> normalize ((`evalRand` mkQCGen (hash gram)) (sequenceA (Vector.replicate d getRandom))) .: rest - where normalize vec = fmap (/ vmagnitude vec) vec - -- | Annotates a term with a feature vector at each node. featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d From 71a42c2d98282d31d68a855f407b8e91239a5912 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:05:32 -0400 Subject: [PATCH 145/208] Extract unitVector to the top level. --- src/Data/RandomWalkSimilarity.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index bd7235f3c..f5827226b 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -96,9 +96,11 @@ decorateTermWithPQGram q = cata algebra -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> - cofree (RCons (foldr (Vector.zipWith (+) . getField . extract) (unitVector (hash gram)) functor) rest :< functor) + cofree (RCons (foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor) rest :< functor) + +unitVector :: Int -> Int -> Vector.Vector Double +unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.replicate d getRandom))) where normalize vec = fmap (/ vmagnitude vec) vec - unitVector hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.replicate d getRandom))) -- | Annotates a term with a feature vector at each node. featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) From 638d8eda2369d8db0e539b666d0d758af5874861 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:05:35 -0400 Subject: [PATCH 146/208] :memo: unitVector. --- src/Data/RandomWalkSimilarity.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index f5827226b..5b7321186 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -98,6 +98,7 @@ decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> cofree (RCons (foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor) rest :< functor) +-- | Computes a unit vector of the specified dimension from a hash. unitVector :: Int -> Int -> Vector.Vector Double unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.replicate d getRandom))) where normalize vec = fmap (/ vmagnitude vec) vec From 8d1fb84ed331a9cab7217556fbd5d208b267d2f3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:06:27 -0400 Subject: [PATCH 147/208] Idiomatic record construction. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 5b7321186..c7a73609b 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -96,7 +96,7 @@ decorateTermWithPQGram q = cata algebra -- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> - cofree (RCons (foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor) rest :< functor) + cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor) -- | Computes a unit vector of the specified dimension from a hash. unitVector :: Int -> Int -> Vector.Vector Double From c63721cfd405261aa8fc6a2bbdb1695fc0eea1bb Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:07:58 -0400 Subject: [PATCH 148/208] decorateTermWithPQGram calls out to decorateTermWithPGram. --- src/Data/RandomWalkSimilarity.hs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index c7a73609b..255b46495 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -63,7 +63,7 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) -pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram q . decorateTermWithPGram p . decorateTermWithLabel getLabel +pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram p q . decorateTermWithLabel getLabel -- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree. @@ -85,8 +85,8 @@ decorateTermWithPGram p = ana coalgebra . (,) [] RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (padToSize p (Just label : parentLabels))) (unwrap c) -- | Replaces p,1-grams in a term’s annotations with corresponding bags of p,q-grams. -decorateTermWithPQGram :: Traversable f => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPQGram q = cata algebra +decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPQGram p q = cata algebra . decorateTermWithPGram p where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) algebra (RCons gram rest :< functor) = cofree ((gram { base = padToSize q (base gram) } .: rest) :< (`evalState` (foldMap (base . rhead . extract) functor)) (for functor $ \ a -> case runCofree a of RCons gram rest :< functor -> do labels <- get @@ -107,8 +107,7 @@ unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.repl featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d = decorateTermWithFeatureVector d - . decorateTermWithPQGram q - . decorateTermWithPGram p + . decorateTermWithPQGram p q . decorateTermWithLabel getLabel -- | Pads a list of Alternative values to exactly n elements. From c484a2c8fc4557145f81b1e8c2e928f290721a11 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:08:15 -0400 Subject: [PATCH 149/208] Correct the :memo: for decorateTermWithPQGram. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 255b46495..619a99ab1 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -84,7 +84,7 @@ decorateTermWithPGram p = ana coalgebra . (,) [] coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (padToSize p (Just label : parentLabels))) (unwrap c) --- | Replaces p,1-grams in a term’s annotations with corresponding bags of p,q-grams. +-- | Replaces labels in a term’s annotations with corresponding bags of p,q-grams. decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram p q = cata algebra . decorateTermWithPGram p where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) From d255e1680819179a91ef6d13e9be07ebc7acd50c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:08:44 -0400 Subject: [PATCH 150/208] Correct the :memo: for decorateTermWithFeatureVector. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 619a99ab1..1c3d6c5f2 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -93,7 +93,7 @@ decorateTermWithPQGram p q = cata algebra . decorateTermWithPGram p put (drop 1 labels) pure $! cofree ((gram { base = padToSize q labels } .: rest) :< functor))) --- | Replaces bags of p,q-grams in a term’s annotations with corresponding feature vectors. +-- | Replaces a p,q-gram at the head of a term’s annotation with corresponding feature vectors. decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor) From 6ab3e09c257c4376951f641baf4027fdd45b2ecd Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:09:34 -0400 Subject: [PATCH 151/208] Implement featureVector in terms of the top-level unitVector binding. --- src/Data/RandomWalkSimilarity.hs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 1c3d6c5f2..dc06cdfde 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -68,10 +68,8 @@ pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram p q . -- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree. featureVector :: Hashable label => Int -> DList.DList (Gram label) -> Vector.Vector Double -featureVector d bag = sumVectors $ unitDVector . hash <$> bag - where unitDVector hash = normalize . (`evalRand` mkQCGen hash) $ Prologue.sequence (Vector.replicate d getRandom) - normalize vec = fmap (/ vmagnitude vec) vec - sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) +featureVector d bag = sumVectors $ unitVector d . hash <$> bag + where sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) -- | Annotates a term with a label at each node. decorateTermWithLabel :: Functor f => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) From 0cd9fa9e340d410123732b40fd51bce9cf0244e2 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 11:10:04 -0400 Subject: [PATCH 152/208] Correct the :memo: for decorateTermWithPQGram. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index dc06cdfde..ec7dbb586 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -82,7 +82,7 @@ decorateTermWithPGram p = ana coalgebra . (,) [] coalgebra (parentLabels, c) = case extract c of RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (padToSize p (Just label : parentLabels))) (unwrap c) --- | Replaces labels in a term’s annotations with corresponding bags of p,q-grams. +-- | Replaces labels in a term’s annotations with corresponding p,q-grams. decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram p q = cata algebra . decorateTermWithPGram p where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) From 4ad79e70d8d7e1bcc150b88cbcd8db2e94683967 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 13:07:49 -0400 Subject: [PATCH 153/208] Extract the computation of the set of sibling labels. --- src/Data/RandomWalkSimilarity.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index ec7dbb586..270815b9e 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -86,10 +86,12 @@ decorateTermWithPGram p = ana coalgebra . (,) [] decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram p q = cata algebra . decorateTermWithPGram p where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) - algebra (RCons gram rest :< functor) = cofree ((gram { base = padToSize q (base gram) } .: rest) :< (`evalState` (foldMap (base . rhead . extract) functor)) (for functor $ \ a -> case runCofree a of + algebra (RCons gram rest :< functor) = cofree ((gram { base = padToSize q (base gram) } .: rest) :< (`evalState` (siblingLabels functor)) (for functor $ \ a -> case runCofree a of RCons gram rest :< functor -> do labels <- get put (drop 1 labels) pure $! cofree ((gram { base = padToSize q labels } .: rest) :< functor))) + siblingLabels :: Traversable f => f (Cofree f (Record (Gram label ': fields))) -> [Maybe label] + siblingLabels = foldMap (base . rhead . extract) -- | Replaces a p,q-gram at the head of a term’s annotation with corresponding feature vectors. decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) From 45fce4dffd0c61f4dced39a3670829aeffc6099d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 13:11:10 -0400 Subject: [PATCH 154/208] Extract a function setting gram bases. --- src/Data/RandomWalkSimilarity.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 270815b9e..c627b0d65 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -86,12 +86,14 @@ decorateTermWithPGram p = ana coalgebra . (,) [] decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram p q = cata algebra . decorateTermWithPGram p where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) - algebra (RCons gram rest :< functor) = cofree ((gram { base = padToSize q (base gram) } .: rest) :< (`evalState` (siblingLabels functor)) (for functor $ \ a -> case runCofree a of + algebra (RCons gram rest :< functor) = cofree ((setBase gram (base gram) .: rest) :< (`evalState` (siblingLabels functor)) (for functor $ \ a -> case runCofree a of RCons gram rest :< functor -> do labels <- get put (drop 1 labels) - pure $! cofree ((gram { base = padToSize q labels } .: rest) :< functor))) + pure $! cofree ((setBase gram labels .: rest) :< functor))) siblingLabels :: Traversable f => f (Cofree f (Record (Gram label ': fields))) -> [Maybe label] siblingLabels = foldMap (base . rhead . extract) + setBase :: Gram label -> [Maybe label] -> Gram label + setBase gram labels = gram { base = padToSize q labels } -- | Replaces a p,q-gram at the head of a term’s annotation with corresponding feature vectors. decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) From 0cfa23fcc200d977e639c6a0e3fe6172fbf97f02 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 14:21:54 -0400 Subject: [PATCH 155/208] Extract the assignment of siblings. --- src/Data/RandomWalkSimilarity.hs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index c627b0d65..898d6e601 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -86,10 +86,13 @@ decorateTermWithPGram p = ana coalgebra . (,) [] decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram p q = cata algebra . decorateTermWithPGram p where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) - algebra (RCons gram rest :< functor) = cofree ((setBase gram (base gram) .: rest) :< (`evalState` (siblingLabels functor)) (for functor $ \ a -> case runCofree a of - RCons gram rest :< functor -> do labels <- get - put (drop 1 labels) - pure $! cofree ((setBase gram labels .: rest) :< functor))) + algebra (RCons gram rest :< functor) = cofree ((setBase gram (base gram) .: rest) :< (`evalState` (siblingLabels functor)) (for functor assignSiblings)) + assignSiblings :: Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) + assignSiblings a = case runCofree a of + RCons gram rest :< functor -> do + labels <- get + put (drop 1 labels) + pure $! cofree ((setBase gram labels .: rest) :< functor) siblingLabels :: Traversable f => f (Cofree f (Record (Gram label ': fields))) -> [Maybe label] siblingLabels = foldMap (base . rhead . extract) setBase :: Gram label -> [Maybe label] -> Gram label From b966b56494e52c9c37795b2ba2c76743566aa49a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 14:39:00 -0400 Subject: [PATCH 156/208] Move vmagnitude into unitVector. --- src/Data/RandomWalkSimilarity.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 898d6e601..aa6e92c7b 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -107,6 +107,7 @@ decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> unitVector :: Int -> Int -> Vector.Vector Double unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.replicate d getRandom))) where normalize vec = fmap (/ vmagnitude vec) vec + vmagnitude = sqrtDouble . Vector.sum . fmap (** 2) -- | Annotates a term with a feature vector at each node. featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) @@ -119,10 +120,6 @@ featureVectorDecorator getLabel p q d padToSize :: Alternative f => Int -> [f a] -> [f a] padToSize n list = take n (list <> repeat empty) --- | The magnitude of a Euclidean vector, i.e. its distance from the origin. -vmagnitude :: Vector.Vector Double -> Double -vmagnitude = sqrtDouble . Vector.sum . fmap (** 2) - -- Instances From 3cbba53c184368ab341d2b289703409e2559324e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:00:04 -0400 Subject: [PATCH 157/208] Assign the stem in decorateTermWithPQGram. --- src/Data/RandomWalkSimilarity.hs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index aa6e92c7b..f3e1b47d2 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -84,19 +84,17 @@ decorateTermWithPGram p = ana coalgebra . (,) [] -- | Replaces labels in a term’s annotations with corresponding p,q-grams. decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPQGram p q = cata algebra . decorateTermWithPGram p - where algebra :: Traversable f => CofreeF f (Record (Gram label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) - algebra (RCons gram rest :< functor) = cofree ((setBase gram (base gram) .: rest) :< (`evalState` (siblingLabels functor)) (for functor assignSiblings)) - assignSiblings :: Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) - assignSiblings a = case runCofree a of +decorateTermWithPQGram p q = cata algebra + where algebra :: Traversable f => CofreeF f (Record (label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) + algebra (RCons label rest :< functor) = cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: rest) :< (`evalState` (siblingLabels functor)) (for functor (assignLabels label))) + assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) + assignLabels label a = case runCofree a of RCons gram rest :< functor -> do labels <- get put (drop 1 labels) - pure $! cofree ((setBase gram labels .: rest) :< functor) + pure $! cofree ((gram { stem = padToSize p (Just label : stem gram), base = padToSize q labels } .: rest) :< functor) siblingLabels :: Traversable f => f (Cofree f (Record (Gram label ': fields))) -> [Maybe label] siblingLabels = foldMap (base . rhead . extract) - setBase :: Gram label -> [Maybe label] -> Gram label - setBase gram labels = gram { base = padToSize q labels } -- | Replaces a p,q-gram at the head of a term’s annotation with corresponding feature vectors. decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) From bf3b8fdc3f7042b7c91caa4c9517d80827fcddd5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:00:19 -0400 Subject: [PATCH 158/208] :fire: decorateTermWithPGram. --- src/Data/RandomWalkSimilarity.hs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index f3e1b47d2..60dde3057 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -75,13 +75,6 @@ featureVector d bag = sumVectors $ unitVector d . hash <$> bag decorateTermWithLabel :: Functor f => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) decorateTermWithLabel getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) --- | Replaces labels in a term’s annotations with corresponding p,1-grams. -decorateTermWithPGram :: Functor f => Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPGram p = ana coalgebra . (,) [] - where coalgebra :: Functor f => ([Maybe label], Cofree f (Record (label ': fields))) -> CofreeF f (Record (Gram label ': fields)) ([Maybe label], Cofree f (Record (label ': fields))) - coalgebra (parentLabels, c) = case extract c of - RCons label rest -> (Gram (padToSize p parentLabels) (pure (Just label)) .: rest) :< fmap ((,) (padToSize p (Just label : parentLabels))) (unwrap c) - -- | Replaces labels in a term’s annotations with corresponding p,q-grams. decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram p q = cata algebra From 404c0b7fd5a719555f3c981b26d4b84aa2c0cf8d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:02:54 -0400 Subject: [PATCH 159/208] Rename decorateTermWithLabel to labelDecorator. --- src/Data/RandomWalkSimilarity.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 60dde3057..0bfa12c2f 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -63,7 +63,7 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) -pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram p q . decorateTermWithLabel getLabel +pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram p q . labelDecorator getLabel -- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree. @@ -72,8 +72,8 @@ featureVector d bag = sumVectors $ unitVector d . hash <$> bag where sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) -- | Annotates a term with a label at each node. -decorateTermWithLabel :: Functor f => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) -decorateTermWithLabel getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) +labelDecorator :: Functor f => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) +labelDecorator getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) -- | Replaces labels in a term’s annotations with corresponding p,q-grams. decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) @@ -105,7 +105,7 @@ featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF featureVectorDecorator getLabel p q d = decorateTermWithFeatureVector d . decorateTermWithPQGram p q - . decorateTermWithLabel getLabel + . labelDecorator getLabel -- | Pads a list of Alternative values to exactly n elements. padToSize :: Alternative f => Int -> [f a] -> [f a] From 6012dfd3547a713f3078f171580265ab9c088eb6 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:05:42 -0400 Subject: [PATCH 160/208] Compute the label in the p,q-gram decorator. --- src/Data/RandomWalkSimilarity.hs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 0bfa12c2f..a80d32414 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -63,7 +63,7 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) -pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram p q . labelDecorator getLabel +pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram getLabel p q -- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree. @@ -76,10 +76,9 @@ labelDecorator :: Functor f => (forall b. CofreeF f (Record fields) b -> label) labelDecorator getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) -- | Replaces labels in a term’s annotations with corresponding p,q-grams. -decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPQGram p q = cata algebra - where algebra :: Traversable f => CofreeF f (Record (label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) - algebra (RCons label rest :< functor) = cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: rest) :< (`evalState` (siblingLabels functor)) (for functor (assignLabels label))) +decorateTermWithPQGram :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) +decorateTermWithPQGram getLabel p q = cata algebra + where algebra (rest :< functor) = let label = getLabel (rest :< functor) in cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: rest) :< (`evalState` (siblingLabels functor)) (for functor (assignLabels label))) assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) assignLabels label a = case runCofree a of RCons gram rest :< functor -> do @@ -104,8 +103,7 @@ unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.repl featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d = decorateTermWithFeatureVector d - . decorateTermWithPQGram p q - . labelDecorator getLabel + . decorateTermWithPQGram getLabel p q -- | Pads a list of Alternative values to exactly n elements. padToSize :: Alternative f => Int -> [f a] -> [f a] From b89892d0e1ae516b7ee23292cacd154de8a380fd Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:06:12 -0400 Subject: [PATCH 161/208] Rename decorateTermWithPQGram to pqGramDecorator. --- src/Data/RandomWalkSimilarity.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index a80d32414..9f833d8c9 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -63,7 +63,7 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) -pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram getLabel p q +pqGrams getLabel p q = foldMap (pure . getField) . pqGramDecorator getLabel p q -- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree. @@ -76,8 +76,8 @@ labelDecorator :: Functor f => (forall b. CofreeF f (Record fields) b -> label) labelDecorator getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) -- | Replaces labels in a term’s annotations with corresponding p,q-grams. -decorateTermWithPQGram :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) -decorateTermWithPQGram getLabel p q = cata algebra +pqGramDecorator :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) +pqGramDecorator getLabel p q = cata algebra where algebra (rest :< functor) = let label = getLabel (rest :< functor) in cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: rest) :< (`evalState` (siblingLabels functor)) (for functor (assignLabels label))) assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) assignLabels label a = case runCofree a of @@ -103,7 +103,7 @@ unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.repl featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d = decorateTermWithFeatureVector d - . decorateTermWithPQGram getLabel p q + . pqGramDecorator getLabel p q -- | Pads a list of Alternative values to exactly n elements. padToSize :: Alternative f => Int -> [f a] -> [f a] From 8e84a9026fa4ec82d2197d469da59c6b5277dc36 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:06:24 -0400 Subject: [PATCH 162/208] :fire: labelDecorator. --- src/Data/RandomWalkSimilarity.hs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 9f833d8c9..328cb1586 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -71,10 +71,6 @@ featureVector :: Hashable label => Int -> DList.DList (Gram label) -> Vector.Vec featureVector d bag = sumVectors $ unitVector d . hash <$> bag where sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) --- | Annotates a term with a label at each node. -labelDecorator :: Functor f => (forall b. CofreeF f (Record fields) b -> label) -> Cofree f (Record fields) -> Cofree f (Record (label ': fields)) -labelDecorator getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) - -- | Replaces labels in a term’s annotations with corresponding p,q-grams. pqGramDecorator :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) pqGramDecorator getLabel p q = cata algebra From e0087f535ac33fcd693ee61659bba1a4fde18107 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:07:23 -0400 Subject: [PATCH 163/208] Correct the :memo: for pqGramDecorator. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 328cb1586..4b1e25190 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -71,7 +71,7 @@ featureVector :: Hashable label => Int -> DList.DList (Gram label) -> Vector.Vec featureVector d bag = sumVectors $ unitVector d . hash <$> bag where sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) --- | Replaces labels in a term’s annotations with corresponding p,q-grams. +-- | Annotates a term with the corresponding p,q-gram at each node. pqGramDecorator :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) pqGramDecorator getLabel p q = cata algebra where algebra (rest :< functor) = let label = getLabel (rest :< functor) in cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: rest) :< (`evalState` (siblingLabels functor)) (for functor (assignLabels label))) From 89aa3dc96e39dffa1f5ab37503b2ed1ba49ef3c8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:08:03 -0400 Subject: [PATCH 164/208] Roll decorateTermWithFeatureVector into featureVectorDecorator. --- src/Data/RandomWalkSimilarity.hs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 4b1e25190..6a7fe2241 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -84,11 +84,6 @@ pqGramDecorator getLabel p q = cata algebra siblingLabels :: Traversable f => f (Cofree f (Record (Gram label ': fields))) -> [Maybe label] siblingLabels = foldMap (base . rhead . extract) --- | Replaces a p,q-gram at the head of a term’s annotation with corresponding feature vectors. -decorateTermWithFeatureVector :: (Hashable label, Prologue.Foldable f, Functor f) => Int -> Cofree f (Record (Gram label ': fields)) -> Cofree f (Record (Vector.Vector Double ': fields)) -decorateTermWithFeatureVector d = cata $ \ (RCons gram rest :< functor) -> - cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor) - -- | Computes a unit vector of the specified dimension from a hash. unitVector :: Int -> Int -> Vector.Vector Double unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.replicate d getRandom))) @@ -98,7 +93,8 @@ unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.repl -- | Annotates a term with a feature vector at each node. featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator getLabel p q d - = decorateTermWithFeatureVector d + = cata (\ (RCons gram rest :< functor) -> + cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor)) . pqGramDecorator getLabel p q -- | Pads a list of Alternative values to exactly n elements. From 50c52af354fca2f344556b6135748f99da8e85e3 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:09:23 -0400 Subject: [PATCH 165/208] Define padToSize in pqGramDecorator. --- src/Data/RandomWalkSimilarity.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 6a7fe2241..23a022d31 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -83,6 +83,7 @@ pqGramDecorator getLabel p q = cata algebra pure $! cofree ((gram { stem = padToSize p (Just label : stem gram), base = padToSize q labels } .: rest) :< functor) siblingLabels :: Traversable f => f (Cofree f (Record (Gram label ': fields))) -> [Maybe label] siblingLabels = foldMap (base . rhead . extract) + padToSize n list = take n (list <> repeat empty) -- | Computes a unit vector of the specified dimension from a hash. unitVector :: Int -> Int -> Vector.Vector Double @@ -97,10 +98,6 @@ featureVectorDecorator getLabel p q d cofree ((foldr (Vector.zipWith (+) . getField . extract) (unitVector d (hash gram)) functor .: rest) :< functor)) . pqGramDecorator getLabel p q --- | Pads a list of Alternative values to exactly n elements. -padToSize :: Alternative f => Int -> [f a] -> [f a] -padToSize n list = take n (list <> repeat empty) - -- Instances From 0facfdbb7f96f6e8900a8022715b833498e272e2 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:10:56 -0400 Subject: [PATCH 166/208] Reformat the algebra. --- src/Data/RandomWalkSimilarity.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 23a022d31..f60f76631 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -74,7 +74,8 @@ featureVector d bag = sumVectors $ unitVector d . hash <$> bag -- | Annotates a term with the corresponding p,q-gram at each node. pqGramDecorator :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) pqGramDecorator getLabel p q = cata algebra - where algebra (rest :< functor) = let label = getLabel (rest :< functor) in cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: rest) :< (`evalState` (siblingLabels functor)) (for functor (assignLabels label))) + where algebra term = let label = getLabel term in + cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: headF term) :< (`evalState` (siblingLabels (tailF term))) (for (tailF term) (assignLabels label))) assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) assignLabels label a = case runCofree a of RCons gram rest :< functor -> do From 55afdaba17dc827fdd40e1bbd3fe49c222252d57 Mon Sep 17 00:00:00 2001 From: joshvera Date: Mon, 8 Aug 2016 15:46:12 -0400 Subject: [PATCH 167/208] Add new file patch test --- test/diffs/empty-file.B.js | 0 test/diffs/empty-file.patch.js | 3 +++ 2 files changed, 3 insertions(+) create mode 100644 test/diffs/empty-file.B.js create mode 100644 test/diffs/empty-file.patch.js diff --git a/test/diffs/empty-file.B.js b/test/diffs/empty-file.B.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/diffs/empty-file.patch.js b/test/diffs/empty-file.patch.js new file mode 100644 index 000000000..482540dcc --- /dev/null +++ b/test/diffs/empty-file.patch.js @@ -0,0 +1,3 @@ +diff --git a/test/diffs/empty-file.B.js b/test/diffs/empty-file.B.js +new file mode 100644 +index .. From 46b1a7385eb4e77a83082c664e899cd8bed1790a Mon Sep 17 00:00:00 2001 From: joshvera Date: Tue, 9 Aug 2016 15:50:22 -0400 Subject: [PATCH 168/208] Include protolude for Unsafe in tests --- semantic-diff.cabal | 1 + 1 file changed, 1 insertion(+) diff --git a/semantic-diff.cabal b/semantic-diff.cabal index 277247b9b..f05e47239 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -129,6 +129,7 @@ test-suite semantic-diff-test , free , recursion-schemes >= 4.1 , wl-pprint-text + , protolude if os(darwin) ghc-options: -threaded -rtsopts -with-rtsopts=-N -j else From 5aac311a884934c6ec676a7ec0046baccf65ed76 Mon Sep 17 00:00:00 2001 From: joshvera Date: Tue, 9 Aug 2016 15:50:40 -0400 Subject: [PATCH 169/208] Add emptySourceBlob and sourceBlob functions to Source --- src/Source.hs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Source.hs b/src/Source.hs index efa4e80f7..56d9cea26 100644 --- a/src/Source.hs +++ b/src/Source.hs @@ -30,6 +30,11 @@ modeToDigits (SymlinkBlob mode) = showOct mode "" defaultPlainBlob :: SourceKind defaultPlainBlob = PlainBlob 0o100644 +emptySourceBlob :: FilePath -> SourceBlob +emptySourceBlob filepath = SourceBlob (Source.fromList "") Source.nullOid filepath Nothing + +sourceBlob :: Source Char -> FilePath -> SourceBlob +sourceBlob source filepath = SourceBlob source Source.nullOid filepath (Just defaultPlainBlob) -- | Map blobs with Nothing blobKind to empty blobs. idOrEmptySourceBlob :: SourceBlob -> SourceBlob From 07dfb0f5d88fcb0a6fb760ddcc93febc5c74d5c8 Mon Sep 17 00:00:00 2001 From: joshvera Date: Tue, 9 Aug 2016 15:51:01 -0400 Subject: [PATCH 170/208] handle missing files in CorpusSpec --- test/CorpusSpec.hs | 61 ++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 67aaabd7e..92de8005d 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -1,7 +1,7 @@ {-# LANGUAGE DataKinds, FlexibleContexts, GeneralizedNewtypeDeriving #-} module CorpusSpec where -import Data.String +import Unsafe (unsafeFromJust) import Diffing import Renderer import qualified Renderer.JSON as J @@ -11,14 +11,11 @@ import qualified Renderer.Split as Split import Category import Control.DeepSeq import Data.Functor.Both -import Data.List as List -import Data.Map as Map import Data.Record -import Data.Set as Set +import Data.List (union) import qualified Data.Text as T import Info -import Prologue hiding (fst, snd) -import Range +import Prologue hiding (fst, snd, lookup) import qualified Source as S import System.FilePath import System.FilePath.Glob @@ -37,32 +34,36 @@ spec = parallel $ do examples "test/diffs/" `shouldNotReturn` [] where - runTestsIn :: FilePath -> (Verbatim -> Verbatim -> Expectation) -> SpecWith () + runTestsIn :: FilePath -> (Maybe Verbatim -> Maybe Verbatim -> Expectation) -> SpecWith () runTestsIn directory matcher = do paths <- runIO $ examples directory let tests = correctTests =<< paths - traverse_ (\ (formatName, renderer, paths, output) -> it (normalizeName (fst paths) ++ " (" ++ formatName ++ ")") $ testDiff renderer paths output matcher) tests - - correctTests paths@(_, Nothing, Nothing, Nothing) = testsForPaths paths - correctTests paths = List.filter (\(_, _, _, output) -> isJust output) $ testsForPaths paths - testsForPaths (paths, json, patch, split) = [ ("json", J.json, paths, json), ("patch", P.patch, paths, patch), ("split", Split.split, paths, split) ] + traverse_ (\ (formatName, renderer, paths, output) -> + it (maybe "/dev/null" normalizeName (fst paths) ++ " (" ++ formatName ++ ")") $ testDiff renderer paths output matcher) tests + correctTests paths@(_, _, Nothing, Nothing, Nothing) = testsForPaths paths + correctTests paths = filter (\(_, _, _, output) -> isJust output) $ testsForPaths paths + testsForPaths (aPath, bPath, json, patch, split) = [ ("json", J.json, paths, json), ("patch", P.patch, paths, patch), ("split", Split.split, paths, split) ] + where paths = both aPath bPath -- | Return all the examples from the given directory. Examples are expected to -- | have the form "foo.A.js", "foo.B.js", "foo.patch.js". Diffs are not -- | required as the test may be verifying that the inputs don't crash. -examples :: FilePath -> IO [(Both FilePath, Maybe FilePath, Maybe FilePath, Maybe FilePath)] +examples :: FilePath -> IO [(Maybe FilePath, Maybe FilePath, Maybe FilePath, Maybe FilePath, Maybe FilePath)] examples directory = do - as <- toDict <$> globFor "*.A.*" - bs <- toDict <$> globFor "*.B.*" - jsons <- toDict <$> globFor "*.json.*" - patches <- toDict <$> globFor "*.patch.*" - splits <- toDict <$> globFor "*.split.*" - let keys = Set.unions $ keysSet <$> [as, bs] - pure $ (\name -> (both (as ! name) (bs ! name), Map.lookup name jsons, Map.lookup name patches, Map.lookup name splits)) <$> sort (Set.toList keys) + as <- globFor "*.A.*" + bs <- globFor "*.B.*" + jsons <- globFor "*.json.*" + patches <- globFor "*.patch.*" + splits <- globFor "*.split.*" + + let lookupName name = (lookupNormalized name as, lookupNormalized name bs, lookupNormalized name jsons, lookupNormalized name patches, lookupNormalized name splits) + + let keys = normalizeName <$> union as bs + pure $ lookupName <$> keys where + lookupNormalized name = find $ (== name) . normalizeName globFor :: FilePath -> IO [FilePath] globFor p = globDir1 (compile p) directory - toDict list = Map.fromList ((normalizeName <$> list) `zip` list) -- | Given a test name like "foo.A.js", return "foo.js". normalizeName :: FilePath -> FilePath @@ -71,17 +72,23 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte -- | Given file paths for A, B, and, optionally, a diff, return whether diffing -- | the files will produce the diff. If no diff is provided, then the result -- | is true, but the diff will still be calculated. -testDiff :: Renderer (Record '[Range, Category, Cost]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation +testDiff :: Renderer (Record '[Range, Category, Cost]) -> Both (Maybe FilePath) -> Maybe FilePath -> (Maybe Verbatim -> Maybe Verbatim -> Expectation) -> Expectation testDiff renderer paths diff matcher = do - sources <- sequence $ readAndTranscodeFile <$> paths - actual <- Verbatim <$> diffFiles parser renderer (sourceBlobs sources) + sources <- traverse (traverse readAndTranscodeFile) paths + actual <- fmap Verbatim <$> traverse (diffFiles' sources) parser case diff of Nothing -> matcher actual actual Just file -> do expected <- Verbatim <$> readFile file - matcher actual expected - where parser = parserForFilepath (fst paths) - sourceBlobs sources = pure S.SourceBlob <*> sources <*> pure mempty <*> paths <*> pure (Just S.defaultPlainBlob) + matcher actual (Just expected) + where diffFiles' sources parser = diffFiles parser renderer (sourceBlobs sources paths) + parser = parserForFilepath <$> runBothWith (<|>) paths + sourceBlobs :: Both (Maybe (S.Source Char)) -> Both (Maybe FilePath) -> Both S.SourceBlob + sourceBlobs sources paths = case runJoin paths of + (Nothing, Nothing) -> Join (S.emptySourceBlob "", S.emptySourceBlob "") + (Nothing, Just filepath) -> Join (S.emptySourceBlob "", S.sourceBlob (unsafeFromJust $ snd sources) filepath) + (Just filepath, Nothing) -> Join (S.sourceBlob (unsafeFromJust $ fst sources) filepath, S.emptySourceBlob "") + (Just path1, Just path2) -> Join (S.sourceBlob (unsafeFromJust $ fst sources) path1, S.sourceBlob (unsafeFromJust $ snd sources) path2) -- | A wrapper around `Text` with a more readable `Show` instance. newtype Verbatim = Verbatim Text From c286b39146b3924bfb7f3254632252945652a3b8 Mon Sep 17 00:00:00 2001 From: joshvera Date: Tue, 9 Aug 2016 16:01:25 -0400 Subject: [PATCH 171/208] Fix oids --- test/diffs/dictionary.json.js | 2 +- test/diffs/jquery.patch.js | 2 +- test/diffs/multiple-hunks.patch.js | 2 +- test/diffs/newline-at-eof.json.js | 2 +- test/diffs/newline-at-eof.patch.js | 2 +- test/diffs/no-newline-at-eof.json.js | 2 +- test/diffs/no-newline-at-eof.patch.js | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test/diffs/dictionary.json.js b/test/diffs/dictionary.json.js index b7f2f522c..3c1907633 100644 --- a/test/diffs/dictionary.json.js +++ b/test/diffs/dictionary.json.js @@ -1 +1 @@ -{"rows":[[{"number":1,"terms":[{"range":[0,2],"category":"Program","children":[{"range":[0,2],"category":"ExpressionStatements","children":[{"range":[0,2],"category":"Object","children":[]}]}]}],"range":[0,2],"hasChanges":false},{"number":1,"terms":[{"range":[0,2],"category":"Program","children":[{"range":[0,2],"category":"ExpressionStatements","children":[{"range":[0,2],"category":"Object","children":[]}]}]}],"range":[0,2],"hasChanges":false}],[{"number":2,"terms":[{"range":[2,12],"category":"Program","children":[{"range":[2,12],"category":"ExpressionStatements","children":[{"range":[2,12],"category":"Object","children":[{"range":[4,10],"category":"Pair","children":[{"range":[4,7],"category":"StringLiteral","children":[{"range":[4,5],"category":"StringLiteral"},{"range":[5,6],"category":"StringLiteral"},{"range":[6,7],"category":"StringLiteral"}]},{"patch":"replace","range":[9,10],"category":"number"}]}]}]}]}],"range":[2,12],"hasChanges":true},{"number":2,"terms":[{"range":[2,12],"category":"Program","children":[{"range":[2,12],"category":"ExpressionStatements","children":[{"range":[2,12],"category":"Object","children":[{"range":[4,10],"category":"Pair","children":[{"range":[4,7],"category":"StringLiteral","children":[{"range":[4,5],"category":"StringLiteral"},{"range":[5,6],"category":"StringLiteral"},{"range":[6,7],"category":"StringLiteral"}]},{"patch":"replace","range":[9,10],"category":"number"}]}]}]}]}],"range":[2,12],"hasChanges":true}],[{"number":3,"terms":[{"range":[12,21],"category":"Program","children":[{"range":[12,21],"category":"ExpressionStatements","children":[{"range":[12,21],"category":"Object","children":[{"range":[14,20],"category":"Pair","children":[{"range":[14,17],"category":"StringLiteral","children":[{"range":[14,15],"category":"StringLiteral"},{"range":[15,16],"category":"StringLiteral"},{"range":[16,17],"category":"StringLiteral"}]},{"range":[19,20],"category":"number"}]}]}]}]}],"range":[12,21],"hasChanges":false},{"number":3,"terms":[{"range":[12,21],"category":"Program","children":[{"range":[12,21],"category":"ExpressionStatements","children":[{"range":[12,21],"category":"Object","children":[{"range":[14,20],"category":"Pair","children":[{"range":[14,17],"category":"StringLiteral","children":[{"range":[14,15],"category":"StringLiteral"},{"range":[15,16],"category":"StringLiteral"},{"range":[16,17],"category":"StringLiteral"}]},{"range":[19,20],"category":"number"}]}]}]}]}],"range":[12,21],"hasChanges":false}],[{"number":4,"terms":[{"range":[21,23],"category":"Program","children":[{"range":[21,23],"category":"ExpressionStatements","children":[{"range":[21,22],"category":"Object","children":[]}]}]}],"range":[21,23],"hasChanges":false},{"number":4,"terms":[{"range":[21,23],"category":"Program","children":[{"range":[21,23],"category":"ExpressionStatements","children":[{"range":[21,22],"category":"Object","children":[]}]}]}],"range":[21,23],"hasChanges":false}],[{"number":5,"terms":[{"range":[23,23],"category":"Program","children":[]}],"range":[23,23],"hasChanges":false},{"number":5,"terms":[{"range":[23,23],"category":"Program","children":[]}],"range":[23,23],"hasChanges":false}]],"oids":["",""],"paths":["test/diffs/dictionary.A.js","test/diffs/dictionary.B.js"]} \ No newline at end of file +{"rows":[[{"number":1,"terms":[{"range":[0,2],"category":"Program","children":[{"range":[0,2],"category":"ExpressionStatements","children":[{"range":[0,2],"category":"Object","children":[]}]}]}],"range":[0,2],"hasChanges":false},{"number":1,"terms":[{"range":[0,2],"category":"Program","children":[{"range":[0,2],"category":"ExpressionStatements","children":[{"range":[0,2],"category":"Object","children":[]}]}]}],"range":[0,2],"hasChanges":false}],[{"number":2,"terms":[{"range":[2,12],"category":"Program","children":[{"range":[2,12],"category":"ExpressionStatements","children":[{"range":[2,12],"category":"Object","children":[{"range":[4,10],"category":"Pair","children":[{"range":[4,7],"category":"StringLiteral","children":[{"range":[4,5],"category":"StringLiteral"},{"range":[5,6],"category":"StringLiteral"},{"range":[6,7],"category":"StringLiteral"}]},{"patch":"replace","range":[9,10],"category":"number"}]}]}]}]}],"range":[2,12],"hasChanges":true},{"number":2,"terms":[{"range":[2,12],"category":"Program","children":[{"range":[2,12],"category":"ExpressionStatements","children":[{"range":[2,12],"category":"Object","children":[{"range":[4,10],"category":"Pair","children":[{"range":[4,7],"category":"StringLiteral","children":[{"range":[4,5],"category":"StringLiteral"},{"range":[5,6],"category":"StringLiteral"},{"range":[6,7],"category":"StringLiteral"}]},{"patch":"replace","range":[9,10],"category":"number"}]}]}]}]}],"range":[2,12],"hasChanges":true}],[{"number":3,"terms":[{"range":[12,21],"category":"Program","children":[{"range":[12,21],"category":"ExpressionStatements","children":[{"range":[12,21],"category":"Object","children":[{"range":[14,20],"category":"Pair","children":[{"range":[14,17],"category":"StringLiteral","children":[{"range":[14,15],"category":"StringLiteral"},{"range":[15,16],"category":"StringLiteral"},{"range":[16,17],"category":"StringLiteral"}]},{"range":[19,20],"category":"number"}]}]}]}]}],"range":[12,21],"hasChanges":false},{"number":3,"terms":[{"range":[12,21],"category":"Program","children":[{"range":[12,21],"category":"ExpressionStatements","children":[{"range":[12,21],"category":"Object","children":[{"range":[14,20],"category":"Pair","children":[{"range":[14,17],"category":"StringLiteral","children":[{"range":[14,15],"category":"StringLiteral"},{"range":[15,16],"category":"StringLiteral"},{"range":[16,17],"category":"StringLiteral"}]},{"range":[19,20],"category":"number"}]}]}]}]}],"range":[12,21],"hasChanges":false}],[{"number":4,"terms":[{"range":[21,23],"category":"Program","children":[{"range":[21,23],"category":"ExpressionStatements","children":[{"range":[21,22],"category":"Object","children":[]}]}]}],"range":[21,23],"hasChanges":false},{"number":4,"terms":[{"range":[21,23],"category":"Program","children":[{"range":[21,23],"category":"ExpressionStatements","children":[{"range":[21,22],"category":"Object","children":[]}]}]}],"range":[21,23],"hasChanges":false}],[{"number":5,"terms":[{"range":[23,23],"category":"Program","children":[]}],"range":[23,23],"hasChanges":false},{"number":5,"terms":[{"range":[23,23],"category":"Program","children":[]}],"range":[23,23],"hasChanges":false}]],"oids":["0000000000000000000000000000000000000000","0000000000000000000000000000000000000000"],"paths":["test/diffs/dictionary.A.js","test/diffs/dictionary.B.js"]} \ No newline at end of file diff --git a/test/diffs/jquery.patch.js b/test/diffs/jquery.patch.js index 3295847c0..c71be0b2c 100644 --- a/test/diffs/jquery.patch.js +++ b/test/diffs/jquery.patch.js @@ -1,5 +1,5 @@ diff --git a/test/diffs/jquery.A.js b/test/diffs/jquery.B.js -index .. 100644 +index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 100644 --- a/test/diffs/jquery.A.js +++ b/test/diffs/jquery.B.js @@ -3,7 +3,7 @@ diff --git a/test/diffs/multiple-hunks.patch.js b/test/diffs/multiple-hunks.patch.js index 2fd80cb99..e923eb62e 100644 --- a/test/diffs/multiple-hunks.patch.js +++ b/test/diffs/multiple-hunks.patch.js @@ -1,5 +1,5 @@ diff --git a/test/diffs/multiple-hunks.A.js b/test/diffs/multiple-hunks.B.js -index .. 100644 +index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 100644 --- a/test/diffs/multiple-hunks.A.js +++ b/test/diffs/multiple-hunks.B.js @@ -1,5 +1,5 @@ diff --git a/test/diffs/newline-at-eof.json.js b/test/diffs/newline-at-eof.json.js index a97d2ed91..3d58ee6bc 100644 --- a/test/diffs/newline-at-eof.json.js +++ b/test/diffs/newline-at-eof.json.js @@ -1 +1 @@ -{"rows":[[{"number":1,"terms":[{"range":[0,29],"category":"Program","children":[{"range":[0,28],"category":"ExpressionStatements","children":[{"range":[0,27],"category":"MethodCall","children":[{"range":[0,7],"category":"Identifier"},{"range":[8,11],"category":"Identifier"},{"range":[12,26],"category":"Args","children":[{"range":[12,26],"category":"StringLiteral","children":[{"range":[12,13],"category":"StringLiteral"},{"range":[13,18],"category":"StringLiteral"},{"range":[18,19],"category":"StringLiteral"},{"range":[20,25],"category":"StringLiteral"},{"range":[25,26],"category":"StringLiteral"}]}]}]}]}]}],"range":[0,29],"hasChanges":false},{"number":1,"terms":[{"range":[0,29],"category":"Program","children":[{"range":[0,28],"category":"ExpressionStatements","children":[{"range":[0,27],"category":"MethodCall","children":[{"range":[0,7],"category":"Identifier"},{"range":[8,11],"category":"Identifier"},{"range":[12,26],"category":"Args","children":[{"range":[12,26],"category":"StringLiteral","children":[{"range":[12,13],"category":"StringLiteral"},{"range":[13,18],"category":"StringLiteral"},{"range":[18,19],"category":"StringLiteral"},{"range":[20,25],"category":"StringLiteral"},{"range":[25,26],"category":"StringLiteral"}]}]}]}]}]}],"range":[0,29],"hasChanges":false}],[{"number":2,"terms":[{"range":[29,29],"category":"Program","children":[]}],"range":[29,29],"hasChanges":false},{"number":2,"terms":[{"range":[29,30],"category":"Program","children":[]}],"range":[29,30],"hasChanges":false}],[{"number":3,"terms":[{"range":[30,56],"category":"Program","children":[{"patch":"insert","range":[30,55],"category":"ExpressionStatements","children":[{"range":[30,54],"category":"MethodCall","children":[{"range":[30,37],"category":"Identifier"},{"range":[38,41],"category":"Identifier"},{"range":[42,53],"category":"Args","children":[{"range":[42,53],"category":"StringLiteral","children":[{"range":[42,43],"category":"StringLiteral"},{"range":[43,52],"category":"StringLiteral"},{"range":[52,53],"category":"StringLiteral"}]}]}]}]}]}],"range":[30,56],"hasChanges":true}],[{"number":4,"terms":[{"range":[56,56],"category":"Program","children":[]}],"range":[56,56],"hasChanges":false}]],"oids":["",""],"paths":["test/diffs/newline-at-eof.A.js","test/diffs/newline-at-eof.B.js"]} \ No newline at end of file +{"rows":[[{"number":1,"terms":[{"range":[0,29],"category":"Program","children":[{"range":[0,28],"category":"ExpressionStatements","children":[{"range":[0,27],"category":"MethodCall","children":[{"range":[0,7],"category":"Identifier"},{"range":[8,11],"category":"Identifier"},{"range":[12,26],"category":"Args","children":[{"range":[12,26],"category":"StringLiteral","children":[{"range":[12,13],"category":"StringLiteral"},{"range":[13,18],"category":"StringLiteral"},{"range":[18,19],"category":"StringLiteral"},{"range":[20,25],"category":"StringLiteral"},{"range":[25,26],"category":"StringLiteral"}]}]}]}]}]}],"range":[0,29],"hasChanges":false},{"number":1,"terms":[{"range":[0,29],"category":"Program","children":[{"range":[0,28],"category":"ExpressionStatements","children":[{"range":[0,27],"category":"MethodCall","children":[{"range":[0,7],"category":"Identifier"},{"range":[8,11],"category":"Identifier"},{"range":[12,26],"category":"Args","children":[{"range":[12,26],"category":"StringLiteral","children":[{"range":[12,13],"category":"StringLiteral"},{"range":[13,18],"category":"StringLiteral"},{"range":[18,19],"category":"StringLiteral"},{"range":[20,25],"category":"StringLiteral"},{"range":[25,26],"category":"StringLiteral"}]}]}]}]}]}],"range":[0,29],"hasChanges":false}],[{"number":2,"terms":[{"range":[29,29],"category":"Program","children":[]}],"range":[29,29],"hasChanges":false},{"number":2,"terms":[{"range":[29,30],"category":"Program","children":[]}],"range":[29,30],"hasChanges":false}],[{"number":3,"terms":[{"range":[30,56],"category":"Program","children":[{"patch":"insert","range":[30,55],"category":"ExpressionStatements","children":[{"range":[30,54],"category":"MethodCall","children":[{"range":[30,37],"category":"Identifier"},{"range":[38,41],"category":"Identifier"},{"range":[42,53],"category":"Args","children":[{"range":[42,53],"category":"StringLiteral","children":[{"range":[42,43],"category":"StringLiteral"},{"range":[43,52],"category":"StringLiteral"},{"range":[52,53],"category":"StringLiteral"}]}]}]}]}]}],"range":[30,56],"hasChanges":true}],[{"number":4,"terms":[{"range":[56,56],"category":"Program","children":[]}],"range":[56,56],"hasChanges":false}]],"oids":["0000000000000000000000000000000000000000","0000000000000000000000000000000000000000"],"paths":["test/diffs/newline-at-eof.A.js","test/diffs/newline-at-eof.B.js"]} \ No newline at end of file diff --git a/test/diffs/newline-at-eof.patch.js b/test/diffs/newline-at-eof.patch.js index 9a6735771..ff7530a52 100644 --- a/test/diffs/newline-at-eof.patch.js +++ b/test/diffs/newline-at-eof.patch.js @@ -1,5 +1,5 @@ diff --git a/test/diffs/newline-at-eof.A.js b/test/diffs/newline-at-eof.B.js -index .. 100644 +index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 100644 --- a/test/diffs/newline-at-eof.A.js +++ b/test/diffs/newline-at-eof.B.js @@ -1,2 +1,4 @@ diff --git a/test/diffs/no-newline-at-eof.json.js b/test/diffs/no-newline-at-eof.json.js index dc8992a2d..ea5e696d5 100644 --- a/test/diffs/no-newline-at-eof.json.js +++ b/test/diffs/no-newline-at-eof.json.js @@ -1 +1 @@ -{"rows":[[{"number":1,"terms":[{"range":[0,28],"category":"Program","children":[{"range":[0,28],"category":"ExpressionStatements","children":[{"range":[0,27],"category":"MethodCall","children":[{"range":[0,7],"category":"Identifier"},{"range":[8,11],"category":"Identifier"},{"range":[12,26],"category":"Args","children":[{"range":[12,26],"category":"StringLiteral","children":[{"range":[12,13],"category":"StringLiteral"},{"range":[13,18],"category":"StringLiteral"},{"range":[18,19],"category":"StringLiteral"},{"range":[20,25],"category":"StringLiteral"},{"range":[25,26],"category":"StringLiteral"}]}]}]}]}]}],"range":[0,28],"hasChanges":false},{"number":1,"terms":[{"range":[0,29],"category":"Program","children":[{"range":[0,28],"category":"ExpressionStatements","children":[{"range":[0,27],"category":"MethodCall","children":[{"range":[0,7],"category":"Identifier"},{"range":[8,11],"category":"Identifier"},{"range":[12,26],"category":"Args","children":[{"range":[12,26],"category":"StringLiteral","children":[{"range":[12,13],"category":"StringLiteral"},{"range":[13,18],"category":"StringLiteral"},{"range":[18,19],"category":"StringLiteral"},{"range":[20,25],"category":"StringLiteral"},{"range":[25,26],"category":"StringLiteral"}]}]}]}]}]}],"range":[0,29],"hasChanges":false}],[{"number":2,"terms":[{"range":[29,30],"category":"Program","children":[]}],"range":[29,30],"hasChanges":false}],[{"number":3,"terms":[{"range":[30,55],"category":"Program","children":[{"patch":"insert","range":[30,55],"category":"ExpressionStatements","children":[{"range":[30,54],"category":"MethodCall","children":[{"range":[30,37],"category":"Identifier"},{"range":[38,41],"category":"Identifier"},{"range":[42,53],"category":"Args","children":[{"range":[42,53],"category":"StringLiteral","children":[{"range":[42,43],"category":"StringLiteral"},{"range":[43,52],"category":"StringLiteral"},{"range":[52,53],"category":"StringLiteral"}]}]}]}]}]}],"range":[30,55],"hasChanges":true}]],"oids":["",""],"paths":["test/diffs/no-newline-at-eof.A.js","test/diffs/no-newline-at-eof.B.js"]} \ No newline at end of file +{"rows":[[{"number":1,"terms":[{"range":[0,28],"category":"Program","children":[{"range":[0,28],"category":"ExpressionStatements","children":[{"range":[0,27],"category":"MethodCall","children":[{"range":[0,7],"category":"Identifier"},{"range":[8,11],"category":"Identifier"},{"range":[12,26],"category":"Args","children":[{"range":[12,26],"category":"StringLiteral","children":[{"range":[12,13],"category":"StringLiteral"},{"range":[13,18],"category":"StringLiteral"},{"range":[18,19],"category":"StringLiteral"},{"range":[20,25],"category":"StringLiteral"},{"range":[25,26],"category":"StringLiteral"}]}]}]}]}]}],"range":[0,28],"hasChanges":false},{"number":1,"terms":[{"range":[0,29],"category":"Program","children":[{"range":[0,28],"category":"ExpressionStatements","children":[{"range":[0,27],"category":"MethodCall","children":[{"range":[0,7],"category":"Identifier"},{"range":[8,11],"category":"Identifier"},{"range":[12,26],"category":"Args","children":[{"range":[12,26],"category":"StringLiteral","children":[{"range":[12,13],"category":"StringLiteral"},{"range":[13,18],"category":"StringLiteral"},{"range":[18,19],"category":"StringLiteral"},{"range":[20,25],"category":"StringLiteral"},{"range":[25,26],"category":"StringLiteral"}]}]}]}]}]}],"range":[0,29],"hasChanges":false}],[{"number":2,"terms":[{"range":[29,30],"category":"Program","children":[]}],"range":[29,30],"hasChanges":false}],[{"number":3,"terms":[{"range":[30,55],"category":"Program","children":[{"patch":"insert","range":[30,55],"category":"ExpressionStatements","children":[{"range":[30,54],"category":"MethodCall","children":[{"range":[30,37],"category":"Identifier"},{"range":[38,41],"category":"Identifier"},{"range":[42,53],"category":"Args","children":[{"range":[42,53],"category":"StringLiteral","children":[{"range":[42,43],"category":"StringLiteral"},{"range":[43,52],"category":"StringLiteral"},{"range":[52,53],"category":"StringLiteral"}]}]}]}]}]}],"range":[30,55],"hasChanges":true}]],"oids":["0000000000000000000000000000000000000000","0000000000000000000000000000000000000000"],"paths":["test/diffs/no-newline-at-eof.A.js","test/diffs/no-newline-at-eof.B.js"]} \ No newline at end of file diff --git a/test/diffs/no-newline-at-eof.patch.js b/test/diffs/no-newline-at-eof.patch.js index e002c25c6..19dabe4b3 100644 --- a/test/diffs/no-newline-at-eof.patch.js +++ b/test/diffs/no-newline-at-eof.patch.js @@ -1,5 +1,5 @@ diff --git a/test/diffs/no-newline-at-eof.A.js b/test/diffs/no-newline-at-eof.B.js -index .. 100644 +index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 100644 --- a/test/diffs/no-newline-at-eof.A.js +++ b/test/diffs/no-newline-at-eof.B.js @@ -1,1 +1,3 @@ From 25a53ac718927cf7e66e7f2be1793d566aea3bb9 Mon Sep 17 00:00:00 2001 From: joshvera Date: Tue, 9 Aug 2016 16:07:15 -0400 Subject: [PATCH 172/208] add file addition and deletion tests --- test/diffs/empty-file.patch.js | 3 --- test/diffs/{empty-file.B.js => file-addition.B.js} | 0 test/diffs/file-addition.patch.js | 3 +++ test/diffs/file-deletion.A.js | 0 test/diffs/file-deletion.patch.js | 3 +++ 5 files changed, 6 insertions(+), 3 deletions(-) delete mode 100644 test/diffs/empty-file.patch.js rename test/diffs/{empty-file.B.js => file-addition.B.js} (100%) create mode 100644 test/diffs/file-addition.patch.js create mode 100644 test/diffs/file-deletion.A.js create mode 100644 test/diffs/file-deletion.patch.js diff --git a/test/diffs/empty-file.patch.js b/test/diffs/empty-file.patch.js deleted file mode 100644 index 482540dcc..000000000 --- a/test/diffs/empty-file.patch.js +++ /dev/null @@ -1,3 +0,0 @@ -diff --git a/test/diffs/empty-file.B.js b/test/diffs/empty-file.B.js -new file mode 100644 -index .. diff --git a/test/diffs/empty-file.B.js b/test/diffs/file-addition.B.js similarity index 100% rename from test/diffs/empty-file.B.js rename to test/diffs/file-addition.B.js diff --git a/test/diffs/file-addition.patch.js b/test/diffs/file-addition.patch.js new file mode 100644 index 000000000..b0b2f1c4c --- /dev/null +++ b/test/diffs/file-addition.patch.js @@ -0,0 +1,3 @@ +diff --git a/ b/test/diffs/empty-file.B.js +new file mode 100644 +index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 \ No newline at end of file diff --git a/test/diffs/file-deletion.A.js b/test/diffs/file-deletion.A.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/diffs/file-deletion.patch.js b/test/diffs/file-deletion.patch.js new file mode 100644 index 000000000..b0b2f1c4c --- /dev/null +++ b/test/diffs/file-deletion.patch.js @@ -0,0 +1,3 @@ +diff --git a/ b/test/diffs/empty-file.B.js +new file mode 100644 +index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 \ No newline at end of file From 9df13991686a9bb1b75a703cbe263f4b7b1491dd Mon Sep 17 00:00:00 2001 From: joshvera Date: Tue, 9 Aug 2016 16:14:41 -0400 Subject: [PATCH 173/208] output the inserted/deleted path in the patch header --- src/Renderer/Patch.hs | 5 ++++- test/diffs/file-addition.patch.js | 4 ++-- test/diffs/file-deletion.patch.js | 6 +++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Renderer/Patch.hs b/src/Renderer/Patch.hs index 1d0c82975..34cac78c0 100644 --- a/src/Renderer/Patch.hs +++ b/src/Renderer/Patch.hs @@ -104,7 +104,10 @@ header blobs = intercalate "\n" ([filepathHeader, fileModeHeader] <> maybeFilepa beforeFilepath = "--- " <> modeHeader "a" modeA pathA afterFilepath = "+++ " <> modeHeader "b" modeB pathB sources = source <$> blobs - (pathA, pathB) = runJoin $ path <$> blobs + (pathA, pathB) = case runJoin $ path <$> blobs of + ("", path) -> (path, path) + (path, "") -> (path, path) + paths -> paths (oidA, oidB) = runJoin $ oid <$> blobs (modeA, modeB) = runJoin $ blobKind <$> blobs diff --git a/test/diffs/file-addition.patch.js b/test/diffs/file-addition.patch.js index b0b2f1c4c..6c04a8f0f 100644 --- a/test/diffs/file-addition.patch.js +++ b/test/diffs/file-addition.patch.js @@ -1,3 +1,3 @@ -diff --git a/ b/test/diffs/empty-file.B.js +diff --git a/test/diffs/file-addition.B.js b/test/diffs/file-addition.B.js new file mode 100644 -index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 \ No newline at end of file +index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 diff --git a/test/diffs/file-deletion.patch.js b/test/diffs/file-deletion.patch.js index b0b2f1c4c..dea2c8982 100644 --- a/test/diffs/file-deletion.patch.js +++ b/test/diffs/file-deletion.patch.js @@ -1,3 +1,3 @@ -diff --git a/ b/test/diffs/empty-file.B.js -new file mode 100644 -index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 \ No newline at end of file +diff --git a/test/diffs/file-deletion.A.js b/test/diffs/file-deletion.A.js +deleted file mode 100644 +index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 From 15f0f9c47ab19288358f40bd732c55928bb9d2f2 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:18:45 -0400 Subject: [PATCH 174/208] Label arbitrary p,q-grams with the text, not the record. --- test/Data/RandomWalkSimilarity/Spec.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index b3e45a7dc..aaa561d95 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -20,10 +20,10 @@ spec :: Spec spec = parallel $ do describe "pqGrams" $ do prop "produces grams with stems of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== p) . length . stem) + \ (term, p, q) -> pqGrams (rhead . headF) p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== p) . length . stem) prop "produces grams with bases of the specified width" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGrams headF p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== q) . length . base) + \ (term, p, q) -> pqGrams (rhead . headF) p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== q) . length . base) describe "featureVector" $ do prop "produces a vector of the specified dimension" . forAll (arbitrary `suchThat` ((> 0) . Prologue.snd)) $ From e0ec5ad7d045fee4fb174012a11c5eca0b095445 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:18:53 -0400 Subject: [PATCH 175/208] Export pqGramDecorator. --- src/Data/RandomWalkSimilarity.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index f60f76631..8e4560113 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -3,6 +3,7 @@ module Data.RandomWalkSimilarity ( rws , pqGrams , featureVector +, pqGramDecorator , featureVectorDecorator , Gram(..) ) where From 46c2e4d8e0ec638451c5c0d749162aa941d25212 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:23:38 -0400 Subject: [PATCH 176/208] Test pqGramDecorator directly. --- test/Data/RandomWalkSimilarity/Spec.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index aaa561d95..14505b9fd 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -18,12 +18,12 @@ import Test.QuickCheck spec :: Spec spec = parallel $ do - describe "pqGrams" $ do + describe "pqGramDecorator" $ do prop "produces grams with stems of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGrams (rhead . headF) p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== p) . length . stem) + \ (term, p, q) -> pqGramDecorator (rhead . headF) p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== p) . length . stem . rhead) prop "produces grams with bases of the specified width" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGrams (rhead . headF) p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== q) . length . base) + \ (term, p, q) -> pqGramDecorator (rhead . headF) p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== q) . length . base . rhead) describe "featureVector" $ do prop "produces a vector of the specified dimension" . forAll (arbitrary `suchThat` ((> 0) . Prologue.snd)) $ From 67034c067db6bc061b70e714d18649448829638c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:24:05 -0400 Subject: [PATCH 177/208] :fire: pqGrams. --- src/Data/RandomWalkSimilarity.hs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 8e4560113..31ad0e977 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -1,7 +1,6 @@ {-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators #-} module Data.RandomWalkSimilarity ( rws -, pqGrams , featureVector , pqGramDecorator , featureVectorDecorator @@ -62,11 +61,6 @@ data UnmappedTerm a = UnmappedTerm { termIndex :: {-# UNPACK #-} !Int, feature : data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } deriving (Eq, Show) --- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. -pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) -pqGrams getLabel p q = foldMap (pure . getField) . pqGramDecorator getLabel p q - - -- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree. featureVector :: Hashable label => Int -> DList.DList (Gram label) -> Vector.Vector Double featureVector d bag = sumVectors $ unitVector d . hash <$> bag From 1b59d2782fa4e1f4fbacff383f711cc81ebbf06b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:33:54 -0400 Subject: [PATCH 178/208] Add a function to compute positive integers. --- test/Data/RandomWalkSimilarity/Spec.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index 14505b9fd..d794a2571 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -36,3 +36,6 @@ spec = parallel $ do tbs = toTerm <$> bs diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tas)), Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tbs))) + +positively :: Int -> Int +positively = succ . abs From 4b2a7eaea4d317409f30829d77a8ff753b7d4c61 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:34:10 -0400 Subject: [PATCH 179/208] Test featureVectorDecorator directly. --- test/Data/RandomWalkSimilarity/Spec.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index d794a2571..9477d8433 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -2,7 +2,6 @@ module Data.RandomWalkSimilarity.Spec where import Category -import Data.DList as DList hiding (toList) import Data.RandomWalkSimilarity import Data.Record import qualified Data.Vector.Arbitrary as Vector @@ -25,9 +24,9 @@ spec = parallel $ do prop "produces grams with bases of the specified width" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ \ (term, p, q) -> pqGramDecorator (rhead . headF) p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== q) . length . base . rhead) - describe "featureVector" $ do - prop "produces a vector of the specified dimension" . forAll (arbitrary `suchThat` ((> 0) . Prologue.snd)) $ - \ (grams, d) -> length (featureVector d (fromList (grams :: [Gram Text]))) `shouldBe` d + describe "featureVectorDecorator" $ do + prop "produces a vector of the specified dimension" $ + \ (term, p, q, d) -> featureVectorDecorator (rhead . headF) (positively p) (positively q) (positively d) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively d)) . length . rhead) describe "rws" $ do let compare a b = if extract a == extract b then Just (pure (Replace a b)) else Nothing From 9261f190749402040bf7ba10e5cebe53848e8bd8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Tue, 9 Aug 2016 16:34:37 -0400 Subject: [PATCH 180/208] :fire: featureVector. --- src/Data/RandomWalkSimilarity.hs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 31ad0e977..f5b4af409 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -1,7 +1,6 @@ {-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators #-} module Data.RandomWalkSimilarity ( rws -, featureVector , pqGramDecorator , featureVectorDecorator , Gram(..) @@ -11,7 +10,6 @@ import Control.Applicative import Control.Arrow ((&&&)) import Control.Monad.Random import Control.Monad.State -import qualified Data.DList as DList import Data.Functor.Both hiding (fst, snd) import Data.Functor.Foldable as Foldable import Data.Hashable @@ -61,11 +59,6 @@ data UnmappedTerm a = UnmappedTerm { termIndex :: {-# UNPACK #-} !Int, feature : data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } deriving (Eq, Show) --- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree. -featureVector :: Hashable label => Int -> DList.DList (Gram label) -> Vector.Vector Double -featureVector d bag = sumVectors $ unitVector d . hash <$> bag - where sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0) - -- | Annotates a term with the corresponding p,q-gram at each node. pqGramDecorator :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) pqGramDecorator getLabel p q = cata algebra From 1ce869b8d0b2f821e6fa43ae69325e09ebd734a2 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:05:12 -0400 Subject: [PATCH 181/208] Avoid forAll/suchThat in the p/q tests. This enables us to shrink properly. --- test/Data/RandomWalkSimilarity/Spec.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index 9477d8433..2e33583a5 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -18,11 +18,11 @@ import Test.QuickCheck spec :: Spec spec = parallel $ do describe "pqGramDecorator" $ do - prop "produces grams with stems of the specified length" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGramDecorator (rhead . headF) p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== p) . length . stem . rhead) + prop "produces grams with stems of the specified length" $ + \ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively p)) . length . stem . rhead) - prop "produces grams with bases of the specified width" . forAll (arbitrary `suchThat` (\ (_, p, q) -> p > 0 && q > 0)) $ - \ (term, p, q) -> pqGramDecorator (rhead . headF) p q (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== q) . length . base . rhead) + prop "produces grams with bases of the specified width" $ + \ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively q)) . length . base . rhead) describe "featureVectorDecorator" $ do prop "produces a vector of the specified dimension" $ From f0466c0c43b88e215db5733439d070a4cebecd8b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:15:42 -0400 Subject: [PATCH 182/208] Enable FlexibleContexts for the tests. --- semantic-diff.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/semantic-diff.cabal b/semantic-diff.cabal index e3e3e3dc5..cbe5adb4a 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -136,7 +136,7 @@ test-suite semantic-diff-test else ghc-options: -threaded -rtsopts -with-rtsopts=-N -j -pgml=script/g++ default-language: Haskell2010 - default-extensions: DeriveFunctor, DeriveGeneric, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards + default-extensions: DeriveFunctor, DeriveGeneric, FlexibleContexts, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards if os(darwin) extra-libraries: stdc++ icuuc icudata icui18n if os(darwin) From 04a4614dfc3da71adde9f8ef62fa7f76b259109c Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:21:58 -0400 Subject: [PATCH 183/208] =?UTF-8?q?The=20diff=20specs=20don=E2=80=99t=20ne?= =?UTF-8?q?ed=20to=20generate=20arbitrary=20vectors.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/Diff/Spec.hs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/test/Diff/Spec.hs b/test/Diff/Spec.hs index 64a1356c9..91857b665 100644 --- a/test/Diff/Spec.hs +++ b/test/Diff/Spec.hs @@ -2,11 +2,12 @@ module Diff.Spec where import Category +import Data.RandomWalkSimilarity import Data.Record import Data.Text.Arbitrary () -import qualified Data.Vector.Arbitrary as Vector import Diff import Diff.Arbitrary +import Info import Interpreter import Prologue import Term.Arbitrary @@ -16,23 +17,24 @@ import Test.QuickCheck spec :: Spec spec = parallel $ do + let toTerm' = featureVectorDecorator (category . headF) 2 2 15 . toTerm prop "equality is reflexive" $ - \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in + \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in diff `shouldBe` diff prop "equal terms produce identity diffs" $ - \ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category])) in + \ a -> let term = toTerm' (a :: ArbitraryTerm Text (Record '[Category])) in diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0 describe "beforeTerm" $ do prop "recovers the before term" $ - \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in - beforeTerm diff `shouldBe` Just (toTerm a) + \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in + beforeTerm diff `shouldBe` Just (toTerm' a) describe "afterTerm" $ do prop "recovers the after term" $ - \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm a) (toTerm (b :: ArbitraryTerm Text (Record '[Vector.Vector Double, Category]))) in - afterTerm diff `shouldBe` Just (toTerm b) + \ a b -> let diff = diffTerms wrap (==) diffCost (toTerm' a) (toTerm' (b :: ArbitraryTerm Text (Record '[Category]))) in + afterTerm diff `shouldBe` Just (toTerm' b) describe "ArbitraryDiff" $ do prop "generates diffs of a specific size" . forAll ((arbitrary >>= \ n -> (,) n <$> diffOfSize n) `suchThat` ((> 0) . fst)) $ From 45ce4bbf03e094e592e31c3c30a29659bfe4182a Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:26:58 -0400 Subject: [PATCH 184/208] =?UTF-8?q?The=20interpreter=20specs=20don?= =?UTF-8?q?=E2=80=99t=20need=20to=20generate=20arbitrary=20vectors.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/InterpreterSpec.hs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/test/InterpreterSpec.hs b/test/InterpreterSpec.hs index 4e7c509f2..ca8785585 100644 --- a/test/InterpreterSpec.hs +++ b/test/InterpreterSpec.hs @@ -2,9 +2,10 @@ module InterpreterSpec where import Category -import Diff +import Data.RandomWalkSimilarity import Data.Record -import qualified Data.Vector.Arbitrary as Vector +import Diff +import Info import Interpreter import Patch import Prologue @@ -16,16 +17,17 @@ import Test.Hspec.QuickCheck spec :: Spec spec = parallel $ do describe "interpret" $ do + let decorate = featureVectorDecorator (category . headF) 2 2 15 it "returns a replacement when comparing two unicode equivalent terms" $ - let termA = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf ("t\776" :: Text) - termB = cofree $ (StringLiteral .: Vector.singleton (0 :: Double) .: RNil) :< Leaf "\7831" in + let termA = decorate . cofree $ (StringLiteral .: RNil) :< Leaf ("t\776" :: Text) + termB = decorate . cofree $ (StringLiteral .: RNil) :< Leaf "\7831" in diffTerms wrap ((==) `on` extract) diffCost termA termB `shouldBe` free (Pure (Replace termA termB)) prop "produces correct diffs" $ - \ a b -> let diff = diffTerms wrap ((==) `on` extract) diffCost (toTerm a) (toTerm b) :: Diff Text (Record '[Category, Vector.Vector Double]) in - (beforeTerm diff, afterTerm diff) `shouldBe` (Just (toTerm a), Just (toTerm b)) + \ a b -> let diff = diffTerms wrap ((==) `on` extract) diffCost (decorate (toTerm a)) (decorate (toTerm (b :: ArbitraryTerm Text (Record '[Category])))) in + (beforeTerm diff, afterTerm diff) `shouldBe` (Just (decorate (toTerm a)), Just (decorate (toTerm b))) prop "constructs zero-cost diffs of equal terms" $ - \ a -> let term = toTerm a - diff = diffTerms wrap ((==) `on` extract) diffCost term term :: Diff Text (Record '[Category, Vector.Vector Double]) in + \ a -> let term = decorate (toTerm (a :: ArbitraryTerm Text (Record '[Category]))) + diff = diffTerms wrap ((==) `on` extract) diffCost term term in diffCost diff `shouldBe` 0 From b6637828fa74d73eb50c3b8ce1dedfdf0bb4dc82 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:30:03 -0400 Subject: [PATCH 185/208] =?UTF-8?q?The=20diff=20summary=20specs=20don?= =?UTF-8?q?=E2=80=99t=20need=20to=20generate=20arbitrary=20vectors.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/DiffSummarySpec.hs | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/test/DiffSummarySpec.hs b/test/DiffSummarySpec.hs index ffb635d5e..93d69491c 100644 --- a/test/DiffSummarySpec.hs +++ b/test/DiffSummarySpec.hs @@ -1,32 +1,32 @@ {-# LANGUAGE DataKinds #-} module DiffSummarySpec where -import Prologue +import Category +import Data.Functor.Both +import Data.List (partition) +import Data.RandomWalkSimilarity import Data.Record -import qualified Data.Vector.Arbitrary as Vector -import Test.Hspec -import Test.Hspec.QuickCheck import Diff +import Diff.Arbitrary +import DiffSummary +import Info +import Interpreter +import Patch +import Prologue +import Source import Syntax import Term -import Patch -import Category -import DiffSummary -import Diff.Arbitrary -import Data.List (partition) import Term.Arbitrary -import Interpreter -import Info -import Source -import Data.Functor.Both +import Test.Hspec +import Test.Hspec.QuickCheck -arrayInfo :: Record '[Category, Range, Vector.Vector Double] -arrayInfo = ArrayLiteral .: Range 0 3 .: Vector.singleton 0 .: RNil +arrayInfo :: Record '[Category, Range] +arrayInfo = ArrayLiteral .: Range 0 3 .: RNil -literalInfo :: Record '[Category, Range, Vector.Vector Double] -literalInfo = StringLiteral .: Range 1 2 .: Vector.singleton 0 .: RNil +literalInfo :: Record '[Category, Range] +literalInfo = StringLiteral .: Range 1 2 .: RNil -testDiff :: Diff Text (Record '[Category, Range, Vector.Vector Double]) +testDiff :: Diff Text (Record '[Category, Range]) testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "a")) ]) testSummary :: DiffSummary DiffInfo @@ -45,7 +45,7 @@ spec = parallel $ do diffSummaries sources testDiff `shouldBe` [ DiffSummary { patch = Insert (LeafInfo "string" "a"), parentAnnotation = Nothing } ] prop "equal terms produce identity diffs" $ - \ a -> let term = toTerm (a :: ArbitraryTerm Text (Record '[Category, Range, Vector.Vector Double])) in + \ a -> let term = featureVectorDecorator (category . headF) 2 2 15 (toTerm (a :: ArbitraryTerm Text (Record '[Category, Range]))) in diffSummaries sources (diffTerms wrap (==) diffCost term term) `shouldBe` [] describe "annotatedSummaries" $ do From f05570dd3e67466450f47424e80fcf2b3469d762 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:38:41 -0400 Subject: [PATCH 186/208] =?UTF-8?q?The=20RWS=20specs=20don=E2=80=99t=20nee?= =?UTF-8?q?d=20to=20generate=20arbitrary=20vectors.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/Data/RandomWalkSimilarity/Spec.hs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index 2e33583a5..3e92c26e3 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -1,11 +1,10 @@ {-# LANGUAGE DataKinds #-} module Data.RandomWalkSimilarity.Spec where -import Category import Data.RandomWalkSimilarity import Data.Record -import qualified Data.Vector.Arbitrary as Vector import Diff +import Info import Patch import Prologue import Syntax @@ -31,10 +30,10 @@ spec = parallel $ do describe "rws" $ do let compare a b = if extract a == extract b then Just (pure (Replace a b)) else Nothing prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $ - \ (as, bs) -> let tas = toTerm <$> as - tbs = toTerm <$> bs - diff = free (Free (pure (Program .: Vector.singleton 0 .: RNil) :< Indexed (rws compare tas tbs :: [Diff Text (Record '[Category, Vector.Vector Double])]))) in - (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tas)), Just (cofree ((Program .: Vector.singleton 0 .: RNil) :< Indexed tbs))) + \ (as, bs) -> let tas = featureVectorDecorator (category . headF) 2 2 15 . toTerm <$> (as :: [ArbitraryTerm Text (Record '[Category])]) + tbs = featureVectorDecorator (category . headF) 2 2 15 . toTerm <$> (bs :: [ArbitraryTerm Text (Record '[Category])]) + diff = free (Free (pure (pure 0 .: Program .: RNil) :< Indexed (rws compare tas tbs))) in + (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((pure 0 .: Program .: RNil) :< Indexed tas)), Just (cofree ((pure 0 .: Program .: RNil) :< Indexed tbs))) positively :: Int -> Int positively = succ . abs From 959f15198c576bc09be8ec2859066cb4ffa00aa5 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:44:02 -0400 Subject: [PATCH 187/208] :fire: Data.Vector.Arbitrary. --- semantic-diff.cabal | 1 - test/Data/Vector/Arbitrary.hs | 10 ---------- 2 files changed, 11 deletions(-) delete mode 100644 test/Data/Vector/Arbitrary.hs diff --git a/semantic-diff.cabal b/semantic-diff.cabal index cbe5adb4a..f84499f5b 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -106,7 +106,6 @@ test-suite semantic-diff-test , CorpusSpec , Data.Mergeable.Spec , Data.RandomWalkSimilarity.Spec - , Data.Vector.Arbitrary , Diff.Spec , DiffSummarySpec , InterpreterSpec diff --git a/test/Data/Vector/Arbitrary.hs b/test/Data/Vector/Arbitrary.hs deleted file mode 100644 index 06a09e3b8..000000000 --- a/test/Data/Vector/Arbitrary.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -module Data.Vector.Arbitrary (module Vector) where - -import Data.Vector as Vector -import Prologue -import Test.QuickCheck - -instance Arbitrary a => Arbitrary (Vector.Vector a) where - arbitrary = Vector.fromList <$> listOf1 arbitrary - shrink a = Vector.fromList <$> shrink (Vector.toList a) From b97b422cfb5d33eb003b637d01db8247f1334151 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:44:55 -0400 Subject: [PATCH 188/208] Revert "Enable FlexibleContexts for the tests." This reverts commit 1c0007137862d4c263d75225c7ff579c574c48b1. --- semantic-diff.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/semantic-diff.cabal b/semantic-diff.cabal index f84499f5b..1b2f72091 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -135,7 +135,7 @@ test-suite semantic-diff-test else ghc-options: -threaded -rtsopts -with-rtsopts=-N -j -pgml=script/g++ default-language: Haskell2010 - default-extensions: DeriveFunctor, DeriveGeneric, FlexibleContexts, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards + default-extensions: DeriveFunctor, DeriveGeneric, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards if os(darwin) extra-libraries: stdc++ icuuc icudata icui18n if os(darwin) From 3f919eec260c744cdbdb9da65c93f17de5badbbf Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:47:03 -0400 Subject: [PATCH 189/208] Move compareCategoryEq down. --- src/Diffing.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 052eabeca..0e8d2ec9c 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -125,13 +125,12 @@ decorateParser decorator = (fmap (decorateTerm decorator) .) decorateTerm :: Functor f => TermDecorator f fields field -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c) -compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Term leaf (Record fields) -> Bool -compareCategoryEq = (==) `on` category . extract - -- | Term decorator computing the cost of an unpacked term. termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator c = 1 + sum (cost <$> tailF c) +compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Term leaf (Record fields) -> Bool +compareCategoryEq = (==) `on` category . extract -- | The sum of the node count of the diff’s patches. diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer From b43494afdaf8d16c51ed42ed992a74089352a57e Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:47:11 -0400 Subject: [PATCH 190/208] :memo: compareCategoryEq. --- src/Diffing.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Diffing.hs b/src/Diffing.hs index 0e8d2ec9c..f0455d375 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -129,6 +129,7 @@ decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: hea termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator c = 1 + sum (cost <$> tailF c) +-- | Determine whether two terms are comparable based on the equality of their categories. compareCategoryEq :: HasField fields Category => Term leaf (Record fields) -> Term leaf (Record fields) -> Bool compareCategoryEq = (==) `on` category . extract From 48213c413438ed535454b5363bac9b9546e39ba2 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:48:16 -0400 Subject: [PATCH 191/208] :fire: some stray blank lines. --- src/Interpreter.hs | 1 - src/TreeSitter.hs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Interpreter.hs b/src/Interpreter.hs index 7e77a037a..2c6678034 100644 --- a/src/Interpreter.hs +++ b/src/Interpreter.hs @@ -24,7 +24,6 @@ type Comparable leaf annotation = Term leaf annotation -> Term leaf annotation - -- | Constructs a diff from the CofreeF containing its annotation and syntax. This function has the opportunity to, for example, cache properties in the annotation. type DiffConstructor leaf annotation = CofreeF (Syntax leaf) (Both annotation) (Diff leaf annotation) -> Diff leaf annotation - -- | Diff two terms recursively, given functions characterizing the diffing. diffTerms :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor leaf (Record fields) -- ^ A function to wrap up & possibly annotate every produced diff. diff --git a/src/TreeSitter.hs b/src/TreeSitter.hs index ed81b4222..752be44e1 100644 --- a/src/TreeSitter.hs +++ b/src/TreeSitter.hs @@ -113,7 +113,6 @@ documentToTerm language document blob = alloca $ \ root -> do let info = range .: (categoriesForLanguage language (toS name)) .: RNil pure $! termConstructor (source blob) sourceSpan info children - getChild node n out = do _ <- ts_node_p_named_child node n out toTerm out From f6cd8fd8f6f976020615b936f5a5eaf73e909510 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:53:39 -0400 Subject: [PATCH 192/208] Annotate terms with costs in Diffing. --- src/Diffing.hs | 4 ++-- test/CorpusSpec.hs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index f0455d375..8a1ed991d 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -84,8 +84,8 @@ lineByLineParser blob = pure . cofree . root $ case foldl' annotateLeaves ([], 0 toText = T.pack . Source.toString -- | Return the parser that should be used for a given path. -parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Range, Category]) -parserForFilepath path blob = do +parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category]) +parserForFilepath path blob = decorateTerm termCostDecorator <$> do parsed <- parserForType (toS (takeExtension path)) blob pure $! breakDownLeavesByWord (source blob) parsed diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 81207a038..2bc42e587 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -73,7 +73,7 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte testDiff :: Renderer (Record '[Vector.Vector Double, Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation testDiff renderer paths diff matcher = do sources <- sequence $ readAndTranscodeFile <$> paths - actual <- Verbatim <$> diffFiles (decorateParser termCostDecorator parser) renderer (sourceBlobs sources) + actual <- Verbatim <$> diffFiles parser renderer (sourceBlobs sources) case diff of Nothing -> matcher actual actual Just file -> do From cc0850ae437c57ef85404f5a21600edd4eb52001 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:55:14 -0400 Subject: [PATCH 193/208] Use Program in the line by line parser. --- src/Diffing.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index 8a1ed991d..b70ca0019 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -77,8 +77,8 @@ lineByLineParser blob = pure . cofree . root $ case foldl' annotateLeaves ([], 0 where input = source blob lines = actualLines input - root children = ((Range 0 $ length input) .: Other "program" .: RNil) :< Indexed children - leaf charIndex line = ((Range charIndex $ charIndex + T.length line) .: Other "program" .: RNil) :< Leaf line + root children = ((Range 0 $ length input) .: Program .: RNil) :< Indexed children + leaf charIndex line = ((Range charIndex $ charIndex + T.length line) .: Program .: RNil) :< Leaf line annotateLeaves (accum, charIndex) line = (accum <> [ leaf charIndex (toText line) ] , charIndex + length line) toText = T.pack . Source.toString From cea38b06f337dac71f2a9e7414bc25a9c97471da Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 09:55:38 -0400 Subject: [PATCH 194/208] :fire: decorateParser. --- src/Diffing.hs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Diffing.hs b/src/Diffing.hs index b70ca0019..8a70b2506 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -117,10 +117,6 @@ readAndTranscodeFile path = do -- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms. type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field --- | Decorate the 'Term's produced by a 'Parser' using a function to compute the annotation values at every node. -decorateParser :: Functor f => TermDecorator f fields field -> Parser f (Record fields) -> Parser f (Record (field ': fields)) -decorateParser decorator = (fmap (decorateTerm decorator) .) - -- | Decorate a 'Term' using a function to compute the annotation values at every node. decorateTerm :: Functor f => TermDecorator f fields field -> Cofree f (Record fields) -> Cofree f (Record (field ': fields)) decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c) From 22fd3a7a6c4b9522f06c8aad526c6065722a18d8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 10:02:37 -0400 Subject: [PATCH 195/208] Move the multiple hunks & jquery fixtures to diffs-todo. --- test/{diffs => diffs-todo}/jquery.patch.js | 0 test/{diffs => diffs-todo}/multiple-hunks.patch.js | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename test/{diffs => diffs-todo}/jquery.patch.js (100%) rename test/{diffs => diffs-todo}/multiple-hunks.patch.js (100%) diff --git a/test/diffs/jquery.patch.js b/test/diffs-todo/jquery.patch.js similarity index 100% rename from test/diffs/jquery.patch.js rename to test/diffs-todo/jquery.patch.js diff --git a/test/diffs/multiple-hunks.patch.js b/test/diffs-todo/multiple-hunks.patch.js similarity index 100% rename from test/diffs/multiple-hunks.patch.js rename to test/diffs-todo/multiple-hunks.patch.js From 9e7f45216ad998f941004508f15d55ff225ff407 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 10:37:28 -0400 Subject: [PATCH 196/208] Revert "Move the multiple hunks & jquery fixtures to diffs-todo." This reverts commit d2e7047c03212f567a5c705c8fc409a9a4d8e2c4. --- test/{diffs-todo => diffs}/jquery.patch.js | 0 test/{diffs-todo => diffs}/multiple-hunks.patch.js | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename test/{diffs-todo => diffs}/jquery.patch.js (100%) rename test/{diffs-todo => diffs}/multiple-hunks.patch.js (100%) diff --git a/test/diffs-todo/jquery.patch.js b/test/diffs/jquery.patch.js similarity index 100% rename from test/diffs-todo/jquery.patch.js rename to test/diffs/jquery.patch.js diff --git a/test/diffs-todo/multiple-hunks.patch.js b/test/diffs/multiple-hunks.patch.js similarity index 100% rename from test/diffs-todo/multiple-hunks.patch.js rename to test/diffs/multiple-hunks.patch.js From c384909668cf3b8d2296b4eabf8d1bbecaeaf90b Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 10:42:31 -0400 Subject: [PATCH 197/208] Revert "Diff these branches by similarity." This reverts commit 4be9f2f0eee59d7b4163ae9cb074bb85b50f9c76. --- src/Interpreter.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreter.hs b/src/Interpreter.hs index 2c6678034..e2ff84091 100644 --- a/src/Interpreter.hs +++ b/src/Interpreter.hs @@ -68,7 +68,7 @@ algorithmWithTerms construct t1 t2 = case (unwrap t1, unwrap t2) of annotate $! S.Method identifier params expressions _ -> recursively t1 t2 where annotate = pure . construct . (both (extract t1) (extract t2) :<) - byIndex constructor a b = Algorithm.bySimilarity a b >>= annotate . constructor + byIndex constructor a b = Algorithm.byIndex a b >>= annotate . constructor -- | Run an algorithm, given functions characterizing the evaluation. runAlgorithm :: (Functor f, GAlign f, Eq a, Eq (Record fields), Eq (f (Cofree f (Record fields))), Prologue.Foldable f, Traversable f, HasField fields (Vector.Vector Double)) From 1574e74bcb2484c71ec806610eb839ffe7e6a8a8 Mon Sep 17 00:00:00 2001 From: joshvera Date: Wed, 10 Aug 2016 10:55:48 -0400 Subject: [PATCH 198/208] Normalize names before unioning --- test/CorpusSpec.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 92de8005d..2f20fd778 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -58,7 +58,7 @@ examples directory = do let lookupName name = (lookupNormalized name as, lookupNormalized name bs, lookupNormalized name jsons, lookupNormalized name patches, lookupNormalized name splits) - let keys = normalizeName <$> union as bs + let keys = union (normalizeName <$> as) (normalizeName <$> bs) pure $ lookupName <$> keys where lookupNormalized name = find $ (== name) . normalizeName From d09c1314a4b97b570887492724236a6210f91877 Mon Sep 17 00:00:00 2001 From: joshvera Date: Wed, 10 Aug 2016 10:56:28 -0400 Subject: [PATCH 199/208] infix --- test/CorpusSpec.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 2f20fd778..21604475e 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -58,7 +58,7 @@ examples directory = do let lookupName name = (lookupNormalized name as, lookupNormalized name bs, lookupNormalized name jsons, lookupNormalized name patches, lookupNormalized name splits) - let keys = union (normalizeName <$> as) (normalizeName <$> bs) + let keys = normalizeName <$> as `union` normalizeName <$> bs pure $ lookupName <$> keys where lookupNormalized name = find $ (== name) . normalizeName From dfbc5e186726ce570baf727e4faef91084ffd110 Mon Sep 17 00:00:00 2001 From: joshvera Date: Wed, 10 Aug 2016 10:58:41 -0400 Subject: [PATCH 200/208] infix --- test/CorpusSpec.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 21604475e..92c42c996 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -58,7 +58,7 @@ examples directory = do let lookupName name = (lookupNormalized name as, lookupNormalized name bs, lookupNormalized name jsons, lookupNormalized name patches, lookupNormalized name splits) - let keys = normalizeName <$> as `union` normalizeName <$> bs + let keys = (normalizeName <$> as) `union` (normalizeName <$> bs) pure $ lookupName <$> keys where lookupNormalized name = find $ (== name) . normalizeName From 1c1938080c0306b346ccf4f16435dc6f07f3aa8e Mon Sep 17 00:00:00 2001 From: joshvera Date: Wed, 10 Aug 2016 11:00:40 -0400 Subject: [PATCH 201/208] Use either path in output --- test/CorpusSpec.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 92c42c996..9eb1ec3a3 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -39,7 +39,7 @@ spec = parallel $ do paths <- runIO $ examples directory let tests = correctTests =<< paths traverse_ (\ (formatName, renderer, paths, output) -> - it (maybe "/dev/null" normalizeName (fst paths) ++ " (" ++ formatName ++ ")") $ testDiff renderer paths output matcher) tests + it (maybe "/dev/null" normalizeName (uncurry (<|>) (runJoin paths)) ++ " (" ++ formatName ++ ")") $ testDiff renderer paths output matcher) tests correctTests paths@(_, _, Nothing, Nothing, Nothing) = testsForPaths paths correctTests paths = filter (\(_, _, _, output) -> isJust output) $ testsForPaths paths From cd04fe5dd326940f0cd190cef49f0f524188c8c8 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 11:32:42 -0400 Subject: [PATCH 202/208] Move `positively` into a let binding. --- test/Data/RandomWalkSimilarity/Spec.hs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/Data/RandomWalkSimilarity/Spec.hs b/test/Data/RandomWalkSimilarity/Spec.hs index 3e92c26e3..034e04c5b 100644 --- a/test/Data/RandomWalkSimilarity/Spec.hs +++ b/test/Data/RandomWalkSimilarity/Spec.hs @@ -16,6 +16,7 @@ import Test.QuickCheck spec :: Spec spec = parallel $ do + let positively = succ . abs describe "pqGramDecorator" $ do prop "produces grams with stems of the specified length" $ \ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term Text (Record '[Text])) `shouldSatisfy` all ((== (positively p)) . length . stem . rhead) @@ -34,6 +35,3 @@ spec = parallel $ do tbs = featureVectorDecorator (category . headF) 2 2 15 . toTerm <$> (bs :: [ArbitraryTerm Text (Record '[Category])]) diff = free (Free (pure (pure 0 .: Program .: RNil) :< Indexed (rws compare tas tbs))) in (beforeTerm diff, afterTerm diff) `shouldBe` (Just (cofree ((pure 0 .: Program .: RNil) :< Indexed tas)), Just (cofree ((pure 0 .: Program .: RNil) :< Indexed tbs))) - -positively :: Int -> Int -positively = succ . abs From 38c7e5f3b9392d01187d5ea951d54759b8de3c3d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 15:50:13 -0400 Subject: [PATCH 203/208] Fix a typo in the docs for rws. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index f5b4af409..dac16df23 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -25,7 +25,7 @@ import Test.QuickCheck.Random -- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf). rws :: (Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), HasField fields (Vector.Vector Double)) - => (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function which comapres a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared. + => (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function which compares a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared. -> [Cofree f (Record fields)] -- ^ The list of old terms. -> [Cofree f (Record fields)] -- ^ The list of new terms. -> [Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))] From db75159a63a8b90801c272a25688cc06ba6e36ec Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 15:50:39 -0400 Subject: [PATCH 204/208] :memo: the return value of rws. --- src/Data/RandomWalkSimilarity.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index dac16df23..35c61ebd6 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -28,7 +28,7 @@ rws :: (Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Rec => (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function which compares a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared. -> [Cofree f (Record fields)] -- ^ The list of old terms. -> [Cofree f (Record fields)] -- ^ The list of new terms. - -> [Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))] + -> [Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))] -- ^ The resulting list of similarity-matched diffs. rws compare as bs | null as, null bs = [] | null as = inserting <$> bs From 98f54cabd251ffe455adb0640e50310ce4effffa Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 15:51:24 -0400 Subject: [PATCH 205/208] =?UTF-8?q?Reformat=20pqGramDecorator=E2=80=99s=20?= =?UTF-8?q?type=20signature=20for=20:memo:s.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Data/RandomWalkSimilarity.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 35c61ebd6..8ab699376 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -60,7 +60,12 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } deriving (Eq, Show) -- | Annotates a term with the corresponding p,q-gram at each node. -pqGramDecorator :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields)) +pqGramDecorator :: Traversable f + => (forall b. CofreeF f (Record fields) b -> label) + -> Int + -> Int + -> Cofree f (Record fields) + -> Cofree f (Record (Gram label ': fields)) pqGramDecorator getLabel p q = cata algebra where algebra term = let label = getLabel term in cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: headF term) :< (`evalState` (siblingLabels (tailF term))) (for (tailF term) (assignLabels label))) From d03ab55c238352d89cb4d8ee729832a6324157cf Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 15:53:37 -0400 Subject: [PATCH 206/208] :memo: pqGramDecorator. --- src/Data/RandomWalkSimilarity.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 8ab699376..14418904e 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -61,11 +61,11 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] } -- | Annotates a term with the corresponding p,q-gram at each node. pqGramDecorator :: Traversable f - => (forall b. CofreeF f (Record fields) b -> label) - -> Int - -> Int - -> Cofree f (Record fields) - -> Cofree f (Record (Gram label ': fields)) + => (forall b. CofreeF f (Record fields) b -> label) -- ^ A function computing the label from an arbitrary unpacked term. This function can use the annotation and functor’s constructor, but not any recursive values inside the functor (since they’re held parametric in 'b'). + -> Int -- ^ 'p'; the desired stem length for the grams. + -> Int -- ^ 'q'; the desired base length for the grams. + -> Cofree f (Record fields) -- ^ The term to decorate. + -> Cofree f (Record (Gram label ': fields)) -- ^ The decorated term. pqGramDecorator getLabel p q = cata algebra where algebra term = let label = getLabel term in cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: headF term) :< (`evalState` (siblingLabels (tailF term))) (for (tailF term) (assignLabels label))) From e162987c26f19a0950427f1ecb0243b22cd36497 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 15:54:37 -0400 Subject: [PATCH 207/208] Factor the calculation of the gram into the where clause. --- src/Data/RandomWalkSimilarity.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index 14418904e..d374b0fce 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -68,7 +68,8 @@ pqGramDecorator :: Traversable f -> Cofree f (Record (Gram label ': fields)) -- ^ The decorated term. pqGramDecorator getLabel p q = cata algebra where algebra term = let label = getLabel term in - cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: headF term) :< (`evalState` (siblingLabels (tailF term))) (for (tailF term) (assignLabels label))) + cofree ((gram label .: headF term) :< (`evalState` (siblingLabels (tailF term))) (for (tailF term) (assignLabels label))) + gram label = Gram (padToSize p []) (padToSize q (pure (Just label))) assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) assignLabels label a = case runCofree a of RCons gram rest :< functor -> do From 704ff4db08d06aeb149b1845c75a967a6021d31d Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Wed, 10 Aug 2016 15:57:55 -0400 Subject: [PATCH 208/208] Factor the base & stem assignment into the where clause. --- src/Data/RandomWalkSimilarity.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Data/RandomWalkSimilarity.hs b/src/Data/RandomWalkSimilarity.hs index d374b0fce..1b8b613bd 100644 --- a/src/Data/RandomWalkSimilarity.hs +++ b/src/Data/RandomWalkSimilarity.hs @@ -68,8 +68,9 @@ pqGramDecorator :: Traversable f -> Cofree f (Record (Gram label ': fields)) -- ^ The decorated term. pqGramDecorator getLabel p q = cata algebra where algebra term = let label = getLabel term in - cofree ((gram label .: headF term) :< (`evalState` (siblingLabels (tailF term))) (for (tailF term) (assignLabels label))) + cofree ((gram label .: headF term) :< assignParentAndSiblingLabels (tailF term) label) gram label = Gram (padToSize p []) (padToSize q (pure (Just label))) + assignParentAndSiblingLabels functor label = (`evalState` (siblingLabels functor)) (for functor (assignLabels label)) assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) assignLabels label a = case runCofree a of RCons gram rest :< functor -> do