diff --git a/semantic-diff.cabal b/semantic-diff.cabal index b90b6d02c..137f86a90 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -129,6 +129,7 @@ test-suite semantic-diff-test , semantic-diff , text >= 1.2.1.3 , these + , vector , wl-pprint-text if os(darwin) ghc-options: -threaded -rtsopts -with-rtsopts=-N -j diff --git a/src/Diffing.hs b/src/Diffing.hs index 8fea49e8d..8bfab8c0f 100644 --- a/src/Diffing.hs +++ b/src/Diffing.hs @@ -6,11 +6,13 @@ import Prologue hiding (fst, snd) import qualified Data.ByteString.Char8 as B1 import Data.Functor.Both import Data.Functor.Foldable +import Data.RandomWalkSimilarity import Data.Record import qualified Data.Text.IO as TextIO import qualified Data.Text.ICU.Detect as Detect import qualified Data.Text.ICU.Convert as Convert import Data.These +import qualified Data.Vector as Vector import Diff import Info import Interpreter @@ -37,9 +39,9 @@ import qualified Data.Text as T -- | result. -- | Returns the rendered result strictly, so it's always fully evaluated -- | with respect to other IO actions. -diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO Text +diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record (Vector.Vector Double ': fields)) -> Both SourceBlob -> IO Text diffFiles parser renderer sourceBlobs = do - terms <- traverse parser sourceBlobs + terms <- traverse (fmap (featureVectorDecorator getLabel p q d) . parser) sourceBlobs let areNullOids = runBothWith (\a b -> (oid a == nullOid || null (source a), oid b == nullOid || null (source b))) sourceBlobs let textDiff = case areNullOids of @@ -55,6 +57,10 @@ diffFiles parser renderer sourceBlobs = do getCost diff = case runFree diff of Free (info :< _) -> cost <$> info Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch))) + getLabel (h :< t) = (category h, case t of + Leaf s -> Just s + _ -> Nothing) + (p, q, d) = (2, 2, 15) -- | Return a parser based on the file extension (including the "."). parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category]) diff --git a/test/CorpusSpec.hs b/test/CorpusSpec.hs index 5c39249d1..81207a038 100644 --- a/test/CorpusSpec.hs +++ b/test/CorpusSpec.hs @@ -1,6 +1,7 @@ {-# LANGUAGE DataKinds, FlexibleContexts, GeneralizedNewtypeDeriving #-} module CorpusSpec where +import qualified Data.Vector as Vector import Diffing import Renderer import qualified Renderer.JSON as J @@ -69,7 +70,7 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte -- | Given file paths for A, B, and, optionally, a diff, return whether diffing -- | the files will produce the diff. If no diff is provided, then the result -- | is true, but the diff will still be calculated. -testDiff :: Renderer (Record '[Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation +testDiff :: Renderer (Record '[Vector.Vector Double, Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation testDiff renderer paths diff matcher = do sources <- sequence $ readAndTranscodeFile <$> paths actual <- Verbatim <$> diffFiles (decorateParser termCostDecorator parser) renderer (sourceBlobs sources)