mirror of
https://github.com/github/semantic.git
synced 2024-11-27 12:57:49 +03:00
Diffing annotates terms with their feature vectors.
This commit is contained in:
parent
74710eefcd
commit
60c4effabe
@ -129,6 +129,7 @@ test-suite semantic-diff-test
|
||||
, semantic-diff
|
||||
, text >= 1.2.1.3
|
||||
, these
|
||||
, vector
|
||||
, wl-pprint-text
|
||||
if os(darwin)
|
||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N -j
|
||||
|
@ -6,11 +6,13 @@ import Prologue hiding (fst, snd)
|
||||
import qualified Data.ByteString.Char8 as B1
|
||||
import Data.Functor.Both
|
||||
import Data.Functor.Foldable
|
||||
import Data.RandomWalkSimilarity
|
||||
import Data.Record
|
||||
import qualified Data.Text.IO as TextIO
|
||||
import qualified Data.Text.ICU.Detect as Detect
|
||||
import qualified Data.Text.ICU.Convert as Convert
|
||||
import Data.These
|
||||
import qualified Data.Vector as Vector
|
||||
import Diff
|
||||
import Info
|
||||
import Interpreter
|
||||
@ -37,9 +39,9 @@ import qualified Data.Text as T
|
||||
-- | result.
|
||||
-- | Returns the rendered result strictly, so it's always fully evaluated
|
||||
-- | with respect to other IO actions.
|
||||
diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO Text
|
||||
diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record (Vector.Vector Double ': fields)) -> Both SourceBlob -> IO Text
|
||||
diffFiles parser renderer sourceBlobs = do
|
||||
terms <- traverse parser sourceBlobs
|
||||
terms <- traverse (fmap (featureVectorDecorator getLabel p q d) . parser) sourceBlobs
|
||||
|
||||
let areNullOids = runBothWith (\a b -> (oid a == nullOid || null (source a), oid b == nullOid || null (source b))) sourceBlobs
|
||||
let textDiff = case areNullOids of
|
||||
@ -55,6 +57,10 @@ diffFiles parser renderer sourceBlobs = do
|
||||
getCost diff = case runFree diff of
|
||||
Free (info :< _) -> cost <$> info
|
||||
Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch)))
|
||||
getLabel (h :< t) = (category h, case t of
|
||||
Leaf s -> Just s
|
||||
_ -> Nothing)
|
||||
(p, q, d) = (2, 2, 15)
|
||||
|
||||
-- | Return a parser based on the file extension (including the ".").
|
||||
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category])
|
||||
|
@ -1,6 +1,7 @@
|
||||
{-# LANGUAGE DataKinds, FlexibleContexts, GeneralizedNewtypeDeriving #-}
|
||||
module CorpusSpec where
|
||||
|
||||
import qualified Data.Vector as Vector
|
||||
import Diffing
|
||||
import Renderer
|
||||
import qualified Renderer.JSON as J
|
||||
@ -69,7 +70,7 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte
|
||||
-- | Given file paths for A, B, and, optionally, a diff, return whether diffing
|
||||
-- | the files will produce the diff. If no diff is provided, then the result
|
||||
-- | is true, but the diff will still be calculated.
|
||||
testDiff :: Renderer (Record '[Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation
|
||||
testDiff :: Renderer (Record '[Vector.Vector Double, Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation
|
||||
testDiff renderer paths diff matcher = do
|
||||
sources <- sequence $ readAndTranscodeFile <$> paths
|
||||
actual <- Verbatim <$> diffFiles (decorateParser termCostDecorator parser) renderer (sourceBlobs sources)
|
||||
|
Loading…
Reference in New Issue
Block a user