1
1
mirror of https://github.com/github/semantic.git synced 2024-11-27 12:57:49 +03:00

Diffing annotates terms with their feature vectors.

This commit is contained in:
Rob Rix 2016-08-04 20:43:10 -04:00
parent 74710eefcd
commit 60c4effabe
3 changed files with 11 additions and 3 deletions

View File

@ -129,6 +129,7 @@ test-suite semantic-diff-test
, semantic-diff
, text >= 1.2.1.3
, these
, vector
, wl-pprint-text
if os(darwin)
ghc-options: -threaded -rtsopts -with-rtsopts=-N -j

View File

@ -6,11 +6,13 @@ import Prologue hiding (fst, snd)
import qualified Data.ByteString.Char8 as B1
import Data.Functor.Both
import Data.Functor.Foldable
import Data.RandomWalkSimilarity
import Data.Record
import qualified Data.Text.IO as TextIO
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
import Data.These
import qualified Data.Vector as Vector
import Diff
import Info
import Interpreter
@ -37,9 +39,9 @@ import qualified Data.Text as T
-- | result.
-- | Returns the rendered result strictly, so it's always fully evaluated
-- | with respect to other IO actions.
diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record fields) -> Both SourceBlob -> IO Text
diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record (Vector.Vector Double ': fields)) -> Both SourceBlob -> IO Text
diffFiles parser renderer sourceBlobs = do
terms <- traverse parser sourceBlobs
terms <- traverse (fmap (featureVectorDecorator getLabel p q d) . parser) sourceBlobs
let areNullOids = runBothWith (\a b -> (oid a == nullOid || null (source a), oid b == nullOid || null (source b))) sourceBlobs
let textDiff = case areNullOids of
@ -55,6 +57,10 @@ diffFiles parser renderer sourceBlobs = do
getCost diff = case runFree diff of
Free (info :< _) -> cost <$> info
Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch)))
getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
(p, q, d) = (2, 2, 15)
-- | Return a parser based on the file extension (including the ".").
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category])

View File

@ -1,6 +1,7 @@
{-# LANGUAGE DataKinds, FlexibleContexts, GeneralizedNewtypeDeriving #-}
module CorpusSpec where
import qualified Data.Vector as Vector
import Diffing
import Renderer
import qualified Renderer.JSON as J
@ -69,7 +70,7 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte
-- | Given file paths for A, B, and, optionally, a diff, return whether diffing
-- | the files will produce the diff. If no diff is provided, then the result
-- | is true, but the diff will still be calculated.
testDiff :: Renderer (Record '[Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation
testDiff :: Renderer (Record '[Vector.Vector Double, Cost, Range, Category]) -> Both FilePath -> Maybe FilePath -> (Verbatim -> Verbatim -> Expectation) -> Expectation
testDiff renderer paths diff matcher = do
sources <- sequence $ readAndTranscodeFile <$> paths
actual <- Verbatim <$> diffFiles (decorateParser termCostDecorator parser) renderer (sourceBlobs sources)