mirror of
https://github.com/github/semantic.git
synced 2025-01-01 11:46:14 +03:00
Rework diffing for max parallelization
This commit is contained in:
parent
94dc2ac506
commit
652be339c6
@ -1,3 +1,4 @@
|
|||||||
|
{-# LANGUAGE DataKinds #-}
|
||||||
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
|
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
|
||||||
module DiffCommand where
|
module DiffCommand where
|
||||||
|
|
||||||
@ -5,12 +6,10 @@ import Data.Aeson hiding (json)
|
|||||||
import Data.Functor.Both as Both
|
import Data.Functor.Both as Both
|
||||||
import Data.List ((\\))
|
import Data.List ((\\))
|
||||||
import Data.String
|
import Data.String
|
||||||
import Data.Text.Encoding (encodeUtf8)
|
|
||||||
import GHC.Conc (numCapabilities)
|
import GHC.Conc (numCapabilities)
|
||||||
import Prologue hiding (fst, snd, null)
|
import Prologue hiding (fst, snd, null)
|
||||||
import qualified Control.Concurrent.Async.Pool as Async
|
import qualified Control.Concurrent.Async.Pool as Async
|
||||||
import System.FilePath.Posix (hasExtension)
|
import System.FilePath.Posix (hasExtension)
|
||||||
import System.Timeout (timeout)
|
|
||||||
import Git.Blob
|
import Git.Blob
|
||||||
import Git.Libgit2
|
import Git.Libgit2
|
||||||
import Git.Libgit2.Backend
|
import Git.Libgit2.Backend
|
||||||
@ -22,6 +21,7 @@ import Category
|
|||||||
import Data.RandomWalkSimilarity
|
import Data.RandomWalkSimilarity
|
||||||
import Data.Record
|
import Data.Record
|
||||||
import Info
|
import Info
|
||||||
|
import Diff
|
||||||
import Interpreter
|
import Interpreter
|
||||||
import ParseCommand (parserForFilepath)
|
import ParseCommand (parserForFilepath)
|
||||||
import Parser
|
import Parser
|
||||||
@ -35,7 +35,7 @@ import Renderer.Summary
|
|||||||
import Renderer.TOC
|
import Renderer.TOC
|
||||||
import Source
|
import Source
|
||||||
import Syntax
|
import Syntax
|
||||||
import Term
|
import Debug.Trace
|
||||||
|
|
||||||
diff :: Arguments -> IO ByteString
|
diff :: Arguments -> IO ByteString
|
||||||
diff args@Arguments{..} = case diffMode of
|
diff args@Arguments{..} = case diffMode of
|
||||||
@ -45,11 +45,8 @@ diff args@Arguments{..} = case diffMode of
|
|||||||
-- | Compare changes between two commits.
|
-- | Compare changes between two commits.
|
||||||
diffCommits :: Arguments -> IO ByteString
|
diffCommits :: Arguments -> IO ByteString
|
||||||
diffCommits args@Arguments{..} = do
|
diffCommits args@Arguments{..} = do
|
||||||
ts <- fetchTerms args
|
outputs <- fetchDiffs args
|
||||||
pure $ maybe mempty concatOutputs ts
|
pure $! concatOutputs outputs
|
||||||
where fetchTerms args = if developmentMode
|
|
||||||
then Just <$> fetchDiffs args
|
|
||||||
else timeout timeoutInMicroseconds (fetchDiffs args)
|
|
||||||
|
|
||||||
-- | Compare two paths on the filesystem (compariable to git diff --no-index).
|
-- | Compare two paths on the filesystem (compariable to git diff --no-index).
|
||||||
diffPaths :: Arguments -> Both FilePath -> IO ByteString
|
diffPaths :: Arguments -> Both FilePath -> IO ByteString
|
||||||
@ -66,28 +63,27 @@ fetchDiffs args@Arguments{..} = do
|
|||||||
([], Join (Just a, Just b)) -> pathsToDiff args (both a b)
|
([], Join (Just a, Just b)) -> pathsToDiff args (both a b)
|
||||||
(ps, _) -> pure ps
|
(ps, _) -> pure ps
|
||||||
|
|
||||||
Async.withTaskGroup numCapabilities $ \p -> Async.mapTasks p (fetchDiff args <$> paths)
|
diffs <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
|
||||||
|
fetchDiff args <$> paths
|
||||||
|
|
||||||
fetchDiff :: Arguments -> FilePath -> IO Output
|
pure $ uncurry (renderDiff args) <$> diffs
|
||||||
|
|
||||||
|
fetchDiff :: Arguments -> FilePath -> IO (Both SourceBlob, SyntaxDiff Text '[Range, Category, SourceSpan])
|
||||||
fetchDiff args@Arguments{..} filepath = withRepository lgFactory gitDir $ do
|
fetchDiff args@Arguments{..} filepath = withRepository lgFactory gitDir $ do
|
||||||
repo <- getRepository
|
repo <- getRepository
|
||||||
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
|
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
|
||||||
lift $ runReaderT (fetchDiff' args filepath) repo
|
lift $ runReaderT (fetchDiff' args filepath) repo
|
||||||
|
|
||||||
fetchDiff' :: Arguments -> FilePath -> ReaderT LgRepo IO Output
|
fetchDiff' :: Arguments -> FilePath -> ReaderT LgRepo IO (Both SourceBlob, SyntaxDiff Text '[Range, Category, SourceSpan])
|
||||||
fetchDiff' args@Arguments{..} filepath = do
|
fetchDiff' Arguments{..} filepath = do
|
||||||
|
liftIO $ traceEventIO ("START fetchDiff: " <> filepath)
|
||||||
sourcesAndOids <- sequence $ traverse (getSourceBlob filepath) <$> shaRange
|
sourcesAndOids <- sequence $ traverse (getSourceBlob filepath) <$> shaRange
|
||||||
|
|
||||||
let sources = fromMaybe (emptySourceBlob filepath) <$> sourcesAndOids
|
let sources = fromMaybe (emptySourceBlob filepath) <$> sourcesAndOids
|
||||||
let sourceBlobs = idOrEmptySourceBlob <$> sources
|
let sourceBlobs = idOrEmptySourceBlob <$> sources
|
||||||
|
|
||||||
text <- liftIO . render $ textDiff (parserForFilepath filepath) args sourceBlobs
|
diff <- liftIO $ diffFiles (parserForFilepath filepath) sourceBlobs
|
||||||
truncatedPatch <- liftIO $ truncatedDiff args sourceBlobs
|
pure $! traceEvent ("END fetchDiff: " <> filepath) (sourceBlobs, diff)
|
||||||
pure $ fromMaybe truncatedPatch text
|
|
||||||
where
|
|
||||||
render output = if developmentMode
|
|
||||||
then Just <$> output
|
|
||||||
else timeout timeoutInMicroseconds output
|
|
||||||
|
|
||||||
pathsToDiff :: Arguments -> Both String -> IO [FilePath]
|
pathsToDiff :: Arguments -> Both String -> IO [FilePath]
|
||||||
pathsToDiff Arguments{..} shas = withRepository lgFactory gitDir $ do
|
pathsToDiff Arguments{..} shas = withRepository lgFactory gitDir $ do
|
||||||
@ -136,20 +132,19 @@ getSourceBlob path sha = do
|
|||||||
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
|
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
|
||||||
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
|
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
|
||||||
|
|
||||||
-- | Given a parser and renderer, diff two sources and return the rendered
|
-- | Given a parser, diff two sources and return a SyntaxDiff.
|
||||||
-- | result.
|
-- | Returns the rendered result strictly, so it's always fully evaluated with respect to other IO actions.
|
||||||
-- | Returns the rendered result strictly, so it's always fully evaluated
|
diffFiles :: (HasField fields Category, NFData (Record fields))
|
||||||
-- | with respect to other IO actions.
|
|
||||||
diffFiles :: HasField fields Category
|
|
||||||
=> Parser (Syntax Text) (Record fields)
|
=> Parser (Syntax Text) (Record fields)
|
||||||
-> Renderer (Record fields)
|
|
||||||
-> Both SourceBlob
|
-> Both SourceBlob
|
||||||
-> IO Output
|
-> IO (SyntaxDiff Text fields)
|
||||||
diffFiles parse render sourceBlobs = do
|
diffFiles parse sourceBlobs = do
|
||||||
|
traceEventIO $ "diffFiles@SEMANTIC-DIFF START parse terms: " <> paths
|
||||||
terms <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
|
terms <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
|
||||||
(fmap (defaultFeatureVectorDecorator getLabel) . parse) <$> sourceBlobs
|
(fmap (defaultFeatureVectorDecorator getLabel) . parse) <$> sourceBlobs
|
||||||
pure $! render sourceBlobs (stripDiff (diffTerms' terms))
|
traceEventIO $ "diffFiles@SEMANTIC-DIFF END parse terms: " <> paths
|
||||||
|
traceEventIO $ "diffFiles@SEMANTIC-DIFF START diff terms: " <> paths
|
||||||
|
traceEvent ("diffFiles@SEMANTIC-DIFF END diff terms: " <> paths) pure $!! stripDiff (diffTerms' terms)
|
||||||
where
|
where
|
||||||
diffTerms' terms = case runBothWith areNullOids sourceBlobs of
|
diffTerms' terms = case runBothWith areNullOids sourceBlobs of
|
||||||
(True, False) -> pure $ Insert (snd terms)
|
(True, False) -> pure $ Insert (snd terms)
|
||||||
@ -158,19 +153,18 @@ diffFiles parse render sourceBlobs = do
|
|||||||
runBothWith diffTerms terms
|
runBothWith diffTerms terms
|
||||||
areNullOids a b = (hasNullOid a, hasNullOid b)
|
areNullOids a b = (hasNullOid a, hasNullOid b)
|
||||||
hasNullOid blob = oid blob == nullOid || null (source blob)
|
hasNullOid blob = oid blob == nullOid || null (source blob)
|
||||||
|
-- For trace events
|
||||||
|
paths = runBothWith (\a b -> fileAtSha a <> " -> " <> fileAtSha b) sourceBlobs
|
||||||
|
fileAtSha x = path x <> "@" <> toS (oid x)
|
||||||
|
|
||||||
getLabel :: HasField fields Category => CofreeF (Syntax leaf) (Record fields) b -> (Category, Maybe leaf)
|
getLabel :: HasField fields Category => CofreeF (Syntax leaf) (Record fields) b -> (Category, Maybe leaf)
|
||||||
getLabel (h :< t) = (category h, case t of
|
getLabel (h :< t) = (category h, case t of
|
||||||
Leaf s -> Just s
|
Leaf s -> Just s
|
||||||
_ -> Nothing)
|
_ -> Nothing)
|
||||||
|
|
||||||
-- | Determine whether two terms are comparable based on the equality of their categories.
|
-- | Returns a rendered diff given arguments and two source blobs.
|
||||||
compareCategoryEq :: Functor f => HasField fields Category => Term f (Record fields) -> Term f (Record fields) -> Bool
|
renderDiff :: (ToJSON (Record fields), NFData (Record fields), DefaultFields fields) => Arguments -> Both SourceBlob -> SyntaxDiff Text fields -> Output
|
||||||
compareCategoryEq = (==) `on` category . extract
|
renderDiff args = case format args of
|
||||||
|
|
||||||
-- | Returns a rendered diff given a parser, diff arguments and two source blobs.
|
|
||||||
textDiff :: (ToJSON (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO Output
|
|
||||||
textDiff parser arguments = diffFiles parser $ case format arguments of
|
|
||||||
Split -> split
|
Split -> split
|
||||||
Patch -> patch
|
Patch -> patch
|
||||||
SExpression -> sExpression TreeOnly
|
SExpression -> sExpression TreeOnly
|
||||||
@ -178,24 +172,8 @@ textDiff parser arguments = diffFiles parser $ case format arguments of
|
|||||||
Summary -> summary
|
Summary -> summary
|
||||||
TOC -> toc
|
TOC -> toc
|
||||||
|
|
||||||
-- | Returns a truncated diff given diff arguments and two source blobs.
|
-- | Prints a rendered diff to stdio or a filepath given a parser, arguments and two source blobs.
|
||||||
truncatedDiff :: Arguments -> Both SourceBlob -> IO Output
|
printDiff :: (ToJSON (Record fields), NFData (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO ByteString
|
||||||
truncatedDiff Arguments{..} sources = pure $ case format of
|
printDiff parser args sources = do
|
||||||
Split -> SplitOutput mempty
|
diff <- diffFiles parser sources
|
||||||
Patch -> PatchOutput (truncatePatch sources)
|
pure $! concatOutputs [renderDiff args sources diff]
|
||||||
SExpression -> SExpressionOutput mempty
|
|
||||||
JSON -> JSONOutput mempty
|
|
||||||
Summary -> SummaryOutput mempty
|
|
||||||
TOC -> TOCOutput mempty
|
|
||||||
|
|
||||||
-- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs.
|
|
||||||
printDiff :: (ToJSON (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO ByteString
|
|
||||||
printDiff parser arguments sources = do
|
|
||||||
rendered <- textDiff parser arguments sources
|
|
||||||
pure $ case rendered of
|
|
||||||
SplitOutput text -> encodeUtf8 text
|
|
||||||
PatchOutput text -> encodeUtf8 text
|
|
||||||
SExpressionOutput text -> text
|
|
||||||
JSONOutput series -> toS $ encode series
|
|
||||||
SummaryOutput summaries -> toS $ encode summaries
|
|
||||||
TOCOutput summaries -> toS $ encode summaries
|
|
||||||
|
@ -115,11 +115,12 @@ testParse path expectedOutput = do
|
|||||||
actual `shouldBe` expected
|
actual `shouldBe` expected
|
||||||
|
|
||||||
testDiff :: Renderer (Record '[Range, Category, SourceSpan]) -> Both FilePath -> FilePath -> Expectation
|
testDiff :: Renderer (Record '[Range, Category, SourceSpan]) -> Both FilePath -> FilePath -> Expectation
|
||||||
testDiff renderer paths diff = do
|
testDiff renderer paths expectedOutput = do
|
||||||
sources <- traverse readAndTranscodeFile' paths
|
sources <- traverse readAndTranscodeFile' paths
|
||||||
diff' <- diffFiles parser renderer (sourceBlobs sources)
|
diff <- diffFiles parser (sourceBlobs sources)
|
||||||
let actual = (Verbatim . stripWhitespace. concatOutputs . pure) diff'
|
let diffOutput = renderer (sourceBlobs sources) diff
|
||||||
expected <- (Verbatim . stripWhitespace) <$> B.readFile diff
|
let actual = (Verbatim . stripWhitespace. concatOutputs . pure) diffOutput
|
||||||
|
expected <- (Verbatim . stripWhitespace) <$> B.readFile expectedOutput
|
||||||
actual `shouldBe` expected
|
actual `shouldBe` expected
|
||||||
where
|
where
|
||||||
parser = parserForFilepath filePath
|
parser = parserForFilepath filePath
|
||||||
|
@ -110,19 +110,23 @@ spec = parallel $ do
|
|||||||
describe "diffFiles" $ do
|
describe "diffFiles" $ do
|
||||||
it "encodes to final JSON" $ do
|
it "encodes to final JSON" $ do
|
||||||
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.B.rb")
|
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.B.rb")
|
||||||
let parser = parserForFilepath (path (fst sourceBlobs))
|
output <- diffOutput sourceBlobs
|
||||||
output <- diffFiles parser toc sourceBlobs
|
output `shouldBe` "{\"changes\":{\"ruby/methods.A.rb -> ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}"
|
||||||
concatOutputs (pure output) `shouldBe` ("{\"changes\":{\"ruby/methods.A.rb -> ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}" )
|
|
||||||
|
|
||||||
it "encodes to final JSON if there are parse errors" $ do
|
it "encodes to final JSON if there are parse errors" $ do
|
||||||
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.X.rb")
|
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.X.rb")
|
||||||
let parser = parserForFilepath (path (fst sourceBlobs))
|
output <- diffOutput sourceBlobs
|
||||||
output <- diffFiles parser toc sourceBlobs
|
output `shouldBe` "{\"changes\":{},\"errors\":{\"ruby/methods.A.rb -> ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"def bar\\nen\\n\"}]}}"
|
||||||
concatOutputs (pure output) `shouldBe` ("{\"changes\":{},\"errors\":{\"ruby/methods.A.rb -> ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"def bar\\nen\\n\"}]}}" )
|
|
||||||
|
|
||||||
type Diff' = SyntaxDiff String '[Range, Category, SourceSpan]
|
type Diff' = SyntaxDiff String '[Range, Category, SourceSpan]
|
||||||
type Term' = SyntaxTerm String '[Range, Category, SourceSpan]
|
type Term' = SyntaxTerm String '[Range, Category, SourceSpan]
|
||||||
|
|
||||||
|
diffOutput :: Both SourceBlob -> IO ByteString
|
||||||
|
diffOutput sourceBlobs = do
|
||||||
|
let parser = parserForFilepath (path (fst sourceBlobs))
|
||||||
|
diff <- diffFiles parser sourceBlobs
|
||||||
|
pure $ concatOutputs [toc sourceBlobs diff]
|
||||||
|
|
||||||
numTocSummaries :: Diff' -> Int
|
numTocSummaries :: Diff' -> Int
|
||||||
numTocSummaries diff = Prologue.length $ filter (not . isErrorSummary) (diffTOC blankDiffBlobs diff)
|
numTocSummaries diff = Prologue.length $ filter (not . isErrorSummary) (diffTOC blankDiffBlobs diff)
|
||||||
|
|
||||||
@ -184,17 +188,9 @@ isMethodOrFunction a = case runCofree (unListableF a) of
|
|||||||
_ -> False
|
_ -> False
|
||||||
|
|
||||||
testDiff :: Both SourceBlob -> IO (Diff (Syntax Text) (Record '[Range, Category, SourceSpan]))
|
testDiff :: Both SourceBlob -> IO (Diff (Syntax Text) (Record '[Range, Category, SourceSpan]))
|
||||||
testDiff sourceBlobs = do
|
testDiff sourceBlobs = diffFiles parser sourceBlobs
|
||||||
terms <- traverse (fmap (defaultFeatureVectorDecorator getLabel) . parser) sourceBlobs
|
|
||||||
pure $! stripDiff (diffTerms' terms sourceBlobs)
|
|
||||||
where
|
where
|
||||||
parser = parserForFilepath (path . fst $ sourceBlobs)
|
parser = parserForFilepath (path . fst $ sourceBlobs)
|
||||||
diffTerms' terms blobs = case runBothWith areNullOids blobs of
|
|
||||||
(True, False) -> pure $ Insert (snd terms)
|
|
||||||
(False, True) -> pure $ Delete (fst terms)
|
|
||||||
(_, _) -> runBothWith diffTerms terms
|
|
||||||
areNullOids a b = (hasNullOid a, hasNullOid b)
|
|
||||||
hasNullOid blob = oid blob == nullOid || Source.null (source blob)
|
|
||||||
|
|
||||||
blobsForPaths :: Both FilePath -> IO (Both SourceBlob)
|
blobsForPaths :: Both FilePath -> IO (Both SourceBlob)
|
||||||
blobsForPaths paths = do
|
blobsForPaths paths = do
|
||||||
|
Loading…
Reference in New Issue
Block a user