1
1
mirror of https://github.com/github/semantic.git synced 2024-12-29 01:42:43 +03:00

Rework diffing for max parallelization

This commit is contained in:
Timothy Clem 2017-03-13 16:25:18 -07:00
parent 94dc2ac506
commit 652be339c6
3 changed files with 55 additions and 80 deletions

View File

@ -1,3 +1,4 @@
{-# LANGUAGE DataKinds #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module DiffCommand where
@ -5,12 +6,10 @@ import Data.Aeson hiding (json)
import Data.Functor.Both as Both
import Data.List ((\\))
import Data.String
import Data.Text.Encoding (encodeUtf8)
import GHC.Conc (numCapabilities)
import Prologue hiding (fst, snd, null)
import qualified Control.Concurrent.Async.Pool as Async
import System.FilePath.Posix (hasExtension)
import System.Timeout (timeout)
import Git.Blob
import Git.Libgit2
import Git.Libgit2.Backend
@ -22,6 +21,7 @@ import Category
import Data.RandomWalkSimilarity
import Data.Record
import Info
import Diff
import Interpreter
import ParseCommand (parserForFilepath)
import Parser
@ -35,7 +35,7 @@ import Renderer.Summary
import Renderer.TOC
import Source
import Syntax
import Term
import Debug.Trace
diff :: Arguments -> IO ByteString
diff args@Arguments{..} = case diffMode of
@ -45,11 +45,8 @@ diff args@Arguments{..} = case diffMode of
-- | Compare changes between two commits.
diffCommits :: Arguments -> IO ByteString
diffCommits args@Arguments{..} = do
ts <- fetchTerms args
pure $ maybe mempty concatOutputs ts
where fetchTerms args = if developmentMode
then Just <$> fetchDiffs args
else timeout timeoutInMicroseconds (fetchDiffs args)
outputs <- fetchDiffs args
pure $! concatOutputs outputs
-- | Compare two paths on the filesystem (compariable to git diff --no-index).
diffPaths :: Arguments -> Both FilePath -> IO ByteString
@ -66,28 +63,27 @@ fetchDiffs args@Arguments{..} = do
([], Join (Just a, Just b)) -> pathsToDiff args (both a b)
(ps, _) -> pure ps
Async.withTaskGroup numCapabilities $ \p -> Async.mapTasks p (fetchDiff args <$> paths)
diffs <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
fetchDiff args <$> paths
fetchDiff :: Arguments -> FilePath -> IO Output
pure $ uncurry (renderDiff args) <$> diffs
fetchDiff :: Arguments -> FilePath -> IO (Both SourceBlob, SyntaxDiff Text '[Range, Category, SourceSpan])
fetchDiff args@Arguments{..} filepath = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
lift $ runReaderT (fetchDiff' args filepath) repo
fetchDiff' :: Arguments -> FilePath -> ReaderT LgRepo IO Output
fetchDiff' args@Arguments{..} filepath = do
fetchDiff' :: Arguments -> FilePath -> ReaderT LgRepo IO (Both SourceBlob, SyntaxDiff Text '[Range, Category, SourceSpan])
fetchDiff' Arguments{..} filepath = do
liftIO $ traceEventIO ("START fetchDiff: " <> filepath)
sourcesAndOids <- sequence $ traverse (getSourceBlob filepath) <$> shaRange
let sources = fromMaybe (emptySourceBlob filepath) <$> sourcesAndOids
let sourceBlobs = idOrEmptySourceBlob <$> sources
text <- liftIO . render $ textDiff (parserForFilepath filepath) args sourceBlobs
truncatedPatch <- liftIO $ truncatedDiff args sourceBlobs
pure $ fromMaybe truncatedPatch text
where
render output = if developmentMode
then Just <$> output
else timeout timeoutInMicroseconds output
diff <- liftIO $ diffFiles (parserForFilepath filepath) sourceBlobs
pure $! traceEvent ("END fetchDiff: " <> filepath) (sourceBlobs, diff)
pathsToDiff :: Arguments -> Both String -> IO [FilePath]
pathsToDiff Arguments{..} shas = withRepository lgFactory gitDir $ do
@ -136,20 +132,19 @@ getSourceBlob path sha = do
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
-- | Given a parser and renderer, diff two sources and return the rendered
-- | result.
-- | Returns the rendered result strictly, so it's always fully evaluated
-- | with respect to other IO actions.
diffFiles :: HasField fields Category
-- | Given a parser, diff two sources and return a SyntaxDiff.
-- | Returns the rendered result strictly, so it's always fully evaluated with respect to other IO actions.
diffFiles :: (HasField fields Category, NFData (Record fields))
=> Parser (Syntax Text) (Record fields)
-> Renderer (Record fields)
-> Both SourceBlob
-> IO Output
diffFiles parse render sourceBlobs = do
-> IO (SyntaxDiff Text fields)
diffFiles parse sourceBlobs = do
traceEventIO $ "diffFiles@SEMANTIC-DIFF START parse terms: " <> paths
terms <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
(fmap (defaultFeatureVectorDecorator getLabel) . parse) <$> sourceBlobs
pure $! render sourceBlobs (stripDiff (diffTerms' terms))
traceEventIO $ "diffFiles@SEMANTIC-DIFF END parse terms: " <> paths
traceEventIO $ "diffFiles@SEMANTIC-DIFF START diff terms: " <> paths
traceEvent ("diffFiles@SEMANTIC-DIFF END diff terms: " <> paths) pure $!! stripDiff (diffTerms' terms)
where
diffTerms' terms = case runBothWith areNullOids sourceBlobs of
(True, False) -> pure $ Insert (snd terms)
@ -158,19 +153,18 @@ diffFiles parse render sourceBlobs = do
runBothWith diffTerms terms
areNullOids a b = (hasNullOid a, hasNullOid b)
hasNullOid blob = oid blob == nullOid || null (source blob)
-- For trace events
paths = runBothWith (\a b -> fileAtSha a <> " -> " <> fileAtSha b) sourceBlobs
fileAtSha x = path x <> "@" <> toS (oid x)
getLabel :: HasField fields Category => CofreeF (Syntax leaf) (Record fields) b -> (Category, Maybe leaf)
getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
getLabel :: HasField fields Category => CofreeF (Syntax leaf) (Record fields) b -> (Category, Maybe leaf)
getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
-- | Determine whether two terms are comparable based on the equality of their categories.
compareCategoryEq :: Functor f => HasField fields Category => Term f (Record fields) -> Term f (Record fields) -> Bool
compareCategoryEq = (==) `on` category . extract
-- | Returns a rendered diff given a parser, diff arguments and two source blobs.
textDiff :: (ToJSON (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO Output
textDiff parser arguments = diffFiles parser $ case format arguments of
-- | Returns a rendered diff given arguments and two source blobs.
renderDiff :: (ToJSON (Record fields), NFData (Record fields), DefaultFields fields) => Arguments -> Both SourceBlob -> SyntaxDiff Text fields -> Output
renderDiff args = case format args of
Split -> split
Patch -> patch
SExpression -> sExpression TreeOnly
@ -178,24 +172,8 @@ textDiff parser arguments = diffFiles parser $ case format arguments of
Summary -> summary
TOC -> toc
-- | Returns a truncated diff given diff arguments and two source blobs.
truncatedDiff :: Arguments -> Both SourceBlob -> IO Output
truncatedDiff Arguments{..} sources = pure $ case format of
Split -> SplitOutput mempty
Patch -> PatchOutput (truncatePatch sources)
SExpression -> SExpressionOutput mempty
JSON -> JSONOutput mempty
Summary -> SummaryOutput mempty
TOC -> TOCOutput mempty
-- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs.
printDiff :: (ToJSON (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO ByteString
printDiff parser arguments sources = do
rendered <- textDiff parser arguments sources
pure $ case rendered of
SplitOutput text -> encodeUtf8 text
PatchOutput text -> encodeUtf8 text
SExpressionOutput text -> text
JSONOutput series -> toS $ encode series
SummaryOutput summaries -> toS $ encode summaries
TOCOutput summaries -> toS $ encode summaries
-- | Prints a rendered diff to stdio or a filepath given a parser, arguments and two source blobs.
printDiff :: (ToJSON (Record fields), NFData (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO ByteString
printDiff parser args sources = do
diff <- diffFiles parser sources
pure $! concatOutputs [renderDiff args sources diff]

View File

@ -115,11 +115,12 @@ testParse path expectedOutput = do
actual `shouldBe` expected
testDiff :: Renderer (Record '[Range, Category, SourceSpan]) -> Both FilePath -> FilePath -> Expectation
testDiff renderer paths diff = do
testDiff renderer paths expectedOutput = do
sources <- traverse readAndTranscodeFile' paths
diff' <- diffFiles parser renderer (sourceBlobs sources)
let actual = (Verbatim . stripWhitespace. concatOutputs . pure) diff'
expected <- (Verbatim . stripWhitespace) <$> B.readFile diff
diff <- diffFiles parser (sourceBlobs sources)
let diffOutput = renderer (sourceBlobs sources) diff
let actual = (Verbatim . stripWhitespace. concatOutputs . pure) diffOutput
expected <- (Verbatim . stripWhitespace) <$> B.readFile expectedOutput
actual `shouldBe` expected
where
parser = parserForFilepath filePath

View File

@ -110,19 +110,23 @@ spec = parallel $ do
describe "diffFiles" $ do
it "encodes to final JSON" $ do
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.B.rb")
let parser = parserForFilepath (path (fst sourceBlobs))
output <- diffFiles parser toc sourceBlobs
concatOutputs (pure output) `shouldBe` ("{\"changes\":{\"ruby/methods.A.rb -> ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}" )
output <- diffOutput sourceBlobs
output `shouldBe` "{\"changes\":{\"ruby/methods.A.rb -> ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}"
it "encodes to final JSON if there are parse errors" $ do
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.X.rb")
let parser = parserForFilepath (path (fst sourceBlobs))
output <- diffFiles parser toc sourceBlobs
concatOutputs (pure output) `shouldBe` ("{\"changes\":{},\"errors\":{\"ruby/methods.A.rb -> ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"def bar\\nen\\n\"}]}}" )
output <- diffOutput sourceBlobs
output `shouldBe` "{\"changes\":{},\"errors\":{\"ruby/methods.A.rb -> ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"def bar\\nen\\n\"}]}}"
type Diff' = SyntaxDiff String '[Range, Category, SourceSpan]
type Term' = SyntaxTerm String '[Range, Category, SourceSpan]
diffOutput :: Both SourceBlob -> IO ByteString
diffOutput sourceBlobs = do
let parser = parserForFilepath (path (fst sourceBlobs))
diff <- diffFiles parser sourceBlobs
pure $ concatOutputs [toc sourceBlobs diff]
numTocSummaries :: Diff' -> Int
numTocSummaries diff = Prologue.length $ filter (not . isErrorSummary) (diffTOC blankDiffBlobs diff)
@ -184,17 +188,9 @@ isMethodOrFunction a = case runCofree (unListableF a) of
_ -> False
testDiff :: Both SourceBlob -> IO (Diff (Syntax Text) (Record '[Range, Category, SourceSpan]))
testDiff sourceBlobs = do
terms <- traverse (fmap (defaultFeatureVectorDecorator getLabel) . parser) sourceBlobs
pure $! stripDiff (diffTerms' terms sourceBlobs)
testDiff sourceBlobs = diffFiles parser sourceBlobs
where
parser = parserForFilepath (path . fst $ sourceBlobs)
diffTerms' terms blobs = case runBothWith areNullOids blobs of
(True, False) -> pure $ Insert (snd terms)
(False, True) -> pure $ Delete (fst terms)
(_, _) -> runBothWith diffTerms terms
areNullOids a b = (hasNullOid a, hasNullOid b)
hasNullOid blob = oid blob == nullOid || Source.null (source blob)
blobsForPaths :: Both FilePath -> IO (Both SourceBlob)
blobsForPaths paths = do