1
1
mirror of https://github.com/github/semantic.git synced 2024-12-25 16:02:43 +03:00

Merge pull request #1042 from github/parallel-path-diffing

Turn on parallel path diffing
This commit is contained in:
Timothy Clem 2017-03-15 08:22:32 -07:00 committed by GitHub
commit b3f14f14da
12 changed files with 97 additions and 101 deletions

View File

@ -1,4 +1,5 @@
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Category where
@ -227,7 +228,7 @@ data Category
| Modifier Category
-- | A singleton method declaration, e.g. `def self.foo;end` in Ruby
| SingletonMethod
deriving (Eq, Generic, Ord, Show)
deriving (Eq, Generic, Ord, Show, NFData)
{-# DEPRECATED RescueModifier "Deprecated; use Modifier Rescue instead." #-}

View File

@ -31,3 +31,5 @@ instance (Semigroup a, Monoid a) => Monoid (Join (,) a) where
instance (Semigroup a) => Semigroup (Join (,) a) where
a <> b = Join $ runJoin a <> runJoin b
instance NFData a => NFData (Join (,) a)

View File

@ -51,6 +51,11 @@ instance {-# OVERLAPPABLE #-} HasField (field ': fields) field where
getField (h :. _) = h
setField (_ :. t) f = f :. t
instance (NFData h, NFData (Record t)) => NFData (Record (h ': t)) where
rnf (h :. t) = rnf h `seq` rnf t `seq` ()
instance NFData (Record '[]) where
rnf _ = ()
instance (Show h, Show (Record t)) => Show (Record (h ': t)) where
showsPrec n (h :. t) = showParen (n > 0) $ showsPrec 1 h . (" :. " <>) . shows t

View File

@ -1,4 +1,5 @@
{-# LANGUAGE TypeSynonymInstances, ScopedTypeVariables #-}
{-# LANGUAGE TypeSynonymInstances, ScopedTypeVariables, UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Diff where
import Prologue
@ -50,3 +51,8 @@ modifyAnnotations :: (Functor f, Functor g) => (annotation -> annotation) -> Fre
modifyAnnotations f r = case runFree r of
Free (ga :< functor) -> wrap (fmap f ga :< functor)
_ -> r
instance (NFData (f (Diff f a)), NFData (Cofree f a), NFData a, Functor f) => NFData (Diff f a) where
rnf fa = case runFree fa of
Free f -> rnf f `seq` ()
Pure a -> rnf a `seq` ()

View File

@ -1,3 +1,4 @@
{-# LANGUAGE DataKinds #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module DiffCommand where
@ -5,12 +6,10 @@ import Data.Aeson hiding (json)
import Data.Functor.Both as Both
import Data.List ((\\))
import Data.String
import Data.Text.Encoding (encodeUtf8)
import GHC.Conc (numCapabilities)
import Prologue hiding (fst, snd, null)
import qualified Control.Concurrent.Async.Pool as Async
import System.FilePath.Posix (hasExtension)
import System.Timeout (timeout)
import Git.Blob
import Git.Libgit2
import Git.Libgit2.Backend
@ -22,6 +21,7 @@ import Category
import Data.RandomWalkSimilarity
import Data.Record
import Info
import Diff
import Interpreter
import ParseCommand (parserForFilepath)
import Parser
@ -35,7 +35,7 @@ import Renderer.Summary
import Renderer.TOC
import Source
import Syntax
import Term
import Debug.Trace
diff :: Arguments -> IO ByteString
diff args@Arguments{..} = case diffMode of
@ -45,11 +45,8 @@ diff args@Arguments{..} = case diffMode of
-- | Compare changes between two commits.
diffCommits :: Arguments -> IO ByteString
diffCommits args@Arguments{..} = do
ts <- fetchTerms args
pure $ maybe mempty concatOutputs ts
where fetchTerms args = if developmentMode
then Just <$> fetchDiffs args
else timeout timeoutInMicroseconds (fetchDiffs args)
outputs <- fetchDiffs args
pure $! concatOutputs outputs
-- | Compare two paths on the filesystem (compariable to git diff --no-index).
diffPaths :: Arguments -> Both FilePath -> IO ByteString
@ -66,40 +63,38 @@ fetchDiffs args@Arguments{..} = do
([], Join (Just a, Just b)) -> pathsToDiff args (both a b)
(ps, _) -> pure ps
Async.withTaskGroup numCapabilities $ \p -> Async.mapTasks p (fetchDiff args <$> paths)
diffs <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
fetchDiff args <$> paths
pure $ uncurry (renderDiff args) <$> diffs
fetchDiff :: Arguments -> FilePath -> IO Output
fetchDiff :: Arguments -> FilePath -> IO (Both SourceBlob, SyntaxDiff Text '[Range, Category, SourceSpan])
fetchDiff args@Arguments{..} filepath = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
lift $ runReaderT (fetchDiff' args filepath) repo
fetchDiff' :: Arguments -> FilePath -> ReaderT LgRepo IO Output
fetchDiff' args@Arguments{..} filepath = do
sourcesAndOids <- sequence $ traverse (getSourceBlob filepath) <$> shaRange
let sources = fromMaybe (emptySourceBlob filepath) <$> sourcesAndOids
let sourceBlobs = idOrEmptySourceBlob <$> sources
text <- liftIO . render $ textDiff (parserForFilepath filepath) args sourceBlobs
truncatedPatch <- liftIO $ truncatedDiff args sourceBlobs
pure $ fromMaybe truncatedPatch text
lift $ runReaderT (go args filepath) repo
where
render output = if developmentMode
then Just <$> output
else timeout timeoutInMicroseconds output
go :: Arguments -> FilePath -> ReaderT LgRepo IO (Both SourceBlob, SyntaxDiff Text '[Range, Category, SourceSpan])
go Arguments{..} filepath = do
liftIO $ traceEventIO ("START fetchDiff: " <> filepath)
sourcesAndOids <- sequence $ traverse (getSourceBlob filepath) <$> shaRange
let sources = fromMaybe (emptySourceBlob filepath) <$> sourcesAndOids
let sourceBlobs = idOrEmptySourceBlob <$> sources
diff <- liftIO $ diffFiles (parserForFilepath filepath) sourceBlobs
pure $! traceEvent ("END fetchDiff: " <> filepath) (sourceBlobs, diff)
-- | Returns a list of relative file paths that have changed between the given commit shas.
pathsToDiff :: Arguments -> Both String -> IO [FilePath]
pathsToDiff Arguments{..} shas = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
lift $ runReaderT (pathsToDiff' shas) repo
-- | Returns a list of relative file paths that have changed between the given commit shas.
pathsToDiff' :: Both String -> ReaderT LgRepo IO [FilePath]
pathsToDiff' shas = do
entries <- blobEntriesToDiff shas
pure $ (\(p, _, _) -> toS p) <$> entries
lift $ runReaderT (go shas) repo
where
go :: Both String -> ReaderT LgRepo IO [FilePath]
go shas = do
entries <- blobEntriesToDiff shas
pure $ (\(p, _, _) -> toS p) <$> entries
-- | Returns a list of blob entries that have changed between the given commits shas.
blobEntriesToDiff :: Both String -> ReaderT LgRepo IO [(TreeFilePath, Git.BlobOid LgRepo, BlobKind)]
@ -136,20 +131,19 @@ getSourceBlob path sha = do
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode
-- | Given a parser and renderer, diff two sources and return the rendered
-- | result.
-- | Returns the rendered result strictly, so it's always fully evaluated
-- | with respect to other IO actions.
diffFiles :: HasField fields Category
-- | Given a parser, diff two sources and return a SyntaxDiff.
-- | Returns the rendered result strictly, so it's always fully evaluated with respect to other IO actions.
diffFiles :: (HasField fields Category, NFData (Record fields))
=> Parser (Syntax Text) (Record fields)
-> Renderer (Record fields)
-> Both SourceBlob
-> IO Output
diffFiles parse render sourceBlobs = do
-> IO (SyntaxDiff Text fields)
diffFiles parse sourceBlobs = do
traceEventIO $ "diffFiles@SEMANTIC-DIFF START parse terms: " <> paths
terms <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
(fmap (defaultFeatureVectorDecorator getLabel) . parse) <$> sourceBlobs
pure $! render sourceBlobs (stripDiff (diffTerms' terms))
traceEventIO $ "diffFiles@SEMANTIC-DIFF END parse terms: " <> paths
traceEventIO $ "diffFiles@SEMANTIC-DIFF START diff terms: " <> paths
traceEvent ("diffFiles@SEMANTIC-DIFF END diff terms: " <> paths) pure $!! stripDiff (diffTerms' terms)
where
diffTerms' terms = case runBothWith areNullOids sourceBlobs of
(True, False) -> pure $ Insert (snd terms)
@ -158,19 +152,18 @@ diffFiles parse render sourceBlobs = do
runBothWith diffTerms terms
areNullOids a b = (hasNullOid a, hasNullOid b)
hasNullOid blob = oid blob == nullOid || null (source blob)
-- For trace events
paths = runBothWith (\a b -> fileAtSha a <> " -> " <> fileAtSha b) sourceBlobs
fileAtSha x = path x <> "@" <> toS (oid x)
getLabel :: HasField fields Category => CofreeF (Syntax leaf) (Record fields) b -> (Category, Maybe leaf)
getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
getLabel :: HasField fields Category => CofreeF (Syntax leaf) (Record fields) b -> (Category, Maybe leaf)
getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
-- | Determine whether two terms are comparable based on the equality of their categories.
compareCategoryEq :: Functor f => HasField fields Category => Term f (Record fields) -> Term f (Record fields) -> Bool
compareCategoryEq = (==) `on` category . extract
-- | Returns a rendered diff given a parser, diff arguments and two source blobs.
textDiff :: (ToJSON (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO Output
textDiff parser arguments = diffFiles parser $ case format arguments of
-- | Returns a rendered diff given arguments and two source blobs.
renderDiff :: (ToJSON (Record fields), NFData (Record fields), DefaultFields fields) => Arguments -> Both SourceBlob -> SyntaxDiff Text fields -> Output
renderDiff args = case format args of
Split -> split
Patch -> patch
SExpression -> sExpression TreeOnly
@ -178,24 +171,8 @@ textDiff parser arguments = diffFiles parser $ case format arguments of
Summary -> summary
TOC -> toc
-- | Returns a truncated diff given diff arguments and two source blobs.
truncatedDiff :: Arguments -> Both SourceBlob -> IO Output
truncatedDiff Arguments{..} sources = pure $ case format of
Split -> SplitOutput mempty
Patch -> PatchOutput (truncatePatch sources)
SExpression -> SExpressionOutput mempty
JSON -> JSONOutput mempty
Summary -> SummaryOutput mempty
TOC -> TOCOutput mempty
-- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs.
printDiff :: (ToJSON (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO ByteString
printDiff parser arguments sources = do
rendered <- textDiff parser arguments sources
pure $ case rendered of
SplitOutput text -> encodeUtf8 text
PatchOutput text -> encodeUtf8 text
SExpressionOutput text -> text
JSONOutput series -> toS $ encode series
SummaryOutput summaries -> toS $ encode summaries
TOCOutput summaries -> toS $ encode summaries
-- | Prints a rendered diff to stdio or a filepath given a parser, arguments and two source blobs.
printDiff :: (ToJSON (Record fields), NFData (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO ByteString
printDiff parser args sources = do
diff <- diffFiles parser sources
pure $! concatOutputs [renderDiff args sources diff]

View File

@ -1,3 +1,4 @@
{-# LANGUAGE DeriveAnyClass #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Patch
( Patch(..)
@ -24,7 +25,7 @@ data Patch a
= Replace a a
| Insert a
| Delete a
deriving (Eq, Foldable, Functor, Generic, Ord, Show, Traversable)
deriving (Eq, Foldable, Functor, Generic, Ord, Show, Traversable, NFData)
-- DSL

View File

@ -1,3 +1,4 @@
{-# LANGUAGE DeriveAnyClass #-}
module Range where
import qualified Data.Char as Char
@ -10,7 +11,7 @@ import Test.LeanCheck
-- | A half-open interval of integers, defined by start & end indices.
data Range = Range { start :: Int, end :: Int }
deriving (Eq, Show, Generic)
deriving (Eq, Show, Generic, NFData)
-- | Make a range at a given index.
rangeAt :: Int -> Range

View File

@ -26,7 +26,7 @@ data SourcePos = SourcePos
-- Column number
--
, column :: Int
} deriving (Show, Read, Eq, Ord, Generic, Hashable)
} deriving (Show, Read, Eq, Ord, Generic, Hashable, NFData)
displaySourcePos :: SourcePos -> Text
displaySourcePos SourcePos{..} =
@ -49,7 +49,7 @@ data SourceSpan = SourceSpan
-- End of the span
--
, spanEnd :: SourcePos
} deriving (Show, Read, Eq, Ord, Generic, Hashable)
} deriving (Show, Read, Eq, Ord, Generic, Hashable, NFData)
displayStartEndPos :: SourceSpan -> Text
displayStartEndPos sp =

View File

@ -106,7 +106,7 @@ data Syntax a f
| Ty [f]
-- | A send statement has a channel and an expression in Go.
| Send f f
deriving (Eq, Foldable, Functor, Generic, Generic1, Mergeable, Ord, Show, Traversable, ToJSON)
deriving (Eq, Foldable, Functor, Generic, Generic1, Mergeable, Ord, Show, Traversable, ToJSON, NFData)
-- Instances

View File

@ -1,4 +1,4 @@
{-# LANGUAGE RankNTypes, TypeFamilies, TypeSynonymInstances #-}
{-# LANGUAGE RankNTypes, TypeFamilies, TypeSynonymInstances, UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Term where
@ -17,6 +17,12 @@ type TermF = CofreeF
type SyntaxTerm leaf fields = Term (Syntax leaf) (Record fields)
type SyntaxTermF leaf fields = TermF (Syntax leaf) (Record fields)
instance (NFData (f (Cofree f a)), NFData a, Functor f) => NFData (Cofree f a) where
rnf = rnf . runCofree
instance (NFData a, NFData (f b)) => NFData (CofreeF f a b) where
rnf (a :< s) = rnf a `seq` rnf s `seq` ()
-- | Zip two terms by combining their annotations into a pair of annotations.
-- | If the structure of the two terms don't match, then Nothing will be returned.
zipTerms :: (Traversable f, GAlign f) => Term f annotation -> Term f annotation -> Maybe (Term f (Both annotation))

View File

@ -115,11 +115,12 @@ testParse path expectedOutput = do
actual `shouldBe` expected
testDiff :: Renderer (Record '[Range, Category, SourceSpan]) -> Both FilePath -> FilePath -> Expectation
testDiff renderer paths diff = do
testDiff renderer paths expectedOutput = do
sources <- traverse readAndTranscodeFile' paths
diff' <- diffFiles parser renderer (sourceBlobs sources)
let actual = (Verbatim . stripWhitespace. concatOutputs . pure) diff'
expected <- (Verbatim . stripWhitespace) <$> B.readFile diff
diff <- diffFiles parser (sourceBlobs sources)
let diffOutput = renderer (sourceBlobs sources) diff
let actual = (Verbatim . stripWhitespace. concatOutputs . pure) diffOutput
expected <- (Verbatim . stripWhitespace) <$> B.readFile expectedOutput
actual `shouldBe` expected
where
parser = parserForFilepath filePath

View File

@ -110,19 +110,23 @@ spec = parallel $ do
describe "diffFiles" $ do
it "encodes to final JSON" $ do
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.B.rb")
let parser = parserForFilepath (path (fst sourceBlobs))
output <- diffFiles parser toc sourceBlobs
concatOutputs (pure output) `shouldBe` ("{\"changes\":{\"ruby/methods.A.rb -> ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}" )
output <- diffOutput sourceBlobs
output `shouldBe` "{\"changes\":{\"ruby/methods.A.rb -> ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}"
it "encodes to final JSON if there are parse errors" $ do
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.X.rb")
let parser = parserForFilepath (path (fst sourceBlobs))
output <- diffFiles parser toc sourceBlobs
concatOutputs (pure output) `shouldBe` ("{\"changes\":{},\"errors\":{\"ruby/methods.A.rb -> ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"def bar\\nen\\n\"}]}}" )
output <- diffOutput sourceBlobs
output `shouldBe` "{\"changes\":{},\"errors\":{\"ruby/methods.A.rb -> ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"def bar\\nen\\n\"}]}}"
type Diff' = SyntaxDiff String '[Range, Category, SourceSpan]
type Term' = SyntaxTerm String '[Range, Category, SourceSpan]
diffOutput :: Both SourceBlob -> IO ByteString
diffOutput sourceBlobs = do
let parser = parserForFilepath (path (fst sourceBlobs))
diff <- diffFiles parser sourceBlobs
pure $ concatOutputs [toc sourceBlobs diff]
numTocSummaries :: Diff' -> Int
numTocSummaries diff = Prologue.length $ filter (not . isErrorSummary) (diffTOC blankDiffBlobs diff)
@ -184,17 +188,9 @@ isMethodOrFunction a = case runCofree (unListableF a) of
_ -> False
testDiff :: Both SourceBlob -> IO (Diff (Syntax Text) (Record '[Range, Category, SourceSpan]))
testDiff sourceBlobs = do
terms <- traverse (fmap (defaultFeatureVectorDecorator getLabel) . parser) sourceBlobs
pure $! stripDiff (diffTerms' terms sourceBlobs)
testDiff sourceBlobs = diffFiles parser sourceBlobs
where
parser = parserForFilepath (path . fst $ sourceBlobs)
diffTerms' terms blobs = case runBothWith areNullOids blobs of
(True, False) -> pure $ Insert (snd terms)
(False, True) -> pure $ Delete (fst terms)
(_, _) -> runBothWith diffTerms terms
areNullOids a b = (hasNullOid a, hasNullOid b)
hasNullOid blob = oid blob == nullOid || Source.null (source blob)
blobsForPaths :: Both FilePath -> IO (Both SourceBlob)
blobsForPaths paths = do