1
1
mirror of https://github.com/github/semantic.git synced 2024-11-28 10:15:55 +03:00

Merge pull request #308 from github/catfile-improvement

Speed up Semantic.Git.catFile and avoid crashes on non-UTF-8 input.
This commit is contained in:
Patrick Thomson 2019-10-04 17:36:14 -04:00 committed by GitHub
commit 590767697c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 49 additions and 20 deletions

View File

@ -1,9 +1,11 @@
{-# LANGUAGE DataKinds, FlexibleContexts, PackageImports, PartialTypeSignatures, TypeApplications, TypeFamilies #-} {-# LANGUAGE DataKinds, FlexibleContexts, PackageImports, PartialTypeSignatures, TypeApplications, TypeFamilies #-}
module Main where module Evaluation (benchmarks) where
import Algebra.Graph import Algebra.Graph
import Control.Monad import Control.Monad
import Control.Carrier.Parse.Simple
import qualified Data.Duration as Duration
import Data.Abstract.Evaluatable import Data.Abstract.Evaluatable
import Data.Abstract.FreeVariables import Data.Abstract.FreeVariables
import Data.Blob import Data.Blob
@ -45,7 +47,7 @@ callGraphProject' :: ( Language.SLanguage lang
-> IO (Either String (Data.Graph.Graph ControlFlowVertex)) -> IO (Either String (Data.Graph.Graph ControlFlowVertex))
callGraphProject' session proxy parser path = fmap (first show) . runTask session $ do callGraphProject' session proxy parser path = fmap (first show) . runTask session $ do
blob <- readBlobFromFile' (fileForRelPath path) blob <- readBlobFromFile' (fileForRelPath path)
package <- fmap snd <$> parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] (Language.reflect proxy) []) package <- fmap snd <$> runParse (Duration.fromSeconds 10) (parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] (Language.reflect proxy) []))
modules <- topologicalSort <$> runImportGraphToModules proxy package modules <- topologicalSort <$> runImportGraphToModules proxy package
runCallGraph proxy False modules package runCallGraph proxy False modules package
@ -70,8 +72,8 @@ pyCall p = nfIO $ callGraphProject (Proxy @'Language.Python) pythonParser (Path.
rbCall :: Path.RelFile -> Benchmarkable rbCall :: Path.RelFile -> Benchmarkable
rbCall p = nfIO $ callGraphProject (Proxy @'Language.Ruby) rubyParser $ (Path.relDir "bench/bench-fixtures/ruby" </> p) rbCall p = nfIO $ callGraphProject (Proxy @'Language.Ruby) rubyParser $ (Path.relDir "bench/bench-fixtures/ruby" </> p)
main :: IO () benchmarks :: Benchmark
main = defaultMain benchmarks = bgroup "evaluation"
[ bgroup "python" [ bench "assignment" . pyEval $ Path.relFile "simple-assignment.py" [ bgroup "python" [ bench "assignment" . pyEval $ Path.relFile "simple-assignment.py"
, bench "function def" . pyEval $ Path.relFile "function-definition.py" , bench "function def" . pyEval $ Path.relFile "function-definition.py"
, bench "if + function calls" . pyCall . Path.relFile $ "if-statement-functions.py" , bench "if + function calls" . pyCall . Path.relFile $ "if-statement-functions.py"

9
bench/Main.hs Normal file
View File

@ -0,0 +1,9 @@
module Main where
import Gauge
import qualified Evaluation
main :: IO ()
main = defaultMain
[ Evaluation.benchmarks
]

View File

@ -1,3 +1,7 @@
# 0.0.1.0
- Adds an `NFData` instance for `Source`.
# 0.0.0.1 # 0.0.0.1
- Loosens the upper bound on `hashable`. - Loosens the upper bound on `hashable`.

View File

@ -32,6 +32,7 @@ module Source.Source
import Prelude hiding (drop, take) import Prelude hiding (drop, take)
import Control.Arrow ((&&&)) import Control.Arrow ((&&&))
import Control.DeepSeq (NFData)
import Data.Aeson (FromJSON (..), withText) import Data.Aeson (FromJSON (..), withText)
import qualified Data.ByteString as B import qualified Data.ByteString as B
import Data.Char (ord) import Data.Char (ord)
@ -50,7 +51,7 @@ import Source.Span (Span(Span), Pos(..))
-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously, -- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
-- passing 'fromUTF8' non-UTF8 bytes will cause crashes. -- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
newtype Source = Source { bytes :: B.ByteString } newtype Source = Source { bytes :: B.ByteString }
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic) deriving (Eq, Semigroup, Monoid, IsString, Show, Generic, NFData)
fromUTF8 :: B.ByteString -> Source fromUTF8 :: B.ByteString -> Source
fromUTF8 = Source fromUTF8 = Source

View File

@ -68,6 +68,7 @@ common dependencies
, semilattices ^>= 0.0.0.3 , semilattices ^>= 0.0.0.3
, shelly >= 1.5 && <2 , shelly >= 1.5 && <2
, streaming ^>= 0.2.2.0 , streaming ^>= 0.2.2.0
, streaming-bytestring ^>= 0.1.6
, text ^>= 1.2.3.1 , text ^>= 1.2.3.1
, these >= 0.7 && <1 , these >= 0.7 && <1
, unix ^>= 2.7.2.2 , unix ^>= 2.7.2.2
@ -311,6 +312,7 @@ library
, semantic-tags ^>= 0 , semantic-tags ^>= 0
, semigroupoids ^>= 5.3.2 , semigroupoids ^>= 5.3.2
, split ^>= 0.2.3.3 , split ^>= 0.2.3.3
, streaming-process ^>= 0.1
, stm-chans ^>= 3.0.0.4 , stm-chans ^>= 3.0.0.4
, template-haskell ^>= 2.14 , template-haskell ^>= 2.14
, time ^>= 1.8.0.2 , time ^>= 1.8.0.2
@ -410,15 +412,16 @@ test-suite parse-examples
, foldl ^>= 1.4.5 , foldl ^>= 1.4.5
, resourcet ^>= 1.2 , resourcet ^>= 1.2
, streaming , streaming
, streaming-bytestring ^>= 0.1.6 , streaming-bytestring
, tasty , tasty
, tasty-hunit , tasty-hunit
benchmark evaluation benchmark evaluation
import: haskell, dependencies, executable-flags import: haskell, dependencies, executable-flags
hs-source-dirs: bench/evaluation hs-source-dirs: bench
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
main-is: Main.hs main-is: Main.hs
other-modules: Evaluation
ghc-options: -static ghc-options: -static
build-depends: base build-depends: base
, algebraic-graphs , algebraic-graphs

View File

@ -58,7 +58,7 @@ readBlobsFromGitRepo path oid excludePaths includePaths = liftIO . fmap catMaybe
, not (pathIsMinified path) , not (pathIsMinified path)
, path `notElem` excludePaths , path `notElem` excludePaths
, null includePaths || path `elem` includePaths , null includePaths || path `elem` includePaths
= Just . sourceBlob' path lang oid . Source.fromText <$> Git.catFile gitDir oid = Just . sourceBlob' path lang oid <$> Git.catFile gitDir oid
blobFromTreeEntry _ _ = pure Nothing blobFromTreeEntry _ _ = pure Nothing
sourceBlob' filepath language (Git.OID oid) source = makeBlob source filepath language oid sourceBlob' filepath language (Git.OID oid) source = makeBlob source filepath language oid

View File

@ -15,23 +15,32 @@ module Semantic.Git
, parseEntry , parseEntry
) where ) where
import Control.Monad.IO.Class import Prologue
import Data.Attoparsec.Text (Parser) import Data.Attoparsec.Text (Parser)
import Data.Attoparsec.Text as AP import Data.Attoparsec.Text as AP
import qualified Data.ByteString.Streaming as ByteStream
import Data.Char import Data.Char
import Data.Either (fromRight) import Data.Either (fromRight)
import Data.Text as Text import Data.Text as Text
import Shelly hiding (FilePath) import Shelly hiding (FilePath)
import qualified Streaming.Process
import qualified System.Process as Process (proc)
import qualified Source.Source as Source
-- | git clone --bare -- | git clone --bare
clone :: Text -> FilePath -> IO () clone :: Text -> FilePath -> IO ()
clone url path = sh $ do clone url path = sh $ do
run_ "git" ["clone", "--bare", url, pack path] run_ "git" ["clone", "--bare", url, pack path]
-- | git cat-file -p -- | Runs @git cat-file -p@. Throws 'ProcessExitedUnsuccessfully' if the
catFile :: FilePath -> OID -> IO Text -- underlying git command returns a nonzero exit code. Loads the contents
catFile gitDir (OID oid) = sh $ do -- of the file into memory all at once and strictly.
run "git" ["-C", pack gitDir, "cat-file", "-p", oid] catFile :: FilePath -> OID -> IO Source.Source
catFile gitDir (OID oid) =
let process = Process.proc "git" ["-C", gitDir, "cat-file", "-p", Text.unpack oid]
consumeStdout stream = Streaming.Process.withProcessOutput stream ByteStream.toStrict_
in Source.fromUTF8 <$> Streaming.Process.withStreamProcess process consumeStdout
-- | git ls-tree -rz -- | git ls-tree -rz
lsTree :: FilePath -> OID -> IO [TreeEntry] lsTree :: FilePath -> OID -> IO [TreeEntry]

View File

@ -15,6 +15,7 @@ import Data.Blob
import Data.Handle import Data.Handle
import qualified Semantic.Git as Git import qualified Semantic.Git as Git
import Shelly (cd, run_, shelly, silently, touchfile, writefile) import Shelly (cd, run_, shelly, silently, touchfile, writefile)
import qualified Source.Source as Source
import SpecHelpers import SpecHelpers
import System.Path ((</>)) import System.Path ((</>))
import qualified System.Path as Path import qualified System.Path as Path
@ -42,7 +43,7 @@ spec = do
trees <- Git.lsTree (dir <> "/.git") (Git.OID "HEAD") trees <- Git.lsTree (dir <> "/.git") (Git.OID "HEAD")
Just it <- pure $ find (\p -> "日本語" `isInfixOf` Git.treeEntryPath p) trees Just it <- pure $ find (\p -> "日本語" `isInfixOf` Git.treeEntryPath p) trees
Git.catFile (dir <> "/.git") (Git.treeEntryOid it) Git.catFile (dir <> "/.git") (Git.treeEntryOid it)
("日本語" `Text.isInfixOf` result) `shouldBe` True Source.toText result `shouldSatisfy` ("日本語" `Text.isInfixOf`)
describe "lsTree" $ do describe "lsTree" $ do
hasGit <- runIO $ isJust <$> findExecutable "git" hasGit <- runIO $ isJust <$> findExecutable "git"