1
1
mirror of https://github.com/github/semantic.git synced 2024-11-24 17:04:47 +03:00

Merge pull request #308 from github/catfile-improvement

Speed up Semantic.Git.catFile and avoid crashes on non-UTF-8 input.
This commit is contained in:
Patrick Thomson 2019-10-04 17:36:14 -04:00 committed by GitHub
commit 590767697c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 49 additions and 20 deletions

View File

@ -1,9 +1,11 @@
{-# LANGUAGE DataKinds, FlexibleContexts, PackageImports, PartialTypeSignatures, TypeApplications, TypeFamilies #-}
module Main where
module Evaluation (benchmarks) where
import Algebra.Graph
import Control.Monad
import Control.Carrier.Parse.Simple
import qualified Data.Duration as Duration
import Data.Abstract.Evaluatable
import Data.Abstract.FreeVariables
import Data.Blob
@ -45,7 +47,7 @@ callGraphProject' :: ( Language.SLanguage lang
-> IO (Either String (Data.Graph.Graph ControlFlowVertex))
callGraphProject' session proxy parser path = fmap (first show) . runTask session $ do
blob <- readBlobFromFile' (fileForRelPath path)
package <- fmap snd <$> parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] (Language.reflect proxy) [])
package <- fmap snd <$> runParse (Duration.fromSeconds 10) (parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] (Language.reflect proxy) []))
modules <- topologicalSort <$> runImportGraphToModules proxy package
runCallGraph proxy False modules package
@ -70,8 +72,8 @@ pyCall p = nfIO $ callGraphProject (Proxy @'Language.Python) pythonParser (Path.
rbCall :: Path.RelFile -> Benchmarkable
rbCall p = nfIO $ callGraphProject (Proxy @'Language.Ruby) rubyParser $ (Path.relDir "bench/bench-fixtures/ruby" </> p)
main :: IO ()
main = defaultMain
benchmarks :: Benchmark
benchmarks = bgroup "evaluation"
[ bgroup "python" [ bench "assignment" . pyEval $ Path.relFile "simple-assignment.py"
, bench "function def" . pyEval $ Path.relFile "function-definition.py"
, bench "if + function calls" . pyCall . Path.relFile $ "if-statement-functions.py"

9
bench/Main.hs Normal file
View File

@ -0,0 +1,9 @@
module Main where
import Gauge
import qualified Evaluation
main :: IO ()
main = defaultMain
[ Evaluation.benchmarks
]

View File

@ -1,3 +1,7 @@
# 0.0.1.0
- Adds an `NFData` instance for `Source`.
# 0.0.0.1
- Loosens the upper bound on `hashable`.

View File

@ -32,6 +32,7 @@ module Source.Source
import Prelude hiding (drop, take)
import Control.Arrow ((&&&))
import Control.DeepSeq (NFData)
import Data.Aeson (FromJSON (..), withText)
import qualified Data.ByteString as B
import Data.Char (ord)
@ -50,7 +51,7 @@ import Source.Span (Span(Span), Pos(..))
-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
-- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
newtype Source = Source { bytes :: B.ByteString }
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic)
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic, NFData)
fromUTF8 :: B.ByteString -> Source
fromUTF8 = Source

View File

@ -68,6 +68,7 @@ common dependencies
, semilattices ^>= 0.0.0.3
, shelly >= 1.5 && <2
, streaming ^>= 0.2.2.0
, streaming-bytestring ^>= 0.1.6
, text ^>= 1.2.3.1
, these >= 0.7 && <1
, unix ^>= 2.7.2.2
@ -311,6 +312,7 @@ library
, semantic-tags ^>= 0
, semigroupoids ^>= 5.3.2
, split ^>= 0.2.3.3
, streaming-process ^>= 0.1
, stm-chans ^>= 3.0.0.4
, template-haskell ^>= 2.14
, time ^>= 1.8.0.2
@ -410,15 +412,16 @@ test-suite parse-examples
, foldl ^>= 1.4.5
, resourcet ^>= 1.2
, streaming
, streaming-bytestring ^>= 0.1.6
, streaming-bytestring
, tasty
, tasty-hunit
benchmark evaluation
import: haskell, dependencies, executable-flags
hs-source-dirs: bench/evaluation
hs-source-dirs: bench
type: exitcode-stdio-1.0
main-is: Main.hs
other-modules: Evaluation
ghc-options: -static
build-depends: base
, algebraic-graphs

View File

@ -58,7 +58,7 @@ readBlobsFromGitRepo path oid excludePaths includePaths = liftIO . fmap catMaybe
, not (pathIsMinified path)
, path `notElem` excludePaths
, null includePaths || path `elem` includePaths
= Just . sourceBlob' path lang oid . Source.fromText <$> Git.catFile gitDir oid
= Just . sourceBlob' path lang oid <$> Git.catFile gitDir oid
blobFromTreeEntry _ _ = pure Nothing
sourceBlob' filepath language (Git.OID oid) source = makeBlob source filepath language oid

View File

@ -15,23 +15,32 @@ module Semantic.Git
, parseEntry
) where
import Control.Monad.IO.Class
import Prologue
import Data.Attoparsec.Text (Parser)
import Data.Attoparsec.Text as AP
import qualified Data.ByteString.Streaming as ByteStream
import Data.Char
import Data.Either (fromRight)
import Data.Text as Text
import Shelly hiding (FilePath)
import qualified Streaming.Process
import qualified System.Process as Process (proc)
import qualified Source.Source as Source
-- | git clone --bare
clone :: Text -> FilePath -> IO ()
clone url path = sh $ do
run_ "git" ["clone", "--bare", url, pack path]
-- | git cat-file -p
catFile :: FilePath -> OID -> IO Text
catFile gitDir (OID oid) = sh $ do
run "git" ["-C", pack gitDir, "cat-file", "-p", oid]
-- | Runs @git cat-file -p@. Throws 'ProcessExitedUnsuccessfully' if the
-- underlying git command returns a nonzero exit code. Loads the contents
-- of the file into memory all at once and strictly.
catFile :: FilePath -> OID -> IO Source.Source
catFile gitDir (OID oid) =
let process = Process.proc "git" ["-C", gitDir, "cat-file", "-p", Text.unpack oid]
consumeStdout stream = Streaming.Process.withProcessOutput stream ByteStream.toStrict_
in Source.fromUTF8 <$> Streaming.Process.withStreamProcess process consumeStdout
-- | git ls-tree -rz
lsTree :: FilePath -> OID -> IO [TreeEntry]

View File

@ -15,6 +15,7 @@ import Data.Blob
import Data.Handle
import qualified Semantic.Git as Git
import Shelly (cd, run_, shelly, silently, touchfile, writefile)
import qualified Source.Source as Source
import SpecHelpers
import System.Path ((</>))
import qualified System.Path as Path
@ -42,7 +43,7 @@ spec = do
trees <- Git.lsTree (dir <> "/.git") (Git.OID "HEAD")
Just it <- pure $ find (\p -> "日本語" `isInfixOf` Git.treeEntryPath p) trees
Git.catFile (dir <> "/.git") (Git.treeEntryOid it)
("日本語" `Text.isInfixOf` result) `shouldBe` True
Source.toText result `shouldSatisfy` ("日本語" `Text.isInfixOf`)
describe "lsTree" $ do
hasGit <- runIO $ isJust <$> findExecutable "git"