mirror of
https://github.com/github/semantic.git
synced 2024-11-24 17:04:47 +03:00
Merge pull request #308 from github/catfile-improvement
Speed up Semantic.Git.catFile and avoid crashes on non-UTF-8 input.
This commit is contained in:
commit
590767697c
@ -1,9 +1,11 @@
|
||||
{-# LANGUAGE DataKinds, FlexibleContexts, PackageImports, PartialTypeSignatures, TypeApplications, TypeFamilies #-}
|
||||
|
||||
module Main where
|
||||
module Evaluation (benchmarks) where
|
||||
|
||||
import Algebra.Graph
|
||||
import Control.Monad
|
||||
import Control.Carrier.Parse.Simple
|
||||
import qualified Data.Duration as Duration
|
||||
import Data.Abstract.Evaluatable
|
||||
import Data.Abstract.FreeVariables
|
||||
import Data.Blob
|
||||
@ -45,7 +47,7 @@ callGraphProject' :: ( Language.SLanguage lang
|
||||
-> IO (Either String (Data.Graph.Graph ControlFlowVertex))
|
||||
callGraphProject' session proxy parser path = fmap (first show) . runTask session $ do
|
||||
blob <- readBlobFromFile' (fileForRelPath path)
|
||||
package <- fmap snd <$> parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] (Language.reflect proxy) [])
|
||||
package <- fmap snd <$> runParse (Duration.fromSeconds 10) (parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] (Language.reflect proxy) []))
|
||||
modules <- topologicalSort <$> runImportGraphToModules proxy package
|
||||
runCallGraph proxy False modules package
|
||||
|
||||
@ -70,8 +72,8 @@ pyCall p = nfIO $ callGraphProject (Proxy @'Language.Python) pythonParser (Path.
|
||||
rbCall :: Path.RelFile -> Benchmarkable
|
||||
rbCall p = nfIO $ callGraphProject (Proxy @'Language.Ruby) rubyParser $ (Path.relDir "bench/bench-fixtures/ruby" </> p)
|
||||
|
||||
main :: IO ()
|
||||
main = defaultMain
|
||||
benchmarks :: Benchmark
|
||||
benchmarks = bgroup "evaluation"
|
||||
[ bgroup "python" [ bench "assignment" . pyEval $ Path.relFile "simple-assignment.py"
|
||||
, bench "function def" . pyEval $ Path.relFile "function-definition.py"
|
||||
, bench "if + function calls" . pyCall . Path.relFile $ "if-statement-functions.py"
|
9
bench/Main.hs
Normal file
9
bench/Main.hs
Normal file
@ -0,0 +1,9 @@
|
||||
module Main where
|
||||
|
||||
import Gauge
|
||||
import qualified Evaluation
|
||||
|
||||
main :: IO ()
|
||||
main = defaultMain
|
||||
[ Evaluation.benchmarks
|
||||
]
|
@ -1,3 +1,7 @@
|
||||
# 0.0.1.0
|
||||
|
||||
- Adds an `NFData` instance for `Source`.
|
||||
|
||||
# 0.0.0.1
|
||||
|
||||
- Loosens the upper bound on `hashable`.
|
||||
|
@ -32,6 +32,7 @@ module Source.Source
|
||||
import Prelude hiding (drop, take)
|
||||
|
||||
import Control.Arrow ((&&&))
|
||||
import Control.DeepSeq (NFData)
|
||||
import Data.Aeson (FromJSON (..), withText)
|
||||
import qualified Data.ByteString as B
|
||||
import Data.Char (ord)
|
||||
@ -50,7 +51,7 @@ import Source.Span (Span(Span), Pos(..))
|
||||
-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
|
||||
-- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
|
||||
newtype Source = Source { bytes :: B.ByteString }
|
||||
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic)
|
||||
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic, NFData)
|
||||
|
||||
fromUTF8 :: B.ByteString -> Source
|
||||
fromUTF8 = Source
|
||||
|
@ -68,6 +68,7 @@ common dependencies
|
||||
, semilattices ^>= 0.0.0.3
|
||||
, shelly >= 1.5 && <2
|
||||
, streaming ^>= 0.2.2.0
|
||||
, streaming-bytestring ^>= 0.1.6
|
||||
, text ^>= 1.2.3.1
|
||||
, these >= 0.7 && <1
|
||||
, unix ^>= 2.7.2.2
|
||||
@ -311,6 +312,7 @@ library
|
||||
, semantic-tags ^>= 0
|
||||
, semigroupoids ^>= 5.3.2
|
||||
, split ^>= 0.2.3.3
|
||||
, streaming-process ^>= 0.1
|
||||
, stm-chans ^>= 3.0.0.4
|
||||
, template-haskell ^>= 2.14
|
||||
, time ^>= 1.8.0.2
|
||||
@ -410,15 +412,16 @@ test-suite parse-examples
|
||||
, foldl ^>= 1.4.5
|
||||
, resourcet ^>= 1.2
|
||||
, streaming
|
||||
, streaming-bytestring ^>= 0.1.6
|
||||
, streaming-bytestring
|
||||
, tasty
|
||||
, tasty-hunit
|
||||
|
||||
benchmark evaluation
|
||||
import: haskell, dependencies, executable-flags
|
||||
hs-source-dirs: bench/evaluation
|
||||
hs-source-dirs: bench
|
||||
type: exitcode-stdio-1.0
|
||||
main-is: Main.hs
|
||||
other-modules: Evaluation
|
||||
ghc-options: -static
|
||||
build-depends: base
|
||||
, algebraic-graphs
|
||||
|
@ -58,7 +58,7 @@ readBlobsFromGitRepo path oid excludePaths includePaths = liftIO . fmap catMaybe
|
||||
, not (pathIsMinified path)
|
||||
, path `notElem` excludePaths
|
||||
, null includePaths || path `elem` includePaths
|
||||
= Just . sourceBlob' path lang oid . Source.fromText <$> Git.catFile gitDir oid
|
||||
= Just . sourceBlob' path lang oid <$> Git.catFile gitDir oid
|
||||
blobFromTreeEntry _ _ = pure Nothing
|
||||
|
||||
sourceBlob' filepath language (Git.OID oid) source = makeBlob source filepath language oid
|
||||
|
@ -15,23 +15,32 @@ module Semantic.Git
|
||||
, parseEntry
|
||||
) where
|
||||
|
||||
import Control.Monad.IO.Class
|
||||
import Prologue
|
||||
|
||||
import Data.Attoparsec.Text (Parser)
|
||||
import Data.Attoparsec.Text as AP
|
||||
import qualified Data.ByteString.Streaming as ByteStream
|
||||
import Data.Char
|
||||
import Data.Either (fromRight)
|
||||
import Data.Text as Text
|
||||
import Shelly hiding (FilePath)
|
||||
import qualified Streaming.Process
|
||||
import qualified System.Process as Process (proc)
|
||||
import qualified Source.Source as Source
|
||||
|
||||
-- | git clone --bare
|
||||
clone :: Text -> FilePath -> IO ()
|
||||
clone url path = sh $ do
|
||||
run_ "git" ["clone", "--bare", url, pack path]
|
||||
|
||||
-- | git cat-file -p
|
||||
catFile :: FilePath -> OID -> IO Text
|
||||
catFile gitDir (OID oid) = sh $ do
|
||||
run "git" ["-C", pack gitDir, "cat-file", "-p", oid]
|
||||
-- | Runs @git cat-file -p@. Throws 'ProcessExitedUnsuccessfully' if the
|
||||
-- underlying git command returns a nonzero exit code. Loads the contents
|
||||
-- of the file into memory all at once and strictly.
|
||||
catFile :: FilePath -> OID -> IO Source.Source
|
||||
catFile gitDir (OID oid) =
|
||||
let process = Process.proc "git" ["-C", gitDir, "cat-file", "-p", Text.unpack oid]
|
||||
consumeStdout stream = Streaming.Process.withProcessOutput stream ByteStream.toStrict_
|
||||
in Source.fromUTF8 <$> Streaming.Process.withStreamProcess process consumeStdout
|
||||
|
||||
-- | git ls-tree -rz
|
||||
lsTree :: FilePath -> OID -> IO [TreeEntry]
|
||||
|
@ -15,6 +15,7 @@ import Data.Blob
|
||||
import Data.Handle
|
||||
import qualified Semantic.Git as Git
|
||||
import Shelly (cd, run_, shelly, silently, touchfile, writefile)
|
||||
import qualified Source.Source as Source
|
||||
import SpecHelpers
|
||||
import System.Path ((</>))
|
||||
import qualified System.Path as Path
|
||||
@ -42,7 +43,7 @@ spec = do
|
||||
trees <- Git.lsTree (dir <> "/.git") (Git.OID "HEAD")
|
||||
Just it <- pure $ find (\p -> "日本語" `isInfixOf` Git.treeEntryPath p) trees
|
||||
Git.catFile (dir <> "/.git") (Git.treeEntryOid it)
|
||||
("日本語" `Text.isInfixOf` result) `shouldBe` True
|
||||
Source.toText result `shouldSatisfy` ("日本語" `Text.isInfixOf`)
|
||||
|
||||
describe "lsTree" $ do
|
||||
hasGit <- runIO $ isJust <$> findExecutable "git"
|
||||
|
Loading…
Reference in New Issue
Block a user