mirror of
https://github.com/github/semantic.git
synced 2024-11-28 10:15:55 +03:00
Merge pull request #308 from github/catfile-improvement
Speed up Semantic.Git.catFile and avoid crashes on non-UTF-8 input.
This commit is contained in:
commit
590767697c
@ -1,9 +1,11 @@
|
|||||||
{-# LANGUAGE DataKinds, FlexibleContexts, PackageImports, PartialTypeSignatures, TypeApplications, TypeFamilies #-}
|
{-# LANGUAGE DataKinds, FlexibleContexts, PackageImports, PartialTypeSignatures, TypeApplications, TypeFamilies #-}
|
||||||
|
|
||||||
module Main where
|
module Evaluation (benchmarks) where
|
||||||
|
|
||||||
import Algebra.Graph
|
import Algebra.Graph
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
|
import Control.Carrier.Parse.Simple
|
||||||
|
import qualified Data.Duration as Duration
|
||||||
import Data.Abstract.Evaluatable
|
import Data.Abstract.Evaluatable
|
||||||
import Data.Abstract.FreeVariables
|
import Data.Abstract.FreeVariables
|
||||||
import Data.Blob
|
import Data.Blob
|
||||||
@ -45,7 +47,7 @@ callGraphProject' :: ( Language.SLanguage lang
|
|||||||
-> IO (Either String (Data.Graph.Graph ControlFlowVertex))
|
-> IO (Either String (Data.Graph.Graph ControlFlowVertex))
|
||||||
callGraphProject' session proxy parser path = fmap (first show) . runTask session $ do
|
callGraphProject' session proxy parser path = fmap (first show) . runTask session $ do
|
||||||
blob <- readBlobFromFile' (fileForRelPath path)
|
blob <- readBlobFromFile' (fileForRelPath path)
|
||||||
package <- fmap snd <$> parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] (Language.reflect proxy) [])
|
package <- fmap snd <$> runParse (Duration.fromSeconds 10) (parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] (Language.reflect proxy) []))
|
||||||
modules <- topologicalSort <$> runImportGraphToModules proxy package
|
modules <- topologicalSort <$> runImportGraphToModules proxy package
|
||||||
runCallGraph proxy False modules package
|
runCallGraph proxy False modules package
|
||||||
|
|
||||||
@ -70,8 +72,8 @@ pyCall p = nfIO $ callGraphProject (Proxy @'Language.Python) pythonParser (Path.
|
|||||||
rbCall :: Path.RelFile -> Benchmarkable
|
rbCall :: Path.RelFile -> Benchmarkable
|
||||||
rbCall p = nfIO $ callGraphProject (Proxy @'Language.Ruby) rubyParser $ (Path.relDir "bench/bench-fixtures/ruby" </> p)
|
rbCall p = nfIO $ callGraphProject (Proxy @'Language.Ruby) rubyParser $ (Path.relDir "bench/bench-fixtures/ruby" </> p)
|
||||||
|
|
||||||
main :: IO ()
|
benchmarks :: Benchmark
|
||||||
main = defaultMain
|
benchmarks = bgroup "evaluation"
|
||||||
[ bgroup "python" [ bench "assignment" . pyEval $ Path.relFile "simple-assignment.py"
|
[ bgroup "python" [ bench "assignment" . pyEval $ Path.relFile "simple-assignment.py"
|
||||||
, bench "function def" . pyEval $ Path.relFile "function-definition.py"
|
, bench "function def" . pyEval $ Path.relFile "function-definition.py"
|
||||||
, bench "if + function calls" . pyCall . Path.relFile $ "if-statement-functions.py"
|
, bench "if + function calls" . pyCall . Path.relFile $ "if-statement-functions.py"
|
9
bench/Main.hs
Normal file
9
bench/Main.hs
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
module Main where
|
||||||
|
|
||||||
|
import Gauge
|
||||||
|
import qualified Evaluation
|
||||||
|
|
||||||
|
main :: IO ()
|
||||||
|
main = defaultMain
|
||||||
|
[ Evaluation.benchmarks
|
||||||
|
]
|
@ -1,3 +1,7 @@
|
|||||||
|
# 0.0.1.0
|
||||||
|
|
||||||
|
- Adds an `NFData` instance for `Source`.
|
||||||
|
|
||||||
# 0.0.0.1
|
# 0.0.0.1
|
||||||
|
|
||||||
- Loosens the upper bound on `hashable`.
|
- Loosens the upper bound on `hashable`.
|
||||||
|
@ -32,6 +32,7 @@ module Source.Source
|
|||||||
import Prelude hiding (drop, take)
|
import Prelude hiding (drop, take)
|
||||||
|
|
||||||
import Control.Arrow ((&&&))
|
import Control.Arrow ((&&&))
|
||||||
|
import Control.DeepSeq (NFData)
|
||||||
import Data.Aeson (FromJSON (..), withText)
|
import Data.Aeson (FromJSON (..), withText)
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import Data.Char (ord)
|
import Data.Char (ord)
|
||||||
@ -50,7 +51,7 @@ import Source.Span (Span(Span), Pos(..))
|
|||||||
-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
|
-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
|
||||||
-- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
|
-- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
|
||||||
newtype Source = Source { bytes :: B.ByteString }
|
newtype Source = Source { bytes :: B.ByteString }
|
||||||
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic)
|
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic, NFData)
|
||||||
|
|
||||||
fromUTF8 :: B.ByteString -> Source
|
fromUTF8 :: B.ByteString -> Source
|
||||||
fromUTF8 = Source
|
fromUTF8 = Source
|
||||||
|
@ -68,6 +68,7 @@ common dependencies
|
|||||||
, semilattices ^>= 0.0.0.3
|
, semilattices ^>= 0.0.0.3
|
||||||
, shelly >= 1.5 && <2
|
, shelly >= 1.5 && <2
|
||||||
, streaming ^>= 0.2.2.0
|
, streaming ^>= 0.2.2.0
|
||||||
|
, streaming-bytestring ^>= 0.1.6
|
||||||
, text ^>= 1.2.3.1
|
, text ^>= 1.2.3.1
|
||||||
, these >= 0.7 && <1
|
, these >= 0.7 && <1
|
||||||
, unix ^>= 2.7.2.2
|
, unix ^>= 2.7.2.2
|
||||||
@ -311,6 +312,7 @@ library
|
|||||||
, semantic-tags ^>= 0
|
, semantic-tags ^>= 0
|
||||||
, semigroupoids ^>= 5.3.2
|
, semigroupoids ^>= 5.3.2
|
||||||
, split ^>= 0.2.3.3
|
, split ^>= 0.2.3.3
|
||||||
|
, streaming-process ^>= 0.1
|
||||||
, stm-chans ^>= 3.0.0.4
|
, stm-chans ^>= 3.0.0.4
|
||||||
, template-haskell ^>= 2.14
|
, template-haskell ^>= 2.14
|
||||||
, time ^>= 1.8.0.2
|
, time ^>= 1.8.0.2
|
||||||
@ -410,15 +412,16 @@ test-suite parse-examples
|
|||||||
, foldl ^>= 1.4.5
|
, foldl ^>= 1.4.5
|
||||||
, resourcet ^>= 1.2
|
, resourcet ^>= 1.2
|
||||||
, streaming
|
, streaming
|
||||||
, streaming-bytestring ^>= 0.1.6
|
, streaming-bytestring
|
||||||
, tasty
|
, tasty
|
||||||
, tasty-hunit
|
, tasty-hunit
|
||||||
|
|
||||||
benchmark evaluation
|
benchmark evaluation
|
||||||
import: haskell, dependencies, executable-flags
|
import: haskell, dependencies, executable-flags
|
||||||
hs-source-dirs: bench/evaluation
|
hs-source-dirs: bench
|
||||||
type: exitcode-stdio-1.0
|
type: exitcode-stdio-1.0
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
|
other-modules: Evaluation
|
||||||
ghc-options: -static
|
ghc-options: -static
|
||||||
build-depends: base
|
build-depends: base
|
||||||
, algebraic-graphs
|
, algebraic-graphs
|
||||||
|
@ -58,7 +58,7 @@ readBlobsFromGitRepo path oid excludePaths includePaths = liftIO . fmap catMaybe
|
|||||||
, not (pathIsMinified path)
|
, not (pathIsMinified path)
|
||||||
, path `notElem` excludePaths
|
, path `notElem` excludePaths
|
||||||
, null includePaths || path `elem` includePaths
|
, null includePaths || path `elem` includePaths
|
||||||
= Just . sourceBlob' path lang oid . Source.fromText <$> Git.catFile gitDir oid
|
= Just . sourceBlob' path lang oid <$> Git.catFile gitDir oid
|
||||||
blobFromTreeEntry _ _ = pure Nothing
|
blobFromTreeEntry _ _ = pure Nothing
|
||||||
|
|
||||||
sourceBlob' filepath language (Git.OID oid) source = makeBlob source filepath language oid
|
sourceBlob' filepath language (Git.OID oid) source = makeBlob source filepath language oid
|
||||||
|
@ -15,23 +15,32 @@ module Semantic.Git
|
|||||||
, parseEntry
|
, parseEntry
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad.IO.Class
|
import Prologue
|
||||||
|
|
||||||
import Data.Attoparsec.Text (Parser)
|
import Data.Attoparsec.Text (Parser)
|
||||||
import Data.Attoparsec.Text as AP
|
import Data.Attoparsec.Text as AP
|
||||||
|
import qualified Data.ByteString.Streaming as ByteStream
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.Either (fromRight)
|
import Data.Either (fromRight)
|
||||||
import Data.Text as Text
|
import Data.Text as Text
|
||||||
import Shelly hiding (FilePath)
|
import Shelly hiding (FilePath)
|
||||||
|
import qualified Streaming.Process
|
||||||
|
import qualified System.Process as Process (proc)
|
||||||
|
import qualified Source.Source as Source
|
||||||
|
|
||||||
-- | git clone --bare
|
-- | git clone --bare
|
||||||
clone :: Text -> FilePath -> IO ()
|
clone :: Text -> FilePath -> IO ()
|
||||||
clone url path = sh $ do
|
clone url path = sh $ do
|
||||||
run_ "git" ["clone", "--bare", url, pack path]
|
run_ "git" ["clone", "--bare", url, pack path]
|
||||||
|
|
||||||
-- | git cat-file -p
|
-- | Runs @git cat-file -p@. Throws 'ProcessExitedUnsuccessfully' if the
|
||||||
catFile :: FilePath -> OID -> IO Text
|
-- underlying git command returns a nonzero exit code. Loads the contents
|
||||||
catFile gitDir (OID oid) = sh $ do
|
-- of the file into memory all at once and strictly.
|
||||||
run "git" ["-C", pack gitDir, "cat-file", "-p", oid]
|
catFile :: FilePath -> OID -> IO Source.Source
|
||||||
|
catFile gitDir (OID oid) =
|
||||||
|
let process = Process.proc "git" ["-C", gitDir, "cat-file", "-p", Text.unpack oid]
|
||||||
|
consumeStdout stream = Streaming.Process.withProcessOutput stream ByteStream.toStrict_
|
||||||
|
in Source.fromUTF8 <$> Streaming.Process.withStreamProcess process consumeStdout
|
||||||
|
|
||||||
-- | git ls-tree -rz
|
-- | git ls-tree -rz
|
||||||
lsTree :: FilePath -> OID -> IO [TreeEntry]
|
lsTree :: FilePath -> OID -> IO [TreeEntry]
|
||||||
|
@ -15,6 +15,7 @@ import Data.Blob
|
|||||||
import Data.Handle
|
import Data.Handle
|
||||||
import qualified Semantic.Git as Git
|
import qualified Semantic.Git as Git
|
||||||
import Shelly (cd, run_, shelly, silently, touchfile, writefile)
|
import Shelly (cd, run_, shelly, silently, touchfile, writefile)
|
||||||
|
import qualified Source.Source as Source
|
||||||
import SpecHelpers
|
import SpecHelpers
|
||||||
import System.Path ((</>))
|
import System.Path ((</>))
|
||||||
import qualified System.Path as Path
|
import qualified System.Path as Path
|
||||||
@ -42,7 +43,7 @@ spec = do
|
|||||||
trees <- Git.lsTree (dir <> "/.git") (Git.OID "HEAD")
|
trees <- Git.lsTree (dir <> "/.git") (Git.OID "HEAD")
|
||||||
Just it <- pure $ find (\p -> "日本語" `isInfixOf` Git.treeEntryPath p) trees
|
Just it <- pure $ find (\p -> "日本語" `isInfixOf` Git.treeEntryPath p) trees
|
||||||
Git.catFile (dir <> "/.git") (Git.treeEntryOid it)
|
Git.catFile (dir <> "/.git") (Git.treeEntryOid it)
|
||||||
("日本語" `Text.isInfixOf` result) `shouldBe` True
|
Source.toText result `shouldSatisfy` ("日本語" `Text.isInfixOf`)
|
||||||
|
|
||||||
describe "lsTree" $ do
|
describe "lsTree" $ do
|
||||||
hasGit <- runIO $ isJust <$> findExecutable "git"
|
hasGit <- runIO $ isJust <$> findExecutable "git"
|
||||||
|
Loading…
Reference in New Issue
Block a user