1
1
mirror of https://github.com/github/semantic.git synced 2024-12-25 07:55:12 +03:00

Merge pull request #419 from github/precise-tag-benchmarks

Precise tag benchmarks showing improvement from haskell-tree-sitter optimizations
This commit is contained in:
Timothy Clem 2020-01-15 10:06:13 -08:00 committed by GitHub
commit fc88e441e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 201 additions and 64 deletions

View File

@ -11,14 +11,14 @@ on:
jobs:
build:
name: ghc ${{ matrix.ghc }}
runs-on: ubuntu-16.04
runs-on: ubuntu-latest
strategy:
matrix:
ghc: ["8.8.1"]
cabal: ["3.0"]
steps:
- uses: actions/checkout@master
- uses: actions/checkout@v2
if: github.event.action == 'opened' || github.event.action == 'synchronize' || github.event.ref == 'refs/heads/master'
- uses: actions/setup-haskell@v1
@ -27,12 +27,6 @@ jobs:
ghc-version: ${{ matrix.ghc }}
cabal-version: ${{ matrix.cabal }}
- uses: actions/cache@v1
name: Cache ~/.cabal/packages
with:
path: ~/.cabal/packages
key: ${{ runner.os }}-${{ matrix.ghc }}-v1-cabal-packages
- uses: actions/cache@v1
name: Cache ~/.cabal/store
with:
@ -43,13 +37,13 @@ jobs:
name: Cache dist-newstyle
with:
path: dist-newstyle
key: ${{ runner.os }}-${{ matrix.ghc }}-semantic-dist
key: ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cabal }}-semantic-dist
- name: Install dependencies
run: |
cabal v2-update
cabal v2-configure --disable-optimization --enable-benchmarks --enable-tests --write-ghc-environment-files=always -j2
cabal v2-build --only-dependencies
cabal v2-build all --only-dependencies
- name: hlint
run: |

View File

@ -2,8 +2,10 @@ module Main (main) where
import Gauge
import qualified Evaluation
import qualified Tagging
main :: IO ()
main = defaultMain
[ Evaluation.benchmarks
[ Tagging.benchmarks
, Evaluation.benchmarks
]

97
bench/Tagging.hs Normal file
View File

@ -0,0 +1,97 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
module Tagging (benchmarks) where
import Control.Carrier.Parse.Measured
import Control.Carrier.Reader
import Control.Exception (throwIO)
import Control.Monad
import Data.Blob
import Data.Foldable
import Data.Language (LanguageMode (..), PerLanguageModes (..))
import Gauge
import System.FilePath.Glob
import qualified System.Path as Path
import Data.Flag
import Proto.Semantic as P hiding (Blob, BlobPair)
import Semantic.Api.Symbols (parseSymbols)
import Semantic.Config as Config
import Semantic.Task
import Semantic.Task.Files
benchmarks :: Benchmark
benchmarks = bgroup "tagging"
[ pythonBenchmarks
, goBenchmarks
, rubyBenchmarks
]
pythonBenchmarks :: Benchmark
pythonBenchmarks = bgroup "python"
[ bench "precise" $ runTagging preciseLanguageModes pyDir "*.py"
, bench "a la carte" $ runTagging aLaCarteLanguageModes pyDir "*.py"
]
where pyDir = Path.relDir "tmp/python-examples/keras/keras"
goBenchmarks :: Benchmark
goBenchmarks = bgroup "go"
[ bench "precise" $ runTagging preciseLanguageModes dir "*.go"
, bench "a la carte" $ runTagging aLaCarteLanguageModes dir "*.go"
]
where dir = Path.relDir "tmp/go-examples/go/src/database/sql"
rubyBenchmarks :: Benchmark
rubyBenchmarks = bgroup "ruby"
[ bench "precise" $ runTagging preciseLanguageModes dir "*.rb"
, bench "a la carte" $ runTagging aLaCarteLanguageModes dir "*.rb"
]
where dir = Path.relDir "tmp/ruby-examples/ruby_spec/command_line"
runTagging :: PerLanguageModes -> Path.RelDir -> String -> Benchmarkable
runTagging mode dir glob = nfIO . withOptions testOptions $ \ config logger statter -> do
let session = TaskSession config "-" False logger statter
files <- globDir1 (compile glob) (Path.toString dir)
let paths = Path.relFile <$> files
for_ paths (runTask session . runParse . parseSymbolsFilePath mode >=> either throwIO pure)
parseSymbolsFilePath ::
( Has (Error SomeException) sig m
, Has Distribute sig m
, Has Parse sig m
, Has Files sig m
)
=> PerLanguageModes
-> Path.RelFile
-> m ParseTreeSymbolResponse
parseSymbolsFilePath languageModes path = readBlob (fileForTypedPath path) >>= runReader languageModes . parseSymbols . pure @[]
aLaCarteLanguageModes :: PerLanguageModes
aLaCarteLanguageModes = PerLanguageModes
{ pythonMode = ALaCarte
, rubyMode = ALaCarte
, goMode = ALaCarte
, typescriptMode = ALaCarte
, tsxMode = ALaCarte
, javascriptMode = ALaCarte
, jsxMode = ALaCarte
}
preciseLanguageModes :: PerLanguageModes
preciseLanguageModes = PerLanguageModes
{ pythonMode = Precise
, rubyMode = Precise
, goMode = Precise
, typescriptMode = Precise
, tsxMode = Precise
, javascriptMode = Precise
, jsxMode = Precise
}
testOptions :: Config.Options
testOptions = defaultOptions
{ optionsFailOnWarning = flag FailOnWarning True
, optionsLogLevel = Nothing
}

View File

@ -41,9 +41,9 @@ library
-- other-modules:
-- other-extensions:
build-depends: base ^>= 4.13
, tree-sitter ^>= 0.7.2
, tree-sitter ^>= 0.8
, semantic-source ^>= 0.0
, tree-sitter-python ^>= 0.8
, tree-sitter-python ^>= 0.8.1
, bytestring ^>= 0.10.8.2
, optparse-applicative >= 0.14.3 && < 0.16
, pretty-simple ^>= 3.1.0.0

View File

@ -28,8 +28,8 @@ common haskell
, semantic-source ^>= 0.0
, semantic-tags ^>= 0.0
, text ^>= 1.2.3
, tree-sitter ^>= 0.7.2
, tree-sitter-go ^>= 0.4
, tree-sitter ^>= 0.8
, tree-sitter-go ^>= 0.4.1
ghc-options:
-Weverything

View File

@ -5,6 +5,7 @@ module Language.Go
) where
import Data.Proxy
import qualified Language.Go.Tags as GoTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Go (tree_sitter_go)
@ -13,8 +14,12 @@ import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: Go.SourceFile a }
instance TS.SymbolMatching Term where
matchedSymbols _ = TS.matchedSymbols (Proxy :: Proxy Go.SourceFile)
showFailure _ = TS.showFailure (Proxy :: Proxy Go.SourceFile)
instance TS.Unmarshal Term where
unmarshalNode node = Term <$> TS.unmarshalNode node
matchers = fmap (fmap (TS.hoist Term)) TS.matchers
instance Tags.ToTags Term where
tags src = Tags.runTagging src . GoTags.tags . getTerm

View File

@ -27,8 +27,8 @@ library
, fused-effects ^>= 1.0
, semantic-source ^>= 0.0
, semantic-tags ^>= 0.0
, tree-sitter ^>= 0.7.2
, tree-sitter-java ^>= 0.6
, tree-sitter ^>= 0.8
, tree-sitter-java ^>= 0.6.1
hs-source-dirs: src
default-language: Haskell2010
ghc-options:

View File

@ -4,6 +4,7 @@ module Language.Java
, TreeSitter.Java.tree_sitter_java
) where
import Data.Proxy
import qualified Language.Java.Tags as JavaTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Java (tree_sitter_java)
@ -12,8 +13,12 @@ import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: Java.Program a }
instance TS.SymbolMatching Term where
matchedSymbols _ = TS.matchedSymbols (Proxy :: Proxy Java.Program)
showFailure _ = TS.showFailure (Proxy :: Proxy Java.Program)
instance TS.Unmarshal Term where
unmarshalNode node = Term <$> TS.unmarshalNode node
matchers = fmap (fmap (TS.hoist Term)) TS.matchers
instance Tags.ToTags Term where
tags src = Tags.runTagging src . JavaTags.tags . getTerm

View File

@ -24,8 +24,8 @@ library
build-depends:
base >= 4.13 && < 5
, semantic-tags ^>= 0.0
, tree-sitter ^>= 0.7.2
, tree-sitter-json ^>= 0.5
, tree-sitter ^>= 0.8
, tree-sitter-json ^>= 0.6
hs-source-dirs: src
default-language: Haskell2010
ghc-options:

View File

@ -4,6 +4,7 @@ module Language.JSON
, TreeSitter.JSON.tree_sitter_json
) where
import Data.Proxy
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.JSON (tree_sitter_json)
import qualified TreeSitter.JSON.AST as JSON
@ -11,8 +12,12 @@ import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: JSON.Document a }
instance TS.SymbolMatching Term where
matchedSymbols _ = TS.matchedSymbols (Proxy :: Proxy JSON.Document)
showFailure _ = TS.showFailure (Proxy :: Proxy JSON.Document)
instance TS.Unmarshal Term where
unmarshalNode node = Term <$> TS.unmarshalNode node
matchers = fmap (fmap (TS.hoist Term)) TS.matchers
-- | Tags arent really meaningful for JSON, but by implementing this we can avoid having to customize the set of parsers used for computing tags.
instance Tags.ToTags Term where

View File

@ -28,8 +28,8 @@ common haskell
, semantic-source ^>= 0.0
, semantic-tags ^>= 0.0
, text ^>= 1.2.3
, tree-sitter ^>= 0.7.2
, tree-sitter-python ^>= 0.8
, tree-sitter ^>= 0.8
, tree-sitter-python ^>= 0.8.1
ghc-options:
-Weverything

View File

@ -4,6 +4,7 @@ module Language.Python
, TreeSitter.Python.tree_sitter_python
) where
import Data.Proxy
import qualified Language.Python.Tags as PyTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Python (tree_sitter_python)
@ -12,8 +13,12 @@ import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: Py.Module a }
instance TS.SymbolMatching Term where
matchedSymbols _ = TS.matchedSymbols (Proxy :: Proxy Py.Module)
showFailure _ = TS.showFailure (Proxy :: Proxy Py.Module)
instance TS.Unmarshal Term where
unmarshalNode node = Term <$> TS.unmarshalNode node
matchers = fmap (fmap (TS.hoist Term)) TS.matchers
instance Tags.ToTags Term where
tags src = Tags.runTagging src . PyTags.tags . getTerm

View File

@ -28,8 +28,8 @@ common haskell
, semantic-source ^>= 0.0
, semantic-tags ^>= 0.0
, text ^>= 1.2.3
, tree-sitter ^>= 0.7.2
, tree-sitter-ruby ^>= 0.4
, tree-sitter ^>= 0.8
, tree-sitter-ruby ^>= 0.4.1
ghc-options:
-Weverything

View File

@ -6,10 +6,10 @@ module Language.Ruby
, TreeSitter.Ruby.tree_sitter_ruby
) where
import Control.Carrier.State.Strict
import Data.Text (Text)
import qualified Language.Ruby.Tags as PyTags
import Control.Carrier.State.Strict
import Data.Proxy
import Data.Text (Text)
import qualified Language.Ruby.Tags as RbTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Ruby (tree_sitter_ruby)
import qualified TreeSitter.Ruby.AST as Rb
@ -17,8 +17,12 @@ import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: Rb.Program a }
instance TS.SymbolMatching Term where
matchedSymbols _ = TS.matchedSymbols (Proxy :: Proxy Rb.Program)
showFailure _ = TS.showFailure (Proxy :: Proxy Rb.Program)
instance TS.Unmarshal Term where
unmarshalNode node = Term <$> TS.unmarshalNode node
matchers = fmap (fmap (TS.hoist Term)) TS.matchers
instance Tags.ToTags Term where
tags src = Tags.runTagging src . evalState @[Text] [] . PyTags.tags . getTerm
tags src = Tags.runTagging src . evalState @[Text] [] . RbTags.tags . getTerm

View File

@ -28,8 +28,8 @@ common haskell
, semantic-source ^>= 0.0
, semantic-tags ^>= 0.0
, text ^>= 1.2.3
, tree-sitter ^>= 0.7.2
, tree-sitter-tsx ^>= 0.4
, tree-sitter ^>= 0.8
, tree-sitter-tsx ^>= 0.4.1
ghc-options:
-Weverything

View File

@ -5,7 +5,7 @@ module Language.TSX
, TreeSitter.TSX.tree_sitter_tsx
) where
import Data.Proxy
import qualified Language.TSX.Tags as TsxTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.TSX (tree_sitter_tsx)
@ -14,8 +14,12 @@ import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: TSX.Program a }
instance TS.SymbolMatching Term where
matchedSymbols _ = TS.matchedSymbols (Proxy :: Proxy TSX.Program)
showFailure _ = TS.showFailure (Proxy :: Proxy TSX.Program)
instance TS.Unmarshal Term where
unmarshalNode node = Term <$> TS.unmarshalNode node
matchers = fmap (fmap (TS.hoist Term)) TS.matchers
instance Tags.ToTags Term where
tags src = Tags.runTagging src . TsxTags.tags . getTerm

View File

@ -28,8 +28,8 @@ common haskell
, semantic-source ^>= 0.0
, semantic-tags ^>= 0.0
, text ^>= 1.2.3
, tree-sitter ^>= 0.7.2
, tree-sitter-typescript ^>= 0.4
, tree-sitter ^>= 0.8
, tree-sitter-typescript ^>= 0.4.1
ghc-options:
-Weverything

View File

@ -5,7 +5,7 @@ module Language.TypeScript
, TreeSitter.TypeScript.tree_sitter_typescript
) where
import Data.Proxy
import qualified Language.TypeScript.Tags as TsTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.TypeScript (tree_sitter_typescript)
@ -14,8 +14,12 @@ import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: TypeScript.Program a }
instance TS.SymbolMatching Term where
matchedSymbols _ = TS.matchedSymbols (Proxy :: Proxy TypeScript.Program)
showFailure _ = TS.showFailure (Proxy :: Proxy TypeScript.Program)
instance TS.Unmarshal Term where
unmarshalNode node = Term <$> TS.unmarshalNode node
matchers = fmap (fmap (TS.hoist Term)) TS.matchers
instance Tags.ToTags Term where
tags src = Tags.runTagging src . TsTags.tags . getTerm

View File

@ -59,7 +59,7 @@ common dependencies
, fused-effects-exceptions ^>= 1
, fused-effects-resumable ^>= 0.1
, hashable >= 1.2.7 && < 1.4
, tree-sitter ^>= 0.7.2
, tree-sitter ^>= 0.8
, mtl ^>= 2.2.2
, network ^>= 2.8.0.0
, pathtype ^>= 0.8.1
@ -302,12 +302,12 @@ library
, unliftio-core ^>= 0.1.2.0
, unordered-containers ^>= 0.2.9.0
, vector ^>= 0.12.0.2
, tree-sitter-go ^>= 0.4
, tree-sitter-go ^>= 0.4.1
, tree-sitter-php ^>= 0.2
, tree-sitter-python ^>= 0.8
, tree-sitter-ruby ^>= 0.4
, tree-sitter-typescript ^>= 0.4
, tree-sitter-tsx ^>= 0.4
, tree-sitter-python ^>= 0.8.1
, tree-sitter-ruby ^>= 0.4.1
, tree-sitter-typescript ^>= 0.4.1
, tree-sitter-tsx ^>= 0.4.1
executable semantic
import: haskell, dependencies, executable-flags
@ -357,7 +357,7 @@ test-suite test
, Generators
, Properties
build-depends: semantic
, tree-sitter-json ^>= 0.5
, tree-sitter-json ^>= 0.6
, Glob ^>= 0.10.0
, hedgehog ^>= 1
, hspec >= 2.6 && <3
@ -387,16 +387,19 @@ test-suite parse-examples
, tasty
, tasty-hunit
benchmark evaluation
benchmark benchmarks
import: haskell, dependencies, executable-flags
hs-source-dirs: bench
type: exitcode-stdio-1.0
main-is: Main.hs
other-modules: Evaluation
, Tagging
ghc-options: -static
build-depends: base
, algebraic-graphs
, gauge ^>= 0.2.5
, Glob
, lens >= 4.17 && < 4.19
, semantic
, semantic-source

View File

@ -9,7 +9,6 @@ module Parsing.TreeSitter
import Prologue
import Control.Carrier.Fail.Either
import Control.Carrier.Reader
import qualified Control.Exception as Exc
import Foreign
@ -58,8 +57,8 @@ parseToPreciseAST
-> m (Either TSParseException (t Loc))
parseToPreciseAST parseTimeout language blob = runParse parseTimeout language blob $ \ rootPtr ->
TS.withCursor (castPtr rootPtr) $ \ cursor ->
runFail (runReader cursor (runReader (Source.bytes (blobSource blob)) (TS.peekNode >>= TS.unmarshalNode)))
>>= either (Exc.throw . UnmarshalFailure) pure
runReader (TS.UnmarshalState (Source.bytes (blobSource blob)) cursor) (liftIO (peek rootPtr) >>= TS.unmarshalNode)
`Exc.catch` (Exc.throw . UnmarshalFailure . TS.getUnmarshalError)
instance Exception TSParseException where
displayException = \case
@ -102,8 +101,8 @@ anaM g = a where a = pure . embed <=< traverse a <=< g
nodeRange :: TS.Node -> Range
nodeRange TS.Node{..} = Range (fromIntegral nodeStartByte) (fromIntegral nodeEndByte)
nodeRange node = Range (fromIntegral (TS.nodeStartByte node)) (fromIntegral (TS.nodeEndByte node))
nodeSpan :: TS.Node -> Span
nodeSpan TS.Node{..} = nodeStartPoint `seq` nodeEndPoint `seq` Span (pointPos nodeStartPoint) (pointPos nodeEndPoint)
nodeSpan node = TS.nodeStartPoint node `seq` TS.nodeEndPoint node `seq` Span (pointPos (TS.nodeStartPoint node)) (pointPos (TS.nodeEndPoint node))
where pointPos TS.TSPoint{..} = pointRow `seq` pointColumn `seq` Pos (1 + fromIntegral pointRow) (1 + fromIntegral pointColumn)

View File

@ -7,22 +7,26 @@
(Number "1"))
(String
[
(StringContent "a") ]))
(StringContent
[]) ]))
(Pair
(Value
(Number "2"))
(String
[
(StringContent "b") ]))
(StringContent
[]) ]))
(Pair
(Value
(Number "3"))
(String
[
(StringContent "c") ]))
(StringContent
[]) ]))
(Pair
(Value
(Number "4"))
(String
[
(StringContent "d") ])) ])))
(StringContent
[]) ])) ])))

View File

@ -7,22 +7,26 @@
(Number "2"))
(String
[
(StringContent "b") ]))
(StringContent
[]) ]))
(Pair
(Value
(Number "3"))
(String
[
(StringContent "c'") ]))
(StringContent
[]) ]))
(Pair
(Value
(Number "5"))
(String
[
(StringContent "d") ]))
(StringContent
[]) ]))
(Pair
(Value
(Number "5"))
(String
[
(StringContent "e") ])) ])))
(StringContent
[]) ])) ])))

View File

@ -5,4 +5,5 @@
(Value
(String
[
(StringContent "hello") ])) ])))
(StringContent
[]) ])) ])))

View File

@ -5,4 +5,5 @@
(Value
(String
[
(StringContent "world") ])) ])))
(StringContent
[]) ])) ])))