
Merge pull request #262 from github/semantic-tags

Tag computation for precise ASTs
This commit is contained in:
Rob Rix, 2019-09-30 14:20:34 -04:00 (committed via GitHub)
commit 5236c3c0e9
32 changed files with 622 additions and 223 deletions

.gitignore vendored
View File

@ -12,6 +12,7 @@ cabal.project.local*
dist
dist-newstyle
.ghc.environment.*
.ghci_history
tmp/
/bin/

View File

@ -1,4 +1,4 @@
packages: . semantic-core semantic-python semantic-source
packages: . semantic-core semantic-python semantic-source semantic-tags
jobs: $ncpus

View File

@ -23,9 +23,10 @@ common haskell
build-depends: base ^>=4.12
, fused-effects ^>= 0.5
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0
, text ^>= 1.2.3
, tree-sitter == 0.3.0.0
, tree-sitter-python == 0.4.0.0
, tree-sitter ^>= 0.4
, tree-sitter-python ^>= 0.5
ghc-options:
-Weverything

View File

@ -14,7 +14,6 @@ import Prelude hiding (fail)
import Control.Effect hiding ((:+:))
import Control.Effect.Reader
import Control.Monad.Fail
import Data.Bifunctor
import Data.Coerce
import Data.Core as Core
import Data.Foldable
@ -27,9 +26,9 @@ import Data.String (IsString)
import Data.Text (Text)
import GHC.Generics
import GHC.Records
import Source.Span (Span)
import qualified Source.Span as Source
import qualified TreeSitter.Python.AST as Py
import TreeSitter.Span (Span)
import qualified TreeSitter.Span as TreeSitter
-- | Access to the current filename as Text to stick into location annotations.
newtype SourcePath = SourcePath { rawPath :: Text }
@ -97,8 +96,8 @@ compile :: ( Compile py
=> py -> m (t Name)
compile t = compileCC t (pure none)
locFromTSSpan :: SourcePath -> TreeSitter.Span -> Loc
locFromTSSpan fp (TreeSitter.Span (TreeSitter.Pos a b) (TreeSitter.Pos c d))
locFromTSSpan :: SourcePath -> Source.Span -> Loc
locFromTSSpan fp (Source.Span (Source.Pos a b) (Source.Pos c d))
= Data.Loc.Loc (rawPath fp) (Data.Loc.Span (Data.Loc.Pos a b) (Data.Loc.Pos c d))
locate :: ( HasField "ann" syntax Span
@ -118,7 +117,7 @@ newtype CompileSum py = CompileSum py
instance (Generic py, GCompileSum (Rep py)) => Compile (CompileSum py) where
compileCC (CompileSum a) cc = gcompileCCSum (from a) cc
deriving via CompileSum (Either l r) instance (Compile l, Compile r) => Compile (Either l r)
deriving via CompileSum ((l :+: r) Span) instance (Compile (l Span), Compile (r Span)) => Compile ((l :+: r) Span)
instance Compile (Py.AssertStatement Span)
instance Compile (Py.Attribute Span)
@ -139,8 +138,8 @@ instance Compile (Py.Attribute Span)
-- RHS represents the right-hand-side of an assignment that we get out of tree-sitter.
-- Desugared is the "terminal" node in a sequence of assignments, i.e. given a = b = c,
-- c will be the terminal node. It is never an assignment.
type RHS a = Either (Py.Assignment a) (Either (Py.AugmentedAssignment a) (Desugared a))
type Desugared a = Either (Py.ExpressionList a) (Py.Yield a)
type RHS = Py.Assignment :+: Py.AugmentedAssignment :+: Desugared
type Desugared = Py.ExpressionList :+: Py.Yield
-- We have to pair locations and names, and tuple syntax is harder to
-- read in this case than a happy little constructor.
@ -154,11 +153,11 @@ desugar :: (Member (Reader SourcePath) sig, Carrier sig m, MonadFail m)
-> RHS Span
-> m ([Located Name], Desugared Span)
desugar acc = \case
Left Py.Assignment { left = SingleIdentifier name, right = Just rhs, ann} -> do
L1 Py.Assignment { left = SingleIdentifier name, right = Just rhs, ann} -> do
loc <- locFromTSSpan <$> ask <*> pure ann
let cons = (Located loc name :)
desugar (cons acc) rhs
Right (Right any) -> pure (acc, any)
R1 (R1 any) -> pure (acc, any)
other -> fail ("desugar: couldn't desugar RHS " <> show other)
-- This is an algebra that is invoked from a left fold but that
@ -267,8 +266,8 @@ instance Compile (Py.Identifier Span) where
instance Compile (Py.IfStatement Span) where
compileCC it@Py.IfStatement{ condition, consequence, alternative} cc =
locate it =<< (if' <$> compile condition <*> compileCC consequence cc <*> foldr clause cc alternative)
where clause (Right Py.ElseClause{ body }) _ = compileCC body cc
clause (Left Py.ElifClause{ condition, consequence }) rest =
where clause (R1 Py.ElseClause{ body }) _ = compileCC body cc
clause (L1 Py.ElifClause{ condition, consequence }) rest =
if' <$> compile condition <*> compileCC consequence cc <*> rest
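
Throughout this file the nested `Either`s are replaced by the generic sum `(:+:)` from GHC.Generics, so pattern matches use `L1`/`R1` instead of `Left`/`Right`. A minimal, self-contained sketch of that idiom, using stand-in functors rather than the real tree-sitter types:

```haskell
{-# LANGUAGE TypeOperators #-}
import GHC.Generics ((:+:) (..))

-- Stand-in functors; the real RHS combines Py.Assignment, Py.AugmentedAssignment
-- and Desugared in the same way.
type Example = Maybe :+: []

describe :: Show a => Example a -> String
describe (L1 m)  = "assignment-like: " <> show m
describe (R1 xs) = "terminal: " <> show xs
```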

View File

@ -29,12 +29,12 @@ import Data.String (fromString)
import GHC.Stack
import qualified Language.Python.Core as Py
import Prelude hiding (fail)
import qualified Source.Span as Source (Span)
import Streaming
import qualified Streaming.Prelude as Stream
import qualified Streaming.Process
import System.Directory
import System.Exit
import qualified TreeSitter.Span as TS (Span)
import qualified TreeSitter.Python as TSP
import qualified TreeSitter.Python.AST as TSP
import qualified TreeSitter.Unmarshal as TS
@ -100,7 +100,7 @@ fixtureTestTreeForFile fp = HUnit.testCaseSteps (Path.toString fp) $ \step -> wi
. runFail
. runReader (fromString @Py.SourcePath . Path.toString $ fp)
. runReader @Py.Bindings mempty
. Py.compile @(TSP.Module TS.Span) @_ @(Term (Ann :+: Core))
. Py.compile @(TSP.Module Source.Span) @_ @(Term (Ann :+: Core))
<$> result
for_ directives $ \directive -> do

semantic-tags/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 GitHub
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

semantic-tags/README.md Normal file
View File

@ -0,0 +1,18 @@
# semantic-tags
Tags computation over ASTs.
## Development
This project consists of a Haskell package named `semantic-tags`. The library's sources are in [`src`][].
Development of `semantic-tags` is typically done using `cabal v2-build`:
```shell
cabal v2-build # build the library
cabal v2-repl # load the package into ghci
cabal v2-test # build and run the doctests
```
[`src`]: https://github.com/github/semantic/tree/master/semantic-tags/src

semantic-tags/Setup.hs Normal file
View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,59 @@
cabal-version: 2.4
name: semantic-tags
version: 0.0.0.0
synopsis: Tags computation
description: Tags computation for ASTs derived from tree-sitter grammars.
homepage: https://github.com/github/semantic/tree/master/semantic-core#readme
bug-reports: https://github.com/github/semantic/issues
license: MIT
license-file: LICENSE
author: The Semantic authors
maintainer: opensource+semantic@github.com
copyright: (c) 2019 GitHub, Inc.
category: Language
build-type: Simple
stability: alpha
extra-source-files: README.md
tested-with: GHC == 8.6.5
library
exposed-modules:
Language.Python
Language.Python.Tags
Tags.Tag
Tags.Tagging.Precise
-- other-modules:
-- other-extensions:
build-depends:
base >= 4.12 && < 5
, bytestring ^>= 0.10.8.2
, fused-effects ^>= 0.5
, semantic-source ^>= 0.0
, text ^>= 1.2.3.1
, tree-sitter ^>= 0.4
, tree-sitter-python ^>= 0.5
hs-source-dirs: src
default-language: Haskell2010
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
test-suite doctest
type: exitcode-stdio-1.0
main-is: Doctest.hs
build-depends: base
, doctest >=0.7 && <1.0
, semantic-tags
hs-source-dirs: test
default-language: Haskell2010

View File

@ -0,0 +1,17 @@
-- | Semantic functionality for Python programs.
module Language.Python
( Term(..)
) where
import qualified Language.Python.Tags as PyTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Python.AST as Py
import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: Py.Module a }
instance TS.Unmarshal Term where
unmarshalNode node = Term <$> TS.unmarshalNode node
instance Tags.ToTags Term where
tags src = Tags.runTagging src . PyTags.tags . getTerm
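
A hedged sketch (not part of this diff) of how the module is meant to be driven: once a precise Python AST has been unmarshalled into `Term`, the tagger only needs the blob's `Source`. This mirrors the `renderPreciseToSymbols` call added to Semantic.Api.Symbols later in this commit.

```haskell
import           Source.Loc (Loc)
import           Source.Source (Source)
import           Tags.Tag (Tag)
import qualified Language.Python as Py
import qualified Tags.Tagging.Precise as Precise

-- Run the precise tagger over an already-unmarshalled Python module.
tagPython :: Source -> Py.Term Loc -> [Tag]
tagPython = Precise.tags
```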

View File

@ -0,0 +1,109 @@
{-# LANGUAGE AllowAmbiguousTypes, DataKinds, DisambiguateRecordFields, FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, NamedFieldPuns, ScopedTypeVariables, TypeApplications, TypeFamilies, TypeOperators, UndecidableInstances #-}
module Language.Python.Tags
( ToTags(..)
) where
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable (traverse_)
import Data.Maybe (listToMaybe)
import Data.Monoid (Ap(..))
import Data.List.NonEmpty (NonEmpty(..))
import Data.Text as Text
import GHC.Generics
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Python.AST as Py
class ToTags t where
tags
:: ( Carrier sig m
, Member (Reader Source) sig
, Member (Writer Tags.Tags) sig
)
=> t Loc
-> m ()
instance (ToTagsBy strategy t, strategy ~ ToTagsInstance t) => ToTags t where
tags = tags' @strategy
class ToTagsBy (strategy :: Strategy) t where
tags'
:: ( Carrier sig m
, Member (Reader Source) sig
, Member (Writer Tags.Tags) sig
)
=> t Loc
-> m ()
data Strategy = Generic | Custom
type family ToTagsInstance t :: Strategy where
ToTagsInstance (_ :+: _) = 'Custom
ToTagsInstance Py.FunctionDefinition = 'Custom
ToTagsInstance Py.ClassDefinition = 'Custom
ToTagsInstance Py.Call = 'Custom
ToTagsInstance _ = 'Generic
instance (ToTags l, ToTags r) => ToTagsBy 'Custom (l :+: r) where
tags' (L1 l) = tags l
tags' (R1 r) = tags r
instance ToTagsBy 'Custom Py.FunctionDefinition where
tags' Py.FunctionDefinition
{ ann = Loc Range { start } span
, name = Py.Identifier { bytes = name }
, parameters
, returnType
, body = Py.Block { ann = Loc Range { start = end } _, extraChildren }
} = do
src <- ask @Source
let docs = listToMaybe extraChildren >>= docComment src
sliced = slice src (Range start end)
Tags.yield (Tag name Function span (firstLine sliced) docs)
tags parameters
traverse_ tags returnType
traverse_ tags extraChildren
instance ToTagsBy 'Custom Py.ClassDefinition where
tags' Py.ClassDefinition
{ ann = Loc Range { start } span
, name = Py.Identifier { bytes = name }
, superclasses
, body = Py.Block { ann = Loc Range { start = end } _, extraChildren }
} = do
src <- ask @Source
let docs = listToMaybe extraChildren >>= docComment src
sliced = slice src (Range start end)
Tags.yield (Tag name Class span (firstLine sliced) docs)
traverse_ tags superclasses
traverse_ tags extraChildren
instance ToTagsBy 'Custom Py.Call where
tags' Py.Call
{ ann = Loc range span
, function = Py.IdentifierPrimaryExpression Py.Identifier { bytes = name }
, arguments
} = do
src <- ask @Source
let sliced = slice src range
Tags.yield (Tag name Call span (firstLine sliced) Nothing)
tags arguments
tags' Py.Call { function, arguments } = tags function >> tags arguments
docComment :: Source -> (Py.CompoundStatement :+: Py.SimpleStatement) Loc -> Maybe Text
docComment src (R1 (Py.ExpressionStatementSimpleStatement (Py.ExpressionStatement { extraChildren = L1 (Py.PrimaryExpressionExpression (Py.StringPrimaryExpression Py.String { ann })) :|_ }))) = Just (toText (slice src (byteRange ann)))
docComment _ _ = Nothing
firstLine :: Source -> Text
firstLine = Text.takeWhile (/= '\n') . toText . Source.take 180
instance (Generic1 t, Tags.GFoldable1 ToTags (Rep1 t)) => ToTagsBy 'Generic t where
tags' = getAp . Tags.gfoldMap1 @ToTags (Ap . tags) . from1
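
For a sense of what the custom `FunctionDefinition` instance above produces, here is a hypothetical expected value (the span and the exact line slice are made up for illustration; the docstring slice keeps its surrounding quotes, as in `docComment`):

```haskell
{-# LANGUAGE OverloadedStrings #-}
import Source.Span (Pos (..), Span (..))
import Tags.Tag

-- Illustrative only, for Python source like:
--
--   def hello():
--       "greet the user"
--
expectedHello :: Tag
expectedHello = Tag "hello" Function (Span (Pos 1 1) (Pos 2 21)) "def hello():" (Just "\"greet the user\"")
```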

View File

@ -0,0 +1,24 @@
module Tags.Tag
( Tag(..)
, Kind(..)
) where
import Data.Text (Text)
import Source.Span
data Tag = Tag
{ name :: Text
, kind :: Kind
, span :: Span
, line :: Text
, docs :: Maybe Text
}
deriving (Eq, Show)
data Kind
= Function
| Method
| Class
| Module
| Call
deriving (Bounded, Enum, Eq, Show)

View File

@ -0,0 +1,73 @@
{-# LANGUAGE AllowAmbiguousTypes, ConstraintKinds, FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, RankNTypes, ScopedTypeVariables, TypeApplications, TypeOperators #-}
module Tags.Tagging.Precise
( Tags
, ToTags(..)
, yield
, runTagging
, GFoldable1(..)
) where
import Control.Effect.Pure
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Monoid (Endo(..))
import GHC.Generics
import Prelude hiding (span)
import Source.Loc (Loc)
import Source.Span
import Source.Source
import Tags.Tag
type Tags = Endo [Tag]
class ToTags t where
tags :: Source -> t Loc -> [Tag]
yield :: (Carrier sig m, Member (Writer Tags) sig) => Tag -> m ()
yield = tell . Endo . (:) . modSpan toOneIndexed where
modSpan f t@Tag{ span = s } = t { span = f s }
toOneIndexed (Span (Pos l1 c1) (Pos l2 c2)) = Span (Pos (l1 + 1) (c1 + 1)) (Pos (l2 + 1) (c2 + 1))
runTagging :: Source -> ReaderC Source (WriterC Tags PureC) () -> [Tag]
runTagging source
= ($ [])
. appEndo
. run
. execWriter
. runReader source
-- FIXME: move GFoldable1 into semantic-ast.
class GFoldable1 c t where
-- | Generically map functions over fields of kind @* -> *@, monoidally combining the results.
gfoldMap1
:: Monoid b
=> (forall f . c f => f a -> b)
-> t a
-> b
instance GFoldable1 c f => GFoldable1 c (M1 i c' f) where
gfoldMap1 alg = gfoldMap1 @c alg . unM1
instance (GFoldable1 c f, GFoldable1 c g) => GFoldable1 c (f :*: g) where
gfoldMap1 alg (f :*: g) = gfoldMap1 @c alg f <> gfoldMap1 @c alg g
instance (GFoldable1 c f, GFoldable1 c g) => GFoldable1 c (f :+: g) where
gfoldMap1 alg (L1 l) = gfoldMap1 @c alg l
gfoldMap1 alg (R1 r) = gfoldMap1 @c alg r
instance GFoldable1 c (K1 R t) where
gfoldMap1 _ _ = mempty
instance GFoldable1 c Par1 where
gfoldMap1 _ _ = mempty
instance c t => GFoldable1 c (Rec1 t) where
gfoldMap1 alg (Rec1 t) = alg t
instance (Foldable f, GFoldable1 c g) => GFoldable1 c (f :.: g) where
gfoldMap1 alg = foldMap (gfoldMap1 @c alg) . unComp1
instance GFoldable1 c U1 where
gfoldMap1 _ _ = mempty
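
`Tags` being `Endo [Tag]` makes `yield` a cheap prepend and `runTagging` a single application of the accumulated difference list to `[]`. A tiny illustration of that accumulator, using Ints instead of Tags:

```haskell
import Data.Monoid (Endo (..))

-- Each element is prepended, and the composed Endo is finally applied to [].
collected :: [Int]
collected = appEndo (Endo (1 :) <> Endo (2 :)) []   -- [1, 2]
```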

View File

@ -0,0 +1,12 @@
module Main
( main
) where
import System.Environment
import Test.DocTest
main :: IO ()
main = do
args <- getArgs
autogen <- fmap (<> "/build/doctest/autogen") <$> lookupEnv "HASKELL_DIST_DIR"
doctest (maybe id ((:) . ("-i" <>)) autogen ("-isemantic-tags/src" : "--fast" : if null args then ["semantic-tags/src"] else args))

View File

@ -56,7 +56,7 @@ common dependencies
, fused-effects ^>= 0.5.0.0
, fused-effects-exceptions ^>= 0.2.0.0
, hashable ^>= 1.2.7.0
, tree-sitter == 0.3.0.0
, tree-sitter ^>= 0.4
, mtl ^>= 2.2.2
, network ^>= 2.8.0.0
, pathtype ^>= 0.8.1
@ -179,7 +179,6 @@ library
, Data.Syntax.Literal
, Data.Syntax.Statement
, Data.Syntax.Type
, Data.Tag
, Data.Term
-- Diffing algorithms & interpretation thereof
, Diffing.Algorithm
@ -302,6 +301,7 @@ library
, pretty-show ^>= 1.9.5
, profunctors ^>= 5.3
, reducers ^>= 3.12.3
, semantic-tags ^>= 0
, semigroupoids ^>= 5.3.2
, split ^>= 0.2.3.3
, stm-chans ^>= 3.0.0.4
@ -310,15 +310,15 @@ library
, unliftio-core ^>= 0.1.2.0
, unordered-containers ^>= 0.2.9.0
, vector ^>= 0.12.0.2
, tree-sitter-go == 0.2.0.0
, tree-sitter-haskell == 0.2.0.0
, tree-sitter-json == 0.2.0.0
, tree-sitter-php == 0.2.0.0
, tree-sitter-python == 0.4.0.0
, tree-sitter-ruby == 0.2.0.0
, tree-sitter-typescript == 0.2.1.0
, tree-sitter-tsx == 0.2.1.0
, tree-sitter-java == 0.2.0.0
, tree-sitter-go ^>= 0.2
, tree-sitter-haskell ^>= 0.2
, tree-sitter-json ^>= 0.2
, tree-sitter-php ^>= 0.2
, tree-sitter-python ^>= 0.5
, tree-sitter-ruby ^>= 0.2
, tree-sitter-typescript ^>= 0.2.1
, tree-sitter-tsx ^>= 0.2.1
, tree-sitter-java ^>= 0.2
if flag(release)
cpp-options: -DCOMPUTE_GIT_SHA

View File

@ -1,4 +1,4 @@
{-# LANGUAGE GADTs, GeneralizedNewtypeDeriving, TypeOperators #-}
{-# LANGUAGE GeneralizedNewtypeDeriving, TypeOperators #-}
module Analysis.Abstract.Caching.FlowInsensitive
( cachingTerms
, convergingModules

View File

@ -10,6 +10,9 @@ module Data.Language
, codeNavLanguages
, textToLanguage
, languageToText
, PerLanguageModes(..)
, LanguageMode(..)
, modeForLanguage
) where
import Data.Aeson
@ -137,3 +140,19 @@ textToLanguage = \case
"TSX" -> TSX
"PHP" -> PHP
_ -> Unknown
newtype PerLanguageModes = PerLanguageModes
{ pythonMode :: LanguageMode
}
deriving (Eq, Ord, Show)
data LanguageMode
= ALaCarte
| Precise
deriving (Bounded, Enum, Eq, Ord, Read, Show)
modeForLanguage :: PerLanguageModes -> Language -> LanguageMode
modeForLanguage modes = \case
Python -> pythonMode modes
_ -> ALaCarte
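
A small usage example of the new mode selection (an assumed configuration, not taken from the diff): only Python is switched to the precise pipeline, every other language keeps the à la carte ASTs.

```haskell
import Data.Language

pythonPrecise :: PerLanguageModes
pythonPrecise = PerLanguageModes { pythonMode = Precise }

selected :: [LanguageMode]
selected = map (modeForLanguage pythonPrecise) [Python, Ruby]   -- [Precise, ALaCarte]
```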

View File

@ -1,28 +0,0 @@
{-# LANGUAGE DeriveAnyClass #-}
module Data.Tag
( Tag (..)
) where
import Prelude hiding (span)
import Prologue
import Data.Aeson
import Control.Lens.Lens
import Source.Span
-- | These selectors aren't prefixed with @tag@ for reasons of JSON
-- backwards compatibility.
data Tag = Tag
{ name :: Text
, kind :: Text
, span :: Span
, context :: [Text]
, line :: Maybe Text
, docs :: Maybe Text
} deriving (Eq, Show, Generic, ToJSON)
instance HasSpan Tag where
span_ = lens span (\t s -> t { span = s })
{-# INLINE span_ #-}

View File

@ -8,7 +8,6 @@ module Parsing.Parser
, someASTParser
, someAnalysisParser
, ApplyAll
, ApplyAll'
-- À la carte parsers
, goParser
, goASTParser
@ -24,6 +23,8 @@ module Parsing.Parser
, phpParser
, phpASTParser
, haskellParser
-- Precise parsers
, precisePythonParser
) where
import Assigning.Assignment
@ -43,10 +44,12 @@ import qualified Language.Haskell.Assignment as Haskell
import qualified Language.JSON.Assignment as JSON
import qualified Language.Markdown.Assignment as Markdown
import qualified Language.PHP.Assignment as PHP
import qualified Language.Python as Py
import qualified Language.Python.Assignment as Python
import qualified Language.Ruby.Assignment as Ruby
import qualified Language.TSX.Assignment as TSX
import qualified Language.TypeScript.Assignment as TypeScript
import Prelude hiding (fail)
import Prologue
import TreeSitter.Go
import TreeSitter.Haskell
@ -54,20 +57,16 @@ import TreeSitter.JSON
import qualified TreeSitter.Language as TS (Language, Symbol)
import TreeSitter.PHP
import TreeSitter.Python
import TreeSitter.Ruby
import TreeSitter.Ruby (tree_sitter_ruby)
import TreeSitter.TSX
import TreeSitter.TypeScript
import TreeSitter.Unmarshal
type family ApplyAll' (typeclasses :: [(* -> *) -> Constraint]) (fs :: [* -> *]) :: Constraint where
ApplyAll' (typeclass ': typeclasses) fs = (Apply typeclass fs, ApplyAll' typeclasses fs)
ApplyAll' '[] fs = ()
-- | A parser, suitable for program analysis, for some specific language, producing 'Term's whose syntax satisfies a list of typeclass constraints.
data SomeAnalysisParser typeclasses ann where
SomeAnalysisParser :: ( ApplyAll' typeclasses fs
SomeAnalysisParser :: ( ApplyAll typeclasses (Sum fs)
, Apply (VertexDeclaration' (Sum fs)) fs
, Element Syntax.Identifier fs
, HasPrelude lang
)
=> Parser (Term (Sum fs) ann)
@ -75,24 +74,24 @@ data SomeAnalysisParser typeclasses ann where
-> SomeAnalysisParser typeclasses ann
-- | A parser for some specific language, producing 'Term's whose syntax satisfies a list of typeclass constraints.
someAnalysisParser :: ( ApplyAll' typeclasses Go.Syntax
, ApplyAll' typeclasses PHP.Syntax
, ApplyAll' typeclasses Python.Syntax
, ApplyAll' typeclasses Ruby.Syntax
, ApplyAll' typeclasses TypeScript.Syntax
, ApplyAll' typeclasses Haskell.Syntax
someAnalysisParser :: ( ApplyAll typeclasses (Sum Go.Syntax)
, ApplyAll typeclasses (Sum PHP.Syntax)
, ApplyAll typeclasses (Sum Python.Syntax)
, ApplyAll typeclasses (Sum Ruby.Syntax)
, ApplyAll typeclasses (Sum TypeScript.Syntax)
, ApplyAll typeclasses (Sum Haskell.Syntax)
)
=> proxy typeclasses -- ^ A proxy for the list of typeclasses required, e.g. @(Proxy :: Proxy '[Show1])@.
-> Language -- ^ The 'Language' to select.
-> SomeAnalysisParser typeclasses Loc -- ^ A 'SomeAnalysisParser' abstracting the syntax type to be produced.
someAnalysisParser _ Go = SomeAnalysisParser goParser (Proxy :: Proxy 'Go)
someAnalysisParser _ Haskell = SomeAnalysisParser haskellParser (Proxy :: Proxy 'Haskell)
someAnalysisParser _ JavaScript = SomeAnalysisParser typescriptParser (Proxy :: Proxy 'JavaScript)
someAnalysisParser _ PHP = SomeAnalysisParser phpParser (Proxy :: Proxy 'PHP)
someAnalysisParser _ Python = SomeAnalysisParser pythonParser (Proxy :: Proxy 'Python)
someAnalysisParser _ Ruby = SomeAnalysisParser rubyParser (Proxy :: Proxy 'Ruby)
someAnalysisParser _ TypeScript = SomeAnalysisParser typescriptParser (Proxy :: Proxy 'TypeScript)
someAnalysisParser _ TSX = SomeAnalysisParser typescriptParser (Proxy :: Proxy 'TSX)
someAnalysisParser _ Go = SomeAnalysisParser goParser (Proxy @'Go)
someAnalysisParser _ Haskell = SomeAnalysisParser haskellParser (Proxy @'Haskell)
someAnalysisParser _ JavaScript = SomeAnalysisParser typescriptParser (Proxy @'JavaScript)
someAnalysisParser _ PHP = SomeAnalysisParser phpParser (Proxy @'PHP)
someAnalysisParser _ Python = SomeAnalysisParser pythonParser (Proxy @'Python)
someAnalysisParser _ Ruby = SomeAnalysisParser rubyParser (Proxy @'Ruby)
someAnalysisParser _ TypeScript = SomeAnalysisParser typescriptParser (Proxy @'TypeScript)
someAnalysisParser _ TSX = SomeAnalysisParser typescriptParser (Proxy @'TSX)
someAnalysisParser _ l = error $ "Analysis not supported for: " <> show l
@ -100,6 +99,8 @@ someAnalysisParser _ l = error $ "Analysis not supported for: " <> show
data Parser term where
-- | A parser producing 'AST' using a 'TS.Language'.
ASTParser :: (Bounded grammar, Enum grammar, Show grammar) => Ptr TS.Language -> Parser (AST [] grammar)
-- | A parser 'Unmarshal'ing to a precise AST type using a 'TS.Language'.
UnmarshalParser :: Unmarshal t => Ptr TS.Language -> Parser (t Loc)
-- | A parser producing an à la carte term given an 'AST'-producing parser and an 'Assignment' onto 'Term's in some syntax type.
AssignmentParser :: (Enum grammar, Ix grammar, Show grammar, TS.Symbol grammar, Syntax.Error :< fs, Eq1 ast, Apply Foldable fs, Apply Functor fs, Foldable ast, Functor ast)
=> Parser (Term ast (Node grammar)) -- ^ A parser producing AST.
@ -163,6 +164,10 @@ markdownParser :: Parser Markdown.Term
markdownParser = AssignmentParser MarkdownParser Markdown.assignment
precisePythonParser :: Parser (Py.Term Loc)
precisePythonParser = UnmarshalParser tree_sitter_python
data SomeTerm typeclasses ann where
SomeTerm :: ApplyAll typeclasses syntax => Term syntax ann -> SomeTerm typeclasses ann

View File

@ -2,14 +2,15 @@
module Parsing.TreeSitter
( Duration(..)
, parseToAST
, parseToPreciseAST
) where
import Prologue hiding (bracket)
import Prologue
import Control.Effect.Resource
import Control.Effect.Fail
import Control.Effect.Lift
import Control.Effect.Reader
import Control.Effect.Trace
import qualified Control.Exception as Exc (bracket)
import Data.ByteString.Unsafe (unsafeUseAsCStringLen)
import Foreign
import Foreign.C.Types (CBool (..))
import Foreign.Marshal.Array (allocaArray)
@ -19,45 +20,21 @@ import Data.Blob
import Data.Duration
import Data.Term
import Source.Loc
import Source.Source (Source)
import qualified Source.Source as Source
import Source.Span
import qualified TreeSitter.Cursor as TS
import qualified TreeSitter.Language as TS
import qualified TreeSitter.Node as TS
import qualified TreeSitter.Parser as TS
import qualified TreeSitter.Tree as TS
import qualified TreeSitter.Unmarshal as TS
data Result grammar
= Failed
| Succeeded (AST [] grammar)
runParser :: (Enum grammar, Bounded grammar) => Ptr TS.Parser -> Source -> IO (Result grammar)
runParser parser blobSource = unsafeUseAsCStringLen (Source.bytes blobSource) $ \ (source, len) -> do
alloca (\ rootPtr -> do
let acquire = do
-- Change this to TS.ts_parser_loop_until_cancelled if you want to test out cancellation
TS.ts_parser_parse_string parser nullPtr source len
let release t
| t == nullPtr = pure ()
| otherwise = TS.ts_tree_delete t
let go treePtr = do
if treePtr == nullPtr
then pure Failed
else do
TS.ts_tree_root_node_p treePtr rootPtr
ptr <- peek rootPtr
Succeeded <$> anaM toAST ptr
Exc.bracket acquire release go)
-- | Parse 'Source' with the given 'TS.Language' and return its AST.
-- Returns Nothing if the operation timed out.
-- | Parse a 'Blob' with the given 'TS.Language' and return its AST.
-- Returns 'Nothing' if the operation timed out.
parseToAST :: ( Bounded grammar
, Carrier sig m
, Enum grammar
, Member Resource sig
, Member Trace sig
, MonadIO m
)
@ -65,19 +42,49 @@ parseToAST :: ( Bounded grammar
-> Ptr TS.Language
-> Blob
-> m (Maybe (AST [] grammar))
parseToAST parseTimeout language b@Blob{..} = bracket (liftIO TS.ts_parser_new) (liftIO . TS.ts_parser_delete) $ \ parser -> do
compatible <- liftIO $ do
parseToAST parseTimeout language blob = runParse parseTimeout language blob (fmap Right . anaM toAST <=< peek)
parseToPreciseAST
:: ( Carrier sig m
, Member Trace sig
, MonadIO m
, TS.Unmarshal t
)
=> Duration
-> Ptr TS.Language
-> Blob
-> m (Maybe (t Loc))
parseToPreciseAST parseTimeout language blob = runParse parseTimeout language blob $ \ rootPtr ->
TS.withCursor (castPtr rootPtr) $ \ cursor ->
runM (runFail (runReader cursor (runReader (Source.bytes (blobSource blob)) (TS.peekNode >>= TS.unmarshalNode))))
runParse
:: ( Carrier sig m
, Member Trace sig
, MonadIO m
)
=> Duration
-> Ptr TS.Language
-> Blob
-> (Ptr TS.Node -> IO (Either String a))
-> m (Maybe a)
runParse parseTimeout language b@Blob{..} action = do
result <- liftIO . TS.withParser language $ \ parser -> do
let timeoutMicros = fromIntegral $ toMicroseconds parseTimeout
TS.ts_parser_set_timeout_micros parser timeoutMicros
TS.ts_parser_halt_on_error parser (CBool 1)
TS.ts_parser_set_language parser language
result <- if compatible then
liftIO $ runParser parser blobSource
compatible <- TS.ts_parser_set_language parser language
if compatible then
TS.withParseTree parser (Source.bytes blobSource) $ \ treePtr -> do
if treePtr == nullPtr then
pure (Left "tree-sitter: null root node")
else
Failed <$ trace "tree-sitter: incompatible versions"
TS.withRootNode treePtr action
else
pure (Left "tree-sitter: incompatible versions")
case result of
Failed -> Nothing <$ trace ("tree-sitter: parsing failed " <> blobPath b)
(Succeeded ast) -> Just ast <$ trace ("tree-sitter: parsing succeeded " <> blobPath b)
Left err -> Nothing <$ trace err <* trace ("tree-sitter: parsing failed " <> blobPath b)
Right ast -> Just ast <$ trace ("tree-sitter: parsing succeeded " <> blobPath b)
toAST :: forall grammar . (Bounded grammar, Enum grammar) => TS.Node -> IO (Base (AST [] grammar) TS.Node)
toAST node@TS.Node{..} = do

View File

@ -6,15 +6,17 @@ module Semantic.Api.Symbols
) where
import Control.Effect.Error
import Control.Effect.Reader
import Control.Exception
import Control.Lens
import Data.Blob hiding (File (..))
import Data.ByteString.Builder
import Data.Maybe
import Data.Language
import Data.Term
import qualified Data.Text as T
import qualified Data.Vector as V
import Data.Text (pack)
import qualified Language.Python as Py
import Parsing.Parser
import Prologue
import Semantic.Api.Bridge
@ -26,11 +28,12 @@ import Serializing.Format
import Source.Loc
import Tags.Taggable
import Tags.Tagging
import qualified Tags.Tagging.Precise as Precise
legacyParseSymbols :: (Member Distribute sig, ParseEffects sig m, Traversable t) => t Blob -> m Legacy.ParseTreeSymbolResponse
legacyParseSymbols blobs = Legacy.ParseTreeSymbolResponse <$> distributeFoldMap go blobs
where
go :: (Member (Error SomeException) sig, Member Task sig, Carrier sig m) => Blob -> m [Legacy.File]
go :: ParseEffects sig m => Blob -> m [Legacy.File]
go blob@Blob{..} = (doParse blob >>= withSomeTerm renderToSymbols) `catchError` (\(SomeException _) -> pure (pure emptyFile))
where
emptyFile = tagsToFile []
@ -49,8 +52,8 @@ legacyParseSymbols blobs = Legacy.ParseTreeSymbolResponse <$> distributeFoldMap
tagToSymbol Tag{..}
= Legacy.Symbol
{ symbolName = name
, symbolKind = kind
, symbolLine = fromMaybe mempty line
, symbolKind = pack (show kind)
, symbolLine = line
, symbolSpan = converting #? span
}
@ -58,30 +61,39 @@ parseSymbolsBuilder :: (Member Distribute sig, ParseEffects sig m, Traversable t
parseSymbolsBuilder format blobs = parseSymbols blobs >>= serialize format
parseSymbols :: (Member Distribute sig, ParseEffects sig m, Traversable t) => t Blob -> m ParseTreeSymbolResponse
parseSymbols blobs = ParseTreeSymbolResponse . V.fromList . toList <$> distributeFor blobs go
parseSymbols blobs = do
modes <- ask
ParseTreeSymbolResponse . V.fromList . toList <$> distributeFor blobs (go modes)
where
go :: (Member (Error SomeException) sig, Member Task sig, Carrier sig m) => Blob -> m File
go blob@Blob{..} = (doParse blob >>= withSomeTerm renderToSymbols) `catchError` (\(SomeException e) -> pure $ errorFile (show e))
go :: ParseEffects sig m => PerLanguageModes -> Blob -> m File
go modes blob@Blob{..}
| Precise <- pythonMode modes
, Python <- blobLanguage'
= catching $ renderPreciseToSymbols <$> parse precisePythonParser blob
| otherwise = catching $ withSomeTerm renderToSymbols <$> doParse blob
where
catching m = m `catchError` (\(SomeException e) -> pure $ errorFile (show e))
blobLanguage' = blobLanguage blob
blobPath' = pack $ blobPath blob
errorFile e = File blobPath' (bridging # blobLanguage blob) mempty (V.fromList [ParseError (T.pack e)]) blobOid
errorFile e = File blobPath' (bridging # blobLanguage') mempty (V.fromList [ParseError (T.pack e)]) blobOid
symbolsToSummarize :: [Text]
symbolsToSummarize = ["Function", "Method", "Class", "Module", "Call", "Send"]
renderToSymbols :: IsTaggable f => Term f Loc -> File
renderToSymbols term = tagsToFile (runTagging blob symbolsToSummarize term)
renderToSymbols :: (IsTaggable f, Applicative m) => Term f Loc -> m File
renderToSymbols term = pure $ tagsToFile (runTagging blob symbolsToSummarize term)
renderPreciseToSymbols :: Py.Term Loc -> File
renderPreciseToSymbols term = tagsToFile (Precise.tags blobSource term)
tagsToFile :: [Tag] -> File
tagsToFile tags = File blobPath' (bridging # blobLanguage') (V.fromList (fmap tagToSymbol tags)) mempty blobOid
tagToSymbol :: Tag -> Symbol
tagToSymbol Tag{..}
= Symbol
symbolsToSummarize :: [Text]
symbolsToSummarize = ["Function", "Method", "Class", "Module", "Call", "Send"]
tagToSymbol :: Tag -> Symbol
tagToSymbol Tag{..} = Symbol
{ symbol = name
, kind = kind
, line = fromMaybe mempty line
, kind = pack (show kind)
, line = line
, span = converting #? span
, docs = fmap Docstring docs
}

View File

@ -16,6 +16,7 @@ module Semantic.Api.Terms
import Analysis.ConstructorName (ConstructorName)
import Control.Effect.Error
import Control.Effect.Reader
import Control.Lens
import Control.Monad
import Control.Monad.IO.Class
@ -69,7 +70,7 @@ data TermOutputFormat
deriving (Eq, Show)
parseTermBuilder :: (Traversable t, Member Distribute sig, ParseEffects sig m, MonadIO m)
=> TermOutputFormat-> t Blob -> m Builder
=> TermOutputFormat -> t Blob -> m Builder
parseTermBuilder TermJSONTree = distributeFoldMap jsonTerm >=> serialize Format.JSON -- NB: Serialize happens at the top level for these two JSON formats to collect results of multiple blobs.
parseTermBuilder TermJSONGraph = termGraph >=> serialize Format.JSON
parseTermBuilder TermSExpression = distributeFoldMap sexpTerm
@ -101,7 +102,7 @@ quietTerm blob = showTiming blob <$> time' ( (doParse blob >>= withSomeTerm (fma
in stringUtf8 (status <> "\t" <> show (blobLanguage blob) <> "\t" <> blobPath blob <> "\t" <> show duration <> " ms\n")
type ParseEffects sig m = (Member (Error SomeException) sig, Member Task sig, Carrier sig m)
type ParseEffects sig m = (Member (Error SomeException) sig, Member (Reader PerLanguageModes) sig, Member Task sig, Carrier sig m)
type TermConstraints =
'[ Taggable

View File

@ -1,6 +1,7 @@
{-# LANGUAGE ApplicativeDo #-}
module Semantic.CLI (main) where
import Control.Effect.Reader
import Control.Exception as Exc (displayException)
import Data.Blob
import Data.Blob.IO
@ -101,15 +102,39 @@ parseCommand :: Mod CommandFields (Task.TaskEff Builder)
parseCommand = command "parse" (info parseArgumentsParser (progDesc "Generate parse trees for path(s)"))
where
parseArgumentsParser = do
renderer <- flag (parseTermBuilder TermSExpression) (parseTermBuilder TermSExpression) (long "sexpression" <> help "Output s-expression parse trees (default)")
<|> flag' (parseTermBuilder TermJSONTree) (long "json" <> help "Output JSON parse trees")
<|> flag' (parseTermBuilder TermJSONGraph) (long "json-graph" <> help "Output JSON adjacency list")
<|> flag' (parseSymbolsBuilder JSON) (long "symbols" <> help "Output JSON symbol list")
<|> flag' (parseSymbolsBuilder JSON) (long "json-symbols" <> help "Output JSON symbol list")
<|> flag' (parseSymbolsBuilder Proto) (long "proto-symbols" <> help "Output JSON symbol list")
<|> flag' (parseTermBuilder TermDotGraph) (long "dot" <> help "Output DOT graph parse trees")
<|> flag' (parseTermBuilder TermShow) (long "show" <> help "Output using the Show instance (debug only, format subject to change without notice)")
<|> flag' (parseTermBuilder TermQuiet) (long "quiet" <> help "Don't produce output, but show timing stats")
languageModes <- Language.PerLanguageModes
<$> option auto ( long "python-mode"
<> help "The AST representation to use for Python sources"
<> metavar "ALaCarte|Precise"
<> value Language.ALaCarte
<> showDefault)
renderer
<- flag (parseTermBuilder TermSExpression)
(parseTermBuilder TermSExpression)
( long "sexpression"
<> help "Output s-expression parse trees (default)")
<|> flag' (parseTermBuilder TermJSONTree)
( long "json"
<> help "Output JSON parse trees")
<|> flag' (parseTermBuilder TermJSONGraph)
( long "json-graph"
<> help "Output JSON adjacency list")
<|> flag' (parseSymbolsBuilder JSON)
( long "symbols"
<> long "json-symbols"
<> help "Output JSON symbol list")
<|> flag' (parseSymbolsBuilder Proto)
( long "proto-symbols"
<> help "Output protobufs symbol list")
<|> flag' (parseTermBuilder TermDotGraph)
( long "dot"
<> help "Output DOT graph parse trees")
<|> flag' (parseTermBuilder TermShow)
( long "show"
<> help "Output using the Show instance (debug only, format subject to change without notice)")
<|> flag' (parseTermBuilder TermQuiet)
( long "quiet"
<> help "Don't produce output, but show timing stats")
filesOrStdin <- FilesFromGitRepo
<$> option str (long "gitDir" <> help "A .git directory to read from")
<*> option shaReader (long "sha" <> help "The commit SHA1 to read from")
@ -119,7 +144,7 @@ parseCommand = command "parse" (info parseArgumentsParser (progDesc "Generate pa
<|> IncludePathsFromHandle <$> flag' stdin (long "only-stdin" <> help "Include only the paths given to stdin"))
<|> FilesFromPaths <$> some (argument filePathReader (metavar "FILES..."))
<|> pure (FilesFromHandle stdin)
pure $ Task.readBlobs filesOrStdin >>= renderer
pure $ Task.readBlobs filesOrStdin >>= runReader languageModes . renderer
tsParseCommand :: Mod CommandFields (Task.TaskEff Builder)
tsParseCommand = command "ts-parse" (info tsParseArgumentsParser (progDesc "Generate raw tree-sitter parse trees for path(s)"))
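
With the new `--python-mode` flag wired through to `PerLanguageModes`, symbols for Python files can be requested from the precise pipeline on the command line. A hypothetical invocation, assuming the executable is run as `semantic` and `example.py` exists:

```shell
semantic parse --python-mode Precise --symbols example.py
```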

View File

@ -269,6 +269,12 @@ runParser blob@Blob{..} parser = case parser of
parseToAST (configTreeSitterParseTimeout config) language blob
>>= maybeM (throwError (SomeException ParserTimedOut))
UnmarshalParser language ->
time "parse.tree_sitter_ast_parse" languageTag $ do
config <- asks config
parseToPreciseAST (configTreeSitterParseTimeout config) language blob
>>= maybeM (throwError (SomeException ParserTimedOut))
AssignmentParser parser assignment -> runAssignment Assignment.assign parser assignment
DeterministicParser parser assignment -> runAssignment Deterministic.assign parser assignment

View File

@ -48,14 +48,14 @@ import qualified Language.TypeScript.Syntax as TypeScript
-- TODO: Move to src/Data
data Token
= Enter { tokenName :: Text, tokenSnippetRange :: Maybe Range }
| Exit { tokenName :: Text, tokenSnippetRange :: Maybe Range}
= Enter { tokenName :: Text, tokenSnippetRange :: Range }
| Exit { tokenName :: Text, tokenSnippetRange :: Range}
| Iden { identifierName :: Text, tokenSpan :: Span, docsLiteralRange :: Maybe Range }
deriving (Eq, Show)
type Tagger = Stream (Of Token)
enter, exit :: Monad m => String -> Maybe Range -> Tagger m ()
enter, exit :: Monad m => String -> Range -> Tagger m ()
enter c = yield . Enter (pack c)
exit c = yield . Exit (pack c)
@ -69,7 +69,7 @@ class Taggable constr where
)
=> Language -> constr (Term syntax Loc) -> Maybe Range
snippet :: Foldable syntax => Loc -> constr (Term syntax Loc) -> Maybe Range
snippet :: Foldable syntax => Loc -> constr (Term syntax Loc) -> Range
symbolName :: Declarations1 syntax => constr (Term syntax Loc) -> Maybe Name
@ -83,8 +83,8 @@ class TaggableBy (strategy :: Strategy) constr where
=> Language -> constr (Term syntax Loc) -> Maybe Range
docsLiteral' _ _ = Nothing
snippet' :: (Foldable syntax) => Loc -> constr (Term syntax Loc) -> Maybe Range
snippet' _ _ = Nothing
snippet' :: (Foldable syntax) => Loc -> constr (Term syntax Loc) -> Range
snippet' ann _ = byteRange ann
symbolName' :: Declarations1 syntax => constr (Term syntax Loc) -> Maybe Name
symbolName' _ = Nothing
@ -157,7 +157,7 @@ instance Taggable a => TaggableBy 'Custom (TermF a Loc) where
symbolName' t = symbolName (termFOut t)
instance TaggableBy 'Custom Syntax.Context where
snippet' ann (Syntax.Context _ (Term (In subj _))) = Just (subtractLoc ann subj)
snippet' ann (Syntax.Context _ (Term (In subj _))) = subtractLoc ann subj
instance TaggableBy 'Custom Declaration.Function where
docsLiteral' Python (Declaration.Function _ _ _ (Term (In _ bodyF)))
@ -165,7 +165,7 @@ instance TaggableBy 'Custom Declaration.Function where
, isTextElement exprF = Just (byteRange exprAnn)
| otherwise = Nothing
docsLiteral' _ _ = Nothing
snippet' ann (Declaration.Function _ _ _ (Term (In body _))) = Just $ subtractLoc ann body
snippet' ann (Declaration.Function _ _ _ (Term (In body _))) = subtractLoc ann body
symbolName' = declaredName . Declaration.functionName
instance TaggableBy 'Custom Declaration.Method where
@ -174,7 +174,7 @@ instance TaggableBy 'Custom Declaration.Method where
, isTextElement exprF = Just (byteRange exprAnn)
| otherwise = Nothing
docsLiteral' _ _ = Nothing
snippet' ann (Declaration.Method _ _ _ _ (Term (In body _)) _) = Just $ subtractLoc ann body
snippet' ann (Declaration.Method _ _ _ _ (Term (In body _)) _) = subtractLoc ann body
symbolName' = declaredName . Declaration.methodName
instance TaggableBy 'Custom Declaration.Class where
@ -183,28 +183,28 @@ instance TaggableBy 'Custom Declaration.Class where
, isTextElement exprF = Just (byteRange exprAnn)
| otherwise = Nothing
docsLiteral' _ _ = Nothing
snippet' ann (Declaration.Class _ _ _ (Term (In body _))) = Just $ subtractLoc ann body
snippet' ann (Declaration.Class _ _ _ (Term (In body _))) = subtractLoc ann body
symbolName' = declaredName . Declaration.classIdentifier
instance TaggableBy 'Custom Ruby.Class where
snippet' ann (Ruby.Class _ _ (Term (In body _))) = Just $ subtractLoc ann body
snippet' ann (Ruby.Class _ _ (Term (In body _))) = subtractLoc ann body
symbolName' = declaredName . Ruby.classIdentifier
instance TaggableBy 'Custom Ruby.Module where
snippet' ann (Ruby.Module _ (Term (In body _):_)) = Just $ subtractLoc ann body
snippet' ann (Ruby.Module _ _) = Just $ byteRange ann
snippet' ann (Ruby.Module _ (Term (In body _):_)) = subtractLoc ann body
snippet' ann (Ruby.Module _ _) = byteRange ann
symbolName' = declaredName . Ruby.moduleIdentifier
instance TaggableBy 'Custom TypeScript.Module where
snippet' ann (TypeScript.Module _ (Term (In body _):_)) = Just $ subtractLoc ann body
snippet' ann (TypeScript.Module _ _ ) = Just $ byteRange ann
snippet' ann (TypeScript.Module _ (Term (In body _):_)) = subtractLoc ann body
snippet' ann (TypeScript.Module _ _ ) = byteRange ann
symbolName' = declaredName . TypeScript.moduleIdentifier
instance TaggableBy 'Custom Expression.Call where
snippet' ann (Expression.Call _ _ _ (Term (In body _))) = Just $ subtractLoc ann body
snippet' ann (Expression.Call _ _ _ (Term (In body _))) = subtractLoc ann body
symbolName' = declaredName . Expression.callFunction
instance TaggableBy 'Custom Ruby.Send where
snippet' ann (Ruby.Send _ _ _ (Just (Term (In body _)))) = Just $ subtractLoc ann body
snippet' ann _ = Just $ byteRange ann
snippet' ann (Ruby.Send _ _ _ (Just (Term (In body _)))) = subtractLoc ann body
snippet' ann _ = byteRange ann
symbolName' Ruby.Send{..} = declaredName =<< sendSelector

View File

@ -2,6 +2,7 @@
module Tags.Tagging
( runTagging
, Tag(..)
, Kind(..)
)
where
@ -14,10 +15,10 @@ import Streaming
import qualified Streaming.Prelude as Streaming
import Data.Blob
import Data.Tag
import Data.Term
import Source.Loc
import qualified Source.Source as Source
import Tags.Tag
import Tags.Taggable
runTagging :: (IsTaggable syntax)
@ -29,30 +30,41 @@ runTagging blob symbolsToSummarize
= Eff.run
. evalState @[ContextToken] []
. Streaming.toList_
. contextualizing blob symbolsToSummarize
. contextualizing blob toKind
. tagging blob
where
toKind x = do
guard (x `elem` symbolsToSummarize)
case x of
"Function" -> Just Function
"Method" -> Just Method
"Class" -> Just Class
"Module" -> Just Module
"Call" -> Just Call
"Send" -> Just Call -- Ruby's Send is considered to be a kind of 'Call'
_ -> Nothing
type ContextToken = (Text, Maybe Range)
type ContextToken = (Text, Range)
contextualizing :: ( Member (State [ContextToken]) sig
, Carrier sig m
)
=> Blob
-> [Text]
-> (Text -> Maybe Kind)
-> Stream (Of Token) m a
-> Stream (Of Tag) m a
contextualizing Blob{..} symbolsToSummarize = Streaming.mapMaybeM $ \case
contextualizing Blob{..} toKind = Streaming.mapMaybeM $ \case
Enter x r -> Nothing <$ enterScope (x, r)
Exit x r -> Nothing <$ exitScope (x, r)
Iden iden span docsLiteralRange -> get @[ContextToken] >>= pure . \case
((x, r):("Context", cr):xs) | x `elem` symbolsToSummarize
-> Just $ Tag iden x span (fmap fst xs) (firstLine (slice r)) (slice cr)
((x, r):xs) | x `elem` symbolsToSummarize
-> Just $ Tag iden x span (fmap fst xs) (firstLine (slice r)) (slice docsLiteralRange)
((x, r):("Context", cr):_) | Just kind <- toKind x
-> Just $ Tag iden kind span (firstLine (slice r)) (Just (slice cr))
((x, r):_) | Just kind <- toKind x
-> Just $ Tag iden kind span (firstLine (slice r)) (slice <$> docsLiteralRange)
_ -> Nothing
where
slice = fmap (stripEnd . Source.toText . Source.slice blobSource)
firstLine = fmap (T.take 180 . fst . breakOn "\n")
slice = stripEnd . Source.toText . Source.slice blobSource
firstLine = T.take 180 . fst . breakOn "\n"
enterScope, exitScope :: ( Member (State [ContextToken]) sig
, Carrier sig m

View File

@ -24,7 +24,6 @@ import Data.Functor.Both
import qualified Data.Language as Language
import Data.List.NonEmpty
import Data.Patch
import Data.Semigroup.App
import qualified Data.Syntax as Syntax
import qualified Data.Syntax.Comment as Comment
import qualified Data.Syntax.Declaration as Declaration

View File

@ -3,6 +3,7 @@
module Main (main) where
import Control.Effect
import Control.Effect.Reader
import Control.Exception (displayException)
import qualified Control.Foldl as Foldl
import Data.Function ((&))
@ -17,6 +18,7 @@ import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy.Char8 as BLC
import qualified Data.ByteString.Streaming.Char8 as ByteStream
import Data.Either
import Data.Language (LanguageMode(..), PerLanguageModes(..))
import Data.Set (Set)
import Data.Traversable
import Data.Typeable
@ -121,4 +123,4 @@ knownFailuresForPath tsDir (Just path)
parseFilePath :: (Member (Error SomeException) sig, Member Distribute sig, Member Task sig, Member Files sig, Carrier sig m, MonadIO m) => Path.RelFile -> m Bool
parseFilePath path = readBlob (fileForRelPath path) >>= parseTermBuilder @[] TermShow . pure >>= const (pure True)
parseFilePath path = readBlob (fileForRelPath path) >>= runReader (PerLanguageModes ALaCarte) . parseTermBuilder @[] TermShow . pure >>= const (pure True)

View File

@ -1,5 +1,6 @@
module Semantic.CLI.Spec (testTree) where
import Control.Effect.Reader
import Data.ByteString.Builder
import Semantic.Api hiding (Blob, BlobPair, File)
import Semantic.Task
@ -50,17 +51,18 @@ testForParseFixture (format, runParse, files, expected) =
parseFixtures :: [(String, [Blob] -> TaskEff Builder, [File], Path.RelFile)]
parseFixtures =
[ ("s-expression", parseTermBuilder TermSExpression, path, Path.relFile "test/fixtures/ruby/corpus/and-or.parseA.txt")
, ("json", parseTermBuilder TermJSONTree, path, prefix </> Path.file "parse-tree.json")
, ("json", parseTermBuilder TermJSONTree, path', prefix </> Path.file "parse-trees.json")
, ("json", parseTermBuilder TermJSONTree, [], prefix </> Path.file "parse-tree-empty.json")
, ("symbols", parseSymbolsBuilder Serializing.Format.JSON, path'', prefix </> Path.file "parse-tree.symbols.json")
, ("protobuf symbols", parseSymbolsBuilder Serializing.Format.Proto, path'', prefix </> Path.file "parse-tree.symbols.protobuf.bin")
[ ("s-expression", run . parseTermBuilder TermSExpression, path, Path.relFile "test/fixtures/ruby/corpus/and-or.parseA.txt")
, ("json", run . parseTermBuilder TermJSONTree, path, prefix </> Path.file "parse-tree.json")
, ("json", run . parseTermBuilder TermJSONTree, path', prefix </> Path.file "parse-trees.json")
, ("json", run . parseTermBuilder TermJSONTree, [], prefix </> Path.file "parse-tree-empty.json")
, ("symbols", run . parseSymbolsBuilder Serializing.Format.JSON, path'', prefix </> Path.file "parse-tree.symbols.json")
, ("protobuf symbols", run . parseSymbolsBuilder Serializing.Format.Proto, path'', prefix </> Path.file "parse-tree.symbols.protobuf.bin")
]
where path = [File "test/fixtures/ruby/corpus/and-or.A.rb" Ruby]
path' = [File "test/fixtures/ruby/corpus/and-or.A.rb" Ruby, File "test/fixtures/ruby/corpus/and-or.B.rb" Ruby]
path'' = [File "test/fixtures/ruby/corpus/method-declaration.A.rb" Ruby]
prefix = Path.relDir "test/fixtures/cli"
run = runReader (PerLanguageModes ALaCarte)
diffFixtures :: [(String, [BlobPair] -> TaskEff Builder, [Both File], Path.RelFile)]
diffFixtures =

View File

@ -1,5 +1,6 @@
module Semantic.Spec (spec) where
import Control.Effect.Reader
import Control.Exception (fromException)
import SpecHelpers
@ -15,17 +16,17 @@ spec :: Spec
spec = do
describe "parseBlob" $ do
it "returns error if given an unknown language (json)" $ do
output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermJSONTree [ setBlobLanguage Unknown methodsBlob ]
output <- fmap runBuilder . runTaskOrDie . runReader (PerLanguageModes ALaCarte) $ parseTermBuilder TermJSONTree [ setBlobLanguage Unknown methodsBlob ]
output `shouldBe` "{\"trees\":[{\"path\":\"methods.rb\",\"error\":\"NoLanguageForBlob \\\"methods.rb\\\"\",\"language\":\"Unknown\"}]}\n"
it "throws if given an unknown language for sexpression output" $ do
res <- runTaskWithOptions defaultOptions (parseTermBuilder TermSExpression [setBlobLanguage Unknown methodsBlob])
res <- runTaskWithOptions defaultOptions (runReader (PerLanguageModes ALaCarte) (parseTermBuilder TermSExpression [setBlobLanguage Unknown methodsBlob]))
case res of
Left exc -> fromException exc `shouldBe` Just (NoLanguageForBlob "methods.rb")
Right _bad -> fail "Expected parseTermBuilder to fail for an unknown language"
it "renders with the specified renderer" $ do
output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermSExpression [methodsBlob]
output <- fmap runBuilder . runTaskOrDie . runReader (PerLanguageModes ALaCarte) $ parseTermBuilder TermSExpression [methodsBlob]
output `shouldBe` "(Statements\n (Method\n (Empty)\n (Identifier)\n (Statements)))\n"
describe "git ls-tree parsing" $ do

View File

@ -42,7 +42,7 @@ import Data.Project as X
import Data.Proxy as X
import Data.Foldable (toList)
import Data.Functor.Listable as X
import Data.Language as X
import Data.Language as X hiding (Precise)
import Data.List.NonEmpty as X (NonEmpty(..))
import Data.Semilattice.Lower as X
import Source.Source as X (Source)
@ -95,7 +95,7 @@ diffFilePaths session paths
parseFilePath :: TaskSession -> Path.RelFile -> IO (Either SomeException ByteString)
parseFilePath session path = do
blob <- readBlobFromFile (fileForRelPath path)
res <- runTask session $ parseTermBuilder TermSExpression (toList blob)
res <- runTask session . runReader (PerLanguageModes ALaCarte) $ parseTermBuilder TermSExpression (toList blob)
pure (runBuilder <$> res)
-- | Read two files to a BlobPair.

View File

@ -2,7 +2,7 @@ module Tags.Spec (spec) where
import Data.Text (Text)
import SpecHelpers
import Tags.Tagging
import Tags.Tagging as Tags
import qualified System.Path as Path
spec :: Spec
@ -11,89 +11,89 @@ spec = do
it "produces tags for functions with docs" $ do
(blob, tree) <- parseTestFile goParser (Path.relFile "test/fixtures/go/tags/simple_functions.go")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "TestFromBits" "Function" (Span (Pos 6 1) (Pos 8 2)) ["Statements"] (Just "func TestFromBits(t *testing.T) {") (Just "// TestFromBits ...")
, Tag "Hi" "Function" (Span (Pos 10 1) (Pos 11 2)) ["Statements"] (Just "func Hi()") Nothing ]
[ Tag "TestFromBits" Function (Span (Pos 6 1) (Pos 8 2)) "func TestFromBits(t *testing.T) {" (Just "// TestFromBits ...")
, Tag "Hi" Function (Span (Pos 10 1) (Pos 11 2)) "func Hi()" Nothing ]
it "produces tags for methods" $ do
(blob, tree) <- parseTestFile goParser (Path.relFile "test/fixtures/go/tags/method.go")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "CheckAuth" "Method" (Span (Pos 3 1) (Pos 3 100)) ["Statements"] (Just "func (c *apiClient) CheckAuth(req *http.Request, user, repo string) (*authenticatedActor, error)") Nothing]
[ Tag "CheckAuth" Method (Span (Pos 3 1) (Pos 3 100)) "func (c *apiClient) CheckAuth(req *http.Request, user, repo string) (*authenticatedActor, error)" Nothing]
it "produces tags for calls" $ do
(blob, tree) <- parseTestFile goParser (Path.relFile "test/fixtures/go/tags/simple_functions.go")
runTagging blob ["Call"] tree `shouldBe`
[ Tag "Hi" "Call" (Span (Pos 7 2) (Pos 7 6)) ["Function", "Context", "Statements"] (Just "Hi()") Nothing]
[ Tag "Hi" Call (Span (Pos 7 2) (Pos 7 6)) "Hi()" Nothing]
describe "javascript and typescript" $ do
it "produces tags for functions with docs" $ do
(blob, tree) <- parseTestFile typescriptParser (Path.relFile "test/fixtures/javascript/tags/simple_function_with_docs.js")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "myFunction" "Function" (Span (Pos 2 1) (Pos 4 2)) ["Statements"] (Just "function myFunction()") (Just "// This is myFunction") ]
[ Tag "myFunction" Function (Span (Pos 2 1) (Pos 4 2)) "function myFunction()" (Just "// This is myFunction") ]
it "produces tags for classes" $ do
(blob, tree) <- parseTestFile typescriptParser (Path.relFile "test/fixtures/typescript/tags/class.ts")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "FooBar" "Class" (Span (Pos 1 1) (Pos 1 16)) ["Statements"] (Just "class FooBar") Nothing ]
[ Tag "FooBar" Class (Span (Pos 1 1) (Pos 1 16)) "class FooBar" Nothing ]
it "produces tags for modules" $ do
(blob, tree) <- parseTestFile typescriptParser (Path.relFile "test/fixtures/typescript/tags/module.ts")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "APromise" "Module" (Span (Pos 1 1) (Pos 1 20)) ["Statements"] (Just "module APromise { }") Nothing ]
[ Tag "APromise" Tags.Module (Span (Pos 1 1) (Pos 1 20)) "module APromise { }" Nothing ]
describe "python" $ do
it "produces tags for functions" $ do
(blob, tree) <- parseTestFile pythonParser (Path.relFile "test/fixtures/python/tags/simple_functions.py")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "Foo" "Function" (Span (Pos 1 1) (Pos 5 17)) ["Statements"] (Just "def Foo(x):") Nothing
, Tag "Bar" "Function" (Span (Pos 7 1) (Pos 11 13)) ["Statements"] (Just "def Bar():") Nothing
, Tag "local" "Function" (Span (Pos 8 5) (Pos 9 17)) ["Statements", "Function", "Statements"] (Just "def local():") Nothing
[ Tag "Foo" Function (Span (Pos 1 1) (Pos 5 17)) "def Foo(x):" Nothing
, Tag "Bar" Function (Span (Pos 7 1) (Pos 11 13)) "def Bar():" Nothing
, Tag "local" Function (Span (Pos 8 5) (Pos 9 17)) "def local():" Nothing
]
it "produces tags for functions with docs" $ do
(blob, tree) <- parseTestFile pythonParser (Path.relFile "test/fixtures/python/tags/simple_function_with_docs.py")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "Foo" "Function" (Span (Pos 1 1) (Pos 3 13)) ["Statements"] (Just "def Foo(x):") (Just "\"\"\"This is the foo function\"\"\"") ]
[ Tag "Foo" Function (Span (Pos 1 1) (Pos 3 13)) "def Foo(x):" (Just "\"\"\"This is the foo function\"\"\"") ]
it "produces tags for classes" $ do
(blob, tree) <- parseTestFile pythonParser (Path.relFile "test/fixtures/python/tags/class.py")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "Foo" "Class" (Span (Pos 1 1) (Pos 5 17)) ["Statements"] (Just "class Foo:") (Just "\"\"\"The Foo class\"\"\"")
, Tag "f" "Function" (Span (Pos 3 5) (Pos 5 17)) ["Statements", "Class", "Statements"] (Just "def f(self):") (Just "\"\"\"The f method\"\"\"")
[ Tag "Foo" Class (Span (Pos 1 1) (Pos 5 17)) "class Foo:" (Just "\"\"\"The Foo class\"\"\"")
, Tag "f" Function (Span (Pos 3 5) (Pos 5 17)) "def f(self):" (Just "\"\"\"The f method\"\"\"")
]
it "produces tags for multi-line functions" $ do
(blob, tree) <- parseTestFile pythonParser (Path.relFile "test/fixtures/python/tags/multiline.py")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "Foo" "Function" (Span (Pos 1 1) (Pos 3 13)) ["Statements"] (Just "def Foo(x,") Nothing ]
[ Tag "Foo" Function (Span (Pos 1 1) (Pos 3 13)) "def Foo(x," Nothing ]
describe "ruby" $ do
it "produces tags for methods" $ do
(blob, tree) <- parseTestFile rubyParser (Path.relFile "test/fixtures/ruby/tags/simple_method.rb")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "foo" "Method" (Span (Pos 1 1) (Pos 4 4)) ["Statements"] (Just "def foo") Nothing ]
[ Tag "foo" Method (Span (Pos 1 1) (Pos 4 4)) "def foo" Nothing ]
it "produces tags for sends" $ do
(blob, tree) <- parseTestFile rubyParser (Path.relFile "test/fixtures/ruby/tags/simple_method.rb")
runTagging blob ["Send"] tree `shouldBe`
[ Tag "puts" "Send" (Span (Pos 2 3) (Pos 2 12)) ["Statements", "Method", "Statements"] (Just "puts \"hi\"") Nothing
, Tag "bar" "Send" (Span (Pos 3 3) (Pos 3 8)) ["Statements", "Method", "Statements"] (Just "a.bar") Nothing
, Tag "a" "Send" (Span (Pos 3 3) (Pos 3 4)) ["Send", "Statements", "Method", "Statements"] (Just "a") Nothing
[ Tag "puts" Call (Span (Pos 2 3) (Pos 2 12)) "puts \"hi\"" Nothing
, Tag "bar" Call (Span (Pos 3 3) (Pos 3 8)) "a.bar" Nothing
, Tag "a" Call (Span (Pos 3 3) (Pos 3 4)) "a" Nothing
]
it "produces tags for methods with docs" $ do
(blob, tree) <- parseTestFile rubyParser (Path.relFile "test/fixtures/ruby/tags/simple_method_with_docs.rb")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "foo" "Method" (Span (Pos 2 1) (Pos 3 4)) ["Statements"] (Just "def foo") (Just "# Public: foo") ]
[ Tag "foo" Method (Span (Pos 2 1) (Pos 3 4)) "def foo" (Just "# Public: foo") ]
it "produces tags for methods and classes with docs" $ do
(blob, tree) <- parseTestFile rubyParser (Path.relFile "test/fixtures/ruby/tags/class_module.rb")
runTagging blob symbolsToSummarize tree `shouldBe`
[ Tag "Foo" "Module" (Span (Pos 2 1 ) (Pos 12 4)) ["Statements"] (Just "module Foo") (Just "# Public: Foo")
, Tag "Bar" "Class" (Span (Pos 5 3 ) (Pos 11 6)) ["Module", "Context", "Statements"] (Just "class Bar") (Just "# Public: Bar")
, Tag "baz" "Method" (Span (Pos 8 5 ) (Pos 10 8)) ["Class", "Context", "Module", "Context", "Statements"] (Just "def baz(a)") (Just "# Public: baz")
, Tag "C" "Class" (Span (Pos 14 1) (Pos 20 4)) ["Statements"] (Just "class A::B::C") Nothing
, Tag "foo" "Method" (Span (Pos 15 3) (Pos 17 6)) ["Statements", "Class", "Statements"] (Just "def foo") Nothing
, Tag "foo" "Method" (Span (Pos 18 3) (Pos 19 6)) ["Statements", "Class", "Statements"] (Just "def self.foo") Nothing
[ Tag "Foo" Tags.Module (Span (Pos 2 1 ) (Pos 12 4)) "module Foo" (Just "# Public: Foo")
, Tag "Bar" Class (Span (Pos 5 3 ) (Pos 11 6)) "class Bar" (Just "# Public: Bar")
, Tag "baz" Method (Span (Pos 8 5 ) (Pos 10 8)) "def baz(a)" (Just "# Public: baz")
, Tag "C" Class (Span (Pos 14 1) (Pos 20 4)) "class A::B::C" Nothing
, Tag "foo" Method (Span (Pos 15 3) (Pos 17 6)) "def foo" Nothing
, Tag "foo" Method (Span (Pos 18 3) (Pos 19 6)) "def self.foo" Nothing
]
symbolsToSummarize :: [Text]