1
1
mirror of https://github.com/github/semantic.git synced 2024-12-24 23:42:31 +03:00

Merge remote-tracking branch 'origin/master' into razing-the-fields

This commit is contained in:
Patrick Thomson 2020-01-24 09:43:44 -05:00
commit 33f1027092
16 changed files with 245 additions and 41 deletions

View File

@ -27,11 +27,17 @@ jobs:
ghc-version: ${{ matrix.ghc }}
cabal-version: ${{ matrix.cabal }}
- uses: actions/cache@v1
name: Cache ~/.cabal/packages
with:
path: ~/.cabal/packages
key: ${{ runner.os }}-${{ matrix.ghc }}-cabal-packages
- uses: actions/cache@v1
name: Cache ~/.cabal/store
with:
path: ~/.cabal/store
key: ${{ runner.os }}-${{ matrix.ghc }}-v1-cabal-store
key: ${{ runner.os }}-${{ matrix.ghc }}-v3-cabal-store
- uses: actions/cache@v1
name: Cache dist-newstyle
@ -39,17 +45,17 @@ jobs:
path: dist-newstyle
key: ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cabal }}-semantic-dist
# - name: hlint
# run: |
# test -f dist-newstyle/hlint || cabal install hlint --installdir=dist-newstyle
# dist-newstyle/hlint src semantic-python
- name: Install dependencies
run: |
cabal v2-update
cabal v2-configure --project-file=cabal.project.ci --disable-optimization --enable-benchmarks --enable-tests --write-ghc-environment-files=always -j2
cabal v2-build --project-file=cabal.project.ci all --only-dependencies
- name: hlint
run: |
test -f dist-newstyle/hlint || cabal install hlint --installdir=dist-newstyle --overwrite-policy=always
dist-newstyle/hlint src semantic-python
- name: Build & test
run: |
cabal v2-build --project-file=cabal.project.ci

View File

@ -12,6 +12,7 @@
- {name: init, within: []}
- {name: last, within: []}
- {name: fromJust, within: []}
- {name: decodeUtf8, within: [], message: "Use decodeUtf8' or decodeUtf8With lenientDecode"}
# Replace a $ b $ c with a . b $ c
- group: {name: dollar, enabled: true}

View File

@ -37,7 +37,8 @@ common haskell
library
import: haskell
exposed-modules:
exposed-modules: Marshal.JSON
, Marshal.Examples
-- other-modules:
-- other-extensions:
build-depends: base ^>= 4.13
@ -47,9 +48,14 @@ library
, bytestring ^>= 0.10.8.2
, optparse-applicative >= 0.14.3 && < 0.16
, pretty-simple ^>= 3.1.0.0
, aeson ^>= 1.4.2.0
, text ^>= 1.2.3.1
, bytestring ^>= 0.10.8.2
, aeson-pretty ^>= 0.8.8
hs-source-dirs: src
default-language: Haskell2010
executable semantic-ast
import: haskell
main-is: Main.hs
@ -63,5 +69,8 @@ executable semantic-ast
, bytestring
, optparse-applicative
, pretty-simple
, aeson
, bytestring
, aeson-pretty
hs-source-dirs: src
default-language: Haskell2010

View File

@ -1,5 +1,4 @@
{-# LANGUAGE TypeApplications #-}
{-# OPTIONS_GHC -Wno-unused-top-binds #-}
module Main (main) where
@ -8,12 +7,16 @@ import qualified TreeSitter.Python.AST as AST
import qualified TreeSitter.Python as Python
import Source.Range
import Source.Span
import Data.Aeson (toJSON)
import Data.ByteString.Char8
import Data.ByteString (readFile)
import Options.Applicative hiding (style)
import Text.Pretty.Simple (pPrint, pPrintNoColor)
import Data.Foldable (traverse_)
import Control.Monad ((>=>))
import Marshal.JSON (marshal)
import Data.ByteString.Lazy.Char8 (putStrLn)
import Data.Aeson.Encode.Pretty (encodePretty)
data SemanticAST = SemanticAST
{ format :: Format
@ -51,13 +54,13 @@ generateAST (SemanticAST format noColor source) =
Left filePaths -> traverse Data.ByteString.readFile filePaths
Right source -> pure [Data.ByteString.Char8.pack source]
go = ast >=> display
ast = parseByteString @AST.Module @(Range, Span) Python.tree_sitter_python
ast = parseByteString @AST.Module @(Range, Span) Python.tree_sitter_python -- TODO: generalize for all languages
display = case format of
Json -> Data.ByteString.Lazy.Char8.putStrLn . encodePretty . either toJSON (marshal . fmap (const ())) -- TODO: replacing range and span annotations with () for which there is a ToJSON instance for now, deal with this later
Show -> print
Pretty | noColor -> pPrintNoColor
| otherwise -> pPrint
-- need AST in scope for case format and ..
opts :: ParserInfo SemanticAST
opts = info (parseAST <**> helper)
@ -68,6 +71,5 @@ opts = info (parseAST <**> helper)
-- TODO: Define formats for json, sexpression, etc.
data Format = Show
| Pretty
| Json
deriving (Read)
-- bool field would break Read

View File

@ -0,0 +1,86 @@
{-# LANGUAGE DataKinds, DeriveAnyClass, DeriveGeneric, DuplicateRecordFields, TypeOperators #-}
module Marshal.Examples () where
import Control.Effect.Reader
import Control.Monad.Fail
import Data.Aeson
import qualified Data.ByteString as B
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import GHC.Generics ((:+:), Generic1, Generic)
import Numeric (readDec)
import Prelude hiding (fail)
import Source.Range
import TreeSitter.Token
import TreeSitter.Unmarshal
-- | An example of a sum-of-products datatype.
data Expr a
= IfExpr (If a)
| BlockExpr (Block a)
| VarExpr (Var a)
| LitExpr (Lit a)
| BinExpr (Bin a)
deriving (Generic1, Unmarshal)
-- | Product with multiple fields.
data If a = If { ann :: a, condition :: Expr a, consequence :: Expr a, alternative :: Maybe (Expr a) }
deriving (Generic1, Unmarshal)
instance SymbolMatching If where
symbolMatch _ _ = False
showFailure _ _ = ""
-- | Single-field product.
data Block a = Block { ann :: a, body :: [Expr a] }
deriving (Generic1, Unmarshal)
instance SymbolMatching Block where
symbolMatch _ _ = False
showFailure _ _ = ""
-- | Leaf node.
data Var a = Var { ann :: a, text :: Text.Text }
deriving (Generic1, Unmarshal)
instance SymbolMatching Var where
symbolMatch _ _ = False
showFailure _ _ = ""
-- | Custom leaf node.
data Lit a = Lit { ann :: a, lit :: IntegerLit }
deriving (Generic1, Unmarshal)
instance SymbolMatching Lit where
symbolMatch _ _ = False
showFailure _ _ = ""
-- | Product with anonymous sum field.
data Bin a = Bin { ann :: a, lhs :: Expr a, op :: (AnonPlus :+: AnonTimes) a, rhs :: Expr a }
deriving (Generic1, Unmarshal)
instance SymbolMatching Bin where
symbolMatch _ _ = False
showFailure _ _ = ""
-- | Anonymous leaf node.
type AnonPlus = Token "+" 0
-- | Anonymous leaf node.
type AnonTimes = Token "*" 1
newtype IntegerLit = IntegerLit Integer
deriving (Generic, ToJSON)
instance UnmarshalAnn IntegerLit where
unmarshalAnn node = do
Range start end <- unmarshalAnn node
bytestring <- ask
let drop = B.drop start
take = B.take (end - start)
slice = take . drop
str = Text.unpack (Text.decodeUtf8 (slice bytestring))
case readDec str of
(i, _):_ -> pure (IntegerLit i)
_ -> fail ("could not parse '" <> str <> "'")

View File

@ -0,0 +1,88 @@
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE UndecidableInstances #-}
module Marshal.JSON
( MarshalJSON(..)
) where
import Data.Aeson as Aeson
import Data.List.NonEmpty (NonEmpty)
import GHC.Generics
import Data.Text (Text)
import qualified Data.Text as Text
-- TODO: range and span will require a new release of semantic-source
-- TODO: use toEncoding -- direct serialization to ByteString
-- Serialize unmarshaled ASTs into JSON representation by auto-deriving Aeson instances generically
class MarshalJSON t where
marshal :: (ToJSON a) => t a -> Value
marshal = object . fields []
fields :: (ToJSON a) => [(Text, Value)] -> t a -> [(Text, Value)]
default fields :: ( Generic1 t, GFields (Rep1 t), ToJSON a) => [(Text, Value)] -> t a -> [(Text, Value)]
fields acc = gfields acc . from1
-- Implement the sum case
instance {-# OVERLAPPING #-} (MarshalJSON f, MarshalJSON g) => MarshalJSON (f :+: g) where
fields acc (L1 f) = fields acc f
fields acc (R1 g) = fields acc g
-- Create MarshalJSON instances for each type constructor
instance (GFields (Rep1 t), Generic1 t) => MarshalJSON t
-- Stores meta-data for datatypes
instance (GFields f, Datatype c) => GFields (M1 D c f) where
gfields acc x = gfields ((Text.pack "type", String (Text.pack (datatypeName x))): acc) $ unM1 x
-- Fold over S1 product types and pass the result to Aeson objects
instance GFields fields => GFields (C1 c fields) where
gfields acc x = gfields acc (unM1 x)
-- Implement base case for products
-- To get a value out of this datum, we define another typeclass: @GValue@ with the method @gvalue@.
instance (GValue p, Selector s) => GFields (S1 s p) where
gfields acc x = (Text.pack (selName x), gvalue (unM1 x)) : acc
-- Implement inductive case for product case
-- Product datatypes are marshalled to an object with a type field holding the constructor name and a separate field for each selector in the datatype.
instance (GFields f, GFields g) => GFields (f :*: g) where
gfields acc (f :*: g) = gfields (gfields acc g) f
-- GValue for leaves
instance ToJSON a => GValue (K1 i a) where
gvalue = toJSON . unK1
-- Par1 instance
instance GValue Par1 where
gvalue = toJSON . unPar1
instance (MarshalJSON t) => GValue (Rec1 t) where
gvalue (Rec1 f) = marshal f
instance (GValue t) => GValue (Maybe :.: t) where
gvalue (Comp1 (Just t)) = gvalue t
gvalue (Comp1 Nothing) = Null
instance (GValue t) => GValue ([] :.: t) where
gvalue (Comp1 ts) = toJSON $ map gvalue ts
instance (GValue t) => GValue (NonEmpty :.: t) where
gvalue (Comp1 ts) = toJSON $ fmap gvalue ts
-- GFields operates on product field types: it takes an accumulator, a datatype, and returns a new accumulator value.
class GFields f where
gfields :: ToJSON a => [(Text, Value)] -> f a -> [(Text, Value)]
-- gvalue is a wrapper that calls to @toJSON@ (for leaf nodes such as @Text@) or recurses via @marshal@
class GValue f where
gvalue :: (ToJSON a) => f a -> Value

View File

@ -47,7 +47,7 @@ instance ToTags Go.MethodDeclaration where
tags t@Go.MethodDeclaration
{ ann = loc@Loc { byteRange }
, name = Go.FieldIdentifier { text }
} = yieldTag text Function loc byteRange >> gtags t
} = yieldTag text Method loc byteRange >> gtags t
instance ToTags Go.CallExpression where
tags t@Go.CallExpression

View File

@ -147,7 +147,7 @@ yieldMethodNameTag t loc range (Rb.MethodName expr) = enterScope True $ case exp
-- Prj Rb.Symbol { extraChildren = [Prj Rb.EscapeSequence { text = name }] } -> yield name
_ -> gtags t
where
yield name = yieldTag name Function loc range >> gtags t
yield name = yieldTag name Method loc range >> gtags t
enterScope :: (Has (State [Text]) sig m) => Bool -> m () -> m ()
enterScope createNew m = do

View File

@ -1,4 +1,4 @@
{-# LANGUAGE DeriveGeneric, RankNTypes #-}
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, RankNTypes #-}
module Source.Range
( Range(..)
, point
@ -10,6 +10,7 @@ module Source.Range
) where
import Control.DeepSeq (NFData)
import Data.Aeson (ToJSON)
import Data.Hashable (Hashable)
import Data.Semilattice.Lower (Lower(..))
import GHC.Generics (Generic)
@ -19,7 +20,7 @@ data Range = Range
{ start :: {-# UNPACK #-} !Int
, end :: {-# UNPACK #-} !Int
}
deriving (Eq, Generic, Ord, Show)
deriving (Eq, Generic, Ord, Show, ToJSON)
instance Hashable Range
instance NFData Range

View File

@ -67,7 +67,7 @@ instance ToTags Tsx.MethodDefinition where
-- TODO: There are more here
_ -> gtags t
where
yield name = yieldTag name Call loc byteRange >> gtags t
yield name = yieldTag name Method loc byteRange >> gtags t
instance ToTags Tsx.ClassDeclaration where
tags t@Tsx.ClassDeclaration

View File

@ -67,7 +67,7 @@ instance ToTags Ts.MethodDefinition where
-- TODO: There are more here
_ -> gtags t
where
yield name = yieldTag name Call loc byteRange >> gtags t
yield name = yieldTag name Method loc byteRange >> gtags t
instance ToTags Ts.ClassDeclaration where
tags t@Ts.ClassDeclaration

View File

@ -59,7 +59,7 @@ common dependencies
, fused-effects-exceptions ^>= 1
, fused-effects-resumable ^>= 0.1
, hashable >= 1.2.7 && < 1.4
, tree-sitter ^>= 0.8
, tree-sitter ^>= 0.8.0.2
, mtl ^>= 2.2.2
, network ^>= 2.8.0.0
, pathtype ^>= 0.8.1
@ -303,7 +303,7 @@ library
, unliftio-core ^>= 0.1.2.0
, unordered-containers ^>= 0.2.9.0
, vector ^>= 0.12.0.2
, tree-sitter-go ^>= 0.4.1
, tree-sitter-go ^>= 0.4.1.1
, tree-sitter-php ^>= 0.2
, tree-sitter-python ^>= 0.8.1
, tree-sitter-ruby ^>= 0.4.1

View File

@ -58,9 +58,9 @@ runParser blob@Blob{..} parser = case parser of
>>= either (\e -> trace (displayException e) *> throwError (SomeException e)) pure
UnmarshalParser language ->
time "parse.tree_sitter_ast_parse" languageTag $ do
time "parse.tree_sitter_precise_ast_parse" languageTag $ do
config <- asks config
parseToPreciseAST (configTreeSitterParseTimeout config) language blob
parseToPreciseAST (configTreeSitterParseTimeout config) (configTreeSitterUnmarshalTimeout config) language blob
>>= either (\e -> trace (displayException e) *> throwError (SomeException e)) pure
AssignmentParser parser assignment -> runAssignment Assignment.assign parser blob assignment

View File

@ -51,7 +51,7 @@ runParser timeout blob@Blob{..} parser = case parser of
>>= either (throwError . SomeException) pure
UnmarshalParser language ->
parseToPreciseAST timeout language blob
parseToPreciseAST timeout timeout language blob
>>= either (throwError . SomeException) pure
AssignmentParser parser assignment ->

View File

@ -21,6 +21,7 @@ import Data.Term
import Source.Loc
import qualified Source.Source as Source
import Source.Span
import qualified System.Timeout as System
import qualified TreeSitter.Cursor as TS
import qualified TreeSitter.Language as TS
@ -32,6 +33,7 @@ import qualified TreeSitter.Unmarshal as TS
data TSParseException
= ParserTimedOut
| IncompatibleVersions
| UnmarshalTimedOut
| UnmarshalFailure String
deriving (Eq, Show, Generic)
@ -52,18 +54,24 @@ parseToPreciseAST
, TS.Unmarshal t
)
=> Duration
-> Duration
-> Ptr TS.Language
-> Blob
-> m (Either TSParseException (t Loc))
parseToPreciseAST parseTimeout language blob = runParse parseTimeout language blob $ \ rootPtr ->
TS.withCursor (castPtr rootPtr) $ \ cursor ->
runReader (TS.UnmarshalState (Source.bytes (blobSource blob)) cursor) (liftIO (peek rootPtr) >>= TS.unmarshalNode)
`Exc.catch` (Exc.throw . UnmarshalFailure . TS.getUnmarshalError)
parseToPreciseAST parseTimeout unmarshalTimeout language blob = runParse parseTimeout language blob $ \ rootPtr ->
withTimeout $
TS.withCursor (castPtr rootPtr) $ \ cursor ->
runReader (TS.UnmarshalState (Source.bytes (blobSource blob)) cursor) (liftIO (peek rootPtr) >>= TS.unmarshalNode)
`Exc.catch` (Exc.throw . UnmarshalFailure . TS.getUnmarshalError)
where
withTimeout :: IO a -> IO a
withTimeout action = System.timeout (toMicroseconds unmarshalTimeout) action >>= maybeM (Exc.throw UnmarshalTimedOut)
instance Exception TSParseException where
displayException = \case
ParserTimedOut -> "tree-sitter: parser timed out"
IncompatibleVersions -> "tree-sitter: incompatible versions"
UnmarshalTimedOut -> "tree-sitter: unmarshal timed out"
UnmarshalFailure s -> "tree-sitter: unmarshal failure - " <> show s
runParse

View File

@ -43,20 +43,21 @@ data FailOnParseError = FailOnParseError
data Config
= Config
{ configAppName :: String -- ^ Application name ("semantic")
, configHostName :: String -- ^ HostName from getHostName
, configProcessID :: ProcessID -- ^ ProcessID from getProcessID
, configStatsHost :: Stat.Host -- ^ Host of statsd/datadog (default: "127.0.0.1")
, configStatsPort :: Stat.Port -- ^ Port of statsd/datadog (default: "28125")
, configTreeSitterParseTimeout :: Duration -- ^ Timeout in milliseconds before canceling tree-sitter parsing (default: 6000).
, configAssignmentTimeout :: Duration -- ^ Millisecond timeout for assignment (default: 4000)
, configMaxTelemetyQueueSize :: Int -- ^ Max size of telemetry queues before messages are dropped (default: 1000).
, configIsTerminal :: Flag IsTerminal -- ^ Whether a terminal is attached (set automaticaly at runtime).
, configLogPrintSource :: Flag LogPrintSource -- ^ Whether to print the source reference when logging errors (set automatically at runtime).
, configLogFormatter :: LogFormatter -- ^ Log formatter to use (set automatically at runtime).
, configSHA :: String -- ^ SHA to include in log messages (set automatically).
, configFailParsingForTesting :: Flag FailTestParsing -- ^ Simulate internal parse failure for testing (default: False).
, configOptions :: Options -- ^ Options configurable via command line arguments.
{ configAppName :: String -- ^ Application name ("semantic")
, configHostName :: String -- ^ HostName from getHostName
, configProcessID :: ProcessID -- ^ ProcessID from getProcessID
, configStatsHost :: Stat.Host -- ^ Host of statsd/datadog (default: "127.0.0.1")
, configStatsPort :: Stat.Port -- ^ Port of statsd/datadog (default: "28125")
, configTreeSitterParseTimeout :: Duration -- ^ Timeout in milliseconds before canceling tree-sitter parsing (default: 6000).
, configTreeSitterUnmarshalTimeout :: Duration -- ^ Timeout in milliseconds before canceling tree-sitter unmarshalling (default: 4000).
, configAssignmentTimeout :: Duration -- ^ Millisecond timeout for assignment (default: 4000)
, configMaxTelemetyQueueSize :: Int -- ^ Max size of telemetry queues before messages are dropped (default: 1000).
, configIsTerminal :: Flag IsTerminal -- ^ Whether a terminal is attached (set automaticaly at runtime).
, configLogPrintSource :: Flag LogPrintSource -- ^ Whether to print the source reference when logging errors (set automatically at runtime).
, configLogFormatter :: LogFormatter -- ^ Log formatter to use (set automatically at runtime).
, configSHA :: String -- ^ SHA to include in log messages (set automatically).
, configFailParsingForTesting :: Flag FailTestParsing -- ^ Simulate internal parse failure for testing (default: False).
, configOptions :: Options -- ^ Options configurable via command line arguments.
}
-- Options configurable via command line arguments.
@ -85,6 +86,7 @@ defaultConfig options@Options{..} = do
(statsHost, statsPort) <- lookupStatsAddr
size <- envLookupNum 1000 "MAX_TELEMETRY_QUEUE_SIZE"
parseTimeout <- envLookupNum 6000 "TREE_SITTER_PARSE_TIMEOUT"
unmarshalTimeout <- envLookupNum 4000 "TREE_SITTER_UNMARSHAL_TIMEOUT"
assignTimeout <- envLookupNum 4000 "SEMANTIC_ASSIGNMENT_TIMEOUT"
pure Config
{ configAppName = "semantic"
@ -94,6 +96,7 @@ defaultConfig options@Options{..} = do
, configStatsPort = statsPort
, configTreeSitterParseTimeout = fromMilliseconds parseTimeout
, configTreeSitterUnmarshalTimeout = fromMilliseconds unmarshalTimeout
, configAssignmentTimeout = fromMilliseconds assignTimeout
, configMaxTelemetyQueueSize = size
, configIsTerminal = flag IsTerminal isTerminal