1
1
mirror of https://github.com/github/semantic.git synced 2024-12-26 16:33:03 +03:00

Merge branch 'bump-haskell-tree-sitter' into test-precise-parsing

This commit is contained in:
Timothy Clem 2019-12-09 11:27:51 -08:00
commit d3763cbd69
10 changed files with 157 additions and 55 deletions

View File

@ -30,6 +30,9 @@ package semantic-python
package semantic-tags
ghc-options: -Werror
package semantic-ast
ghc-options: -Werror
source-repository-package
type: git
location: https://github.com/tclem/proto-lens-jsonpb
@ -38,4 +41,4 @@ source-repository-package
source-repository-package
type: git
location: https://github.com/antitypical/fused-syntax.git
tag: 5b7512db962d5b3f973002615b8bc86ab074d5aa
tag: 6b412694e64cc275ed06513b3c360f03bb1f04fd

View File

@ -114,7 +114,7 @@ def foo(x):
Now, let's graph.
``` bash
$ semantic graph main.py
$ semantic graph --language Python main.py
digraph
{
@ -129,7 +129,7 @@ digraph
To make this easier to visualize, let's use the `dot` utility from `graphviz` and write this graph to SVG:
```
$ semantic graph main.py | dot -Tsvg > main.html && open main.html
$ semantic graph --language Python main.py | dot -Tsvg > main.html && open main.html
```
You'll get something that looks like this:
@ -141,7 +141,7 @@ You'll get something that looks like this:
Call graphs expand on the import graphing capabilities by adding in some additional vertices and edges to the graph to identify named symbols and the connections between them. Taking the same example code, simply add `--call` to the invocation of semantic:
```
$ semantic graph --calls main.py | dot -Tsvg > main.html && open main.html
$ semantic graph --language Python --calls main.py | dot -Tsvg > main.html && open main.html
```
![a call graph](images/call_graph.svg)

View File

@ -18,7 +18,25 @@ extra-source-files: CHANGELOG.md
tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies
library
import: haskell
exposed-modules:
-- other-modules:
-- other-extensions:
@ -33,10 +51,17 @@ library
default-language: Haskell2010
executable semantic-ast
import: haskell
main-is: Main.hs
-- other-modules:
-- other-extensions:
build-depends: base ^>=4.12.0.0
, semantic-ast
, tree-sitter ^>= 0.6
, semantic-source ^>= 0.0
, tree-sitter-python ^>= 0.7
, bytestring ^>= 0.10.8.2
, optparse-applicative ^>= 0.14.3.0
, pretty-simple ^>= 3.1.0.0
hs-source-dirs: src
default-language: Haskell2010

View File

@ -1,14 +1,15 @@
{-# LANGUAGE TypeApplications #-}
{-# OPTIONS_GHC -Wno-unused-top-binds #-}
module Main (main) where
import System.Environment
import TreeSitter.Unmarshal
import qualified TreeSitter.Python.AST as AST
import qualified TreeSitter.Python as Python
import Source.Range
import Source.Span
import Data.ByteString.Char8
import Data.ByteString (pack, readFile, ByteString)
import Data.ByteString (readFile)
import System.IO (FilePath)
import Options.Applicative hiding (style)
import Data.Semigroup ((<>))

View File

@ -1,5 +1,6 @@
{-# LANGUAGE DeriveGeneric, DeriveTraversable, FlexibleContexts, LambdaCase, MultiParamTypeClasses, OverloadedStrings, QuantifiedConstraints, RankNTypes,
ScopedTypeVariables, StandaloneDeriving, TypeFamilies, TypeOperators, UndecidableInstances #-}
{-# LANGUAGE DeriveGeneric, DeriveTraversable, FlexibleContexts, LambdaCase, MultiParamTypeClasses, OverloadedStrings,
QuantifiedConstraints, RankNTypes, ScopedTypeVariables, StandaloneDeriving, TypeFamilies, TypeOperators,
UndecidableInstances #-}
module Core.Core
( Core(..)
, rec
@ -47,10 +48,12 @@ import Data.Text (Text)
import GHC.Generics (Generic1)
import GHC.Stack
import Source.Span
import Syntax.Foldable
import Syntax.Module
import Syntax.Scope
import Syntax.Stack
import Syntax.Module
import Syntax.Term
import Syntax.Traversable
data Core f a
-- | Recursive local binding of a name in a scope; strict evaluation of the name in the body will diverge.
@ -89,6 +92,8 @@ infixl 9 :.
infix 3 :=
instance HFunctor Core
instance HFoldable Core
instance HTraversable Core
deriving instance (Eq a, forall a . Eq a => Eq (f a), Monad f) => Eq (Core f a)
deriving instance (Ord a, forall a . Eq a => Eq (f a)
@ -229,6 +234,8 @@ data Ann ann f a
deriving (Eq, Foldable, Functor, Generic1, Ord, Show, Traversable)
instance HFunctor (Ann ann)
instance HFoldable (Ann ann)
instance HTraversable (Ann ann)
instance RightModule (Ann ann) where
Ann l b >>=* f = Ann l (b >>= f)

View File

@ -51,6 +51,7 @@ library
exposed-modules:
Language.Python
Language.Python.Core
Language.Python.Failure
Language.Python.Tags
hs-source-dirs: src

View File

@ -12,15 +12,15 @@ import Prelude hiding (fail)
import AST.Element
import Control.Effect hiding ((:+:))
import Control.Effect.Reader
import Control.Monad.Fail
import Core.Core as Core
import Core.Name as Name
import Data.Coerce
import Data.Foldable
import Data.Function
import Data.List.NonEmpty (NonEmpty (..))
import Data.Traversable
import Data.Maybe
import GHC.Records
import Language.Python.Failure
import Source.Span (Span)
import Syntax.Stack (Stack (..))
import qualified Syntax.Stack as Stack
@ -51,29 +51,26 @@ pattern SingleIdentifier name <- Py.ExpressionList
-- compiled term.
type CoreSyntax sig t = ( Member Core sig
, Member (Ann Span) sig
, Member Failure sig
, Carrier sig t
, Foldable t
)
class Compile (py :: * -> *) where
-- FIXME: rather than failing the compilation process entirely
-- with MonadFail, we should emit core that represents failure
compile :: ( CoreSyntax syn t
, Member (Reader Bindings) sig
, Carrier sig m
, MonadFail m
)
=> py Span
-> (t Name -> m (t Name))
-> (t Name -> m (t Name))
default compile :: (MonadFail m, Show (py Span)) => py Span -> (t Name -> m (t Name)) -> (t Name -> m (t Name))
default compile :: (Applicative m, Member Failure syn, Carrier syn t, Show (py Span)) => py Span -> (t Name -> m (t Name)) -> (t Name -> m (t Name))
compile a _ _ = defaultCompile a
toplevelCompile :: ( CoreSyntax syn t
, Member (Reader Bindings) sig
, Carrier sig m
, MonadFail m
)
=> Py.Module Span
-> m (t Name)
@ -92,8 +89,8 @@ locate :: ( HasField "ann" syntax Span
-> t a
locate syn = Core.annAt (getField @"ann" syn)
defaultCompile :: (MonadFail m, Show py) => py -> m (t Name)
defaultCompile t = fail $ "compilation unimplemented for " <> show t
defaultCompile :: (Applicative m, Member Failure syn, Carrier syn t, Show py) => py -> m (t Name)
defaultCompile = pure . unimplemented
instance (Compile l, Compile r) => Compile (l :+: r) where
@ -129,15 +126,14 @@ data Located a = Located Span a
-- Desugaring an RHS involves walking as deeply as possible into an
-- assignment, storing the names we encounter as we go and eventually
-- returning a terminal expression. We have to keep track of which
desugar :: MonadFail m
=> [Located Name]
desugar :: [Located Name]
-> RHS Span
-> m ([Located Name], Desugared Span)
-> Either String ([Located Name], Desugared Span)
desugar acc = \case
Prj Py.Assignment { left = SingleIdentifier name, right = Just rhs, ann} ->
desugar (Located ann name : acc) rhs
R1 any -> pure (acc, any)
other -> fail ("desugar: couldn't desugar RHS " <> show other)
other -> Left ("desugar: couldn't desugar RHS " <> show other)
-- This is an algebra that is invoked from a left fold but that
-- returns a function (the 'difference' pattern) so that we can pass
@ -160,11 +156,13 @@ instance Compile Py.Assignment where
{ left = SingleIdentifier name
, right = Just rhs
, ann
} cc next = do
(names, val) <- desugar [Located ann name] rhs
compile val pure next >>= foldr collapseDesugared cc names >>= pure . locate it
} cc next = case desugar [Located ann name] rhs of
Right (names, val) -> compile val pure next >>= foldr collapseDesugared cc names >>= pure . locate it
Left msg -> pure $ unimplemented msg
compile other _ _ = fail ("Unhandled assignment case: " <> show other)
compile other _ _ = pure $ invariantViolated ("Unhandled assignment case: " <> show other)
-- End assignment compilation
@ -188,12 +186,12 @@ instance Compile Py.Call where
func <- compile function pure next
let compileArg = \case
Prj expr -> compile (expr :: Py.Expression Span) pure next
other -> fail ("Can't compile non-expression function argument: " <> show other)
other -> pure . invariantViolated $ "Can't compile non-expression function argument: " <> show other
-- Python function arguments are defined to evaluate left to right.
args <- traverse compileArg args
locate it (func $$* args) & cc
compile it _ _ = fail ("can't compile Call node with generator expression: " <> show it)
compile it _ _ = pure . invariantViolated $ "can't compile Call node with generator expression: " <> show it
instance Compile Py.ClassDefinition where
compile it@Py.ClassDefinition { body = pybody, name = Py.Identifier _ann (Name -> n) } cc next = do
@ -226,12 +224,15 @@ instance Compile Py.DecoratedDefinition where
, extraChildren = [ Py.Decorator { extraChildren } ]
} cc next = do
let thenReassign item = do
_ :> lastbound <- asks unBindings
tocall <- compile extraChildren pure next
let callit go = (pure lastbound .= (tocall $$ pure lastbound)) >>> go
fmap callit (cc item)
bindings <- asks unBindings
case bindings of
_ :> lastbound -> do
tocall <- compile extraChildren pure next
let callit go = (pure lastbound .= (tocall $$ pure lastbound)) >>> go
fmap callit (cc item)
_ -> pure . invariantViolated $ "Encountered a decorated definition without a corresponding function"
locate it <$> compile definition thenReassign next
compile it _ _ = fail ("Can't figure out decorated definition " <> show it)
compile it _ _ = pure . invariantViolated $ "Can't figure out decorated definition " <> show it
instance Compile Py.DeleteStatement
instance Compile Py.Dictionary
instance Compile Py.DictionaryComprehension
@ -260,7 +261,7 @@ instance Compile Py.ExpressionList where
= fmap (locate it)
. compile child cc
compile Py.ExpressionList { Py.extraChildren = items } _
= const (fail ("unimplemented: ExpressionList of length " <> show items))
= const . pure . unimplemented $ "ExpressionList of length " <> show items
instance Compile Py.False where
@ -276,17 +277,20 @@ instance Compile Py.FunctionDefinition where
, body
} cc next = do
-- Compile each of the parameters, then the body.
parameters' <- traverse param parameters
body' <- compile body pure next
-- Build a lambda.
let located = locate it (rec (Name.named' (Name name)) (lams parameters' body'))
-- Give it a name (below), then augment the current continuation
-- with the new name (with 'def'), so that calling contexts know
-- that we have built an exportable definition.
assigning located <$> local (def (Name name)) (cc next)
where param (Py.Parameter (Prj (Py.Identifier _pann pname))) = pure . named' . Name $ pname
param x = unimplemented x
unimplemented x = fail $ "unimplemented: " <> show x
let parameterMs = fmap param parameters
if any isNothing parameterMs
then pure . invariantViolated $ "Couldn't extract parameters"
else do
let parameters' = catMaybes parameterMs
body' <- compile body pure next
-- Build a lambda.
let located = locate it (rec (Name.named' (Name name)) (lams parameters' body'))
-- Give it a name (below), then augment the current continuation
-- with the new name (with 'def'), so that calling contexts know
-- that we have built an exportable definition.
assigning located <$> local (def (Name name)) (cc next)
where param (Py.Parameter (Prj (Py.Identifier _pann pname))) = Just . named' . Name $ pname
param _ = Nothing
assigning item f = (Name.named' (Name name) :<- item) >>>= f
instance Compile Py.FutureImportStatement
@ -349,7 +353,7 @@ instance Compile Py.ReturnStatement where
compile it@Py.ReturnStatement { Py.extraChildren = vals } _ next = locate it <$> case vals of
Nothing -> pure none
Just Py.ExpressionList { extraChildren = [val] } -> compile val pure next
Just Py.ExpressionList { extraChildren = vals } -> fail ("unimplemented: return statement returning " <> show (length vals) <> " values")
Just Py.ExpressionList { extraChildren = vals } -> pure (invariantViolated ("unimplemented: return statement returning " <> show (length vals) <> " values"))
instance Compile Py.RaiseStatement
@ -360,12 +364,16 @@ deriving instance Compile Py.SimpleStatement
instance Compile Py.String where
compile it@Py.String { extraChildren } cc _ = do
contents <- for extraChildren $ \case
Prj Py.EscapeSequence { text } -> pure text
other -> fail ("Couldn't string-desugar " <> show other)
let extract = \case
Prj Py.EscapeSequence { text } -> Just text
_other -> Nothing
let new = pure "__semantic_prelude" ... "str" ... "__slots" ... "__new__"
cc $ locate it (new $$ Core.string (mconcat contents))
let contents = fmap extract extraChildren
if any isNothing contents
then pure . invariantViolated $ "Couldn't string-desugar " <> show it
else let new = pure "__semantic_prelude" ... "str" ... "__slots" ... "__new__"
in cc $ locate it (new $$ Core.string (mconcat (catMaybes contents)))
instance Compile Py.Subscript
@ -377,8 +385,7 @@ instance Compile Py.TryStatement
instance Compile Py.Tuple where
compile it@Py.Tuple { Py.extraChildren = [] } cc _ = cc $ locate it unit
compile it _ _
= fail ("Unimplemented: non-empty tuple " <> show it)
compile it _ _ = pure $ unimplemented it
instance Compile Py.UnaryOperator
instance Compile Py.WhileStatement

View File

@ -0,0 +1,54 @@
{-# LANGUAGE DeriveFoldable, DeriveFunctor, DeriveGeneric, DeriveTraversable, ExistentialQuantification,
FlexibleContexts, KindSignatures, MultiParamTypeClasses, QuantifiedConstraints, RankNTypes,
StandaloneDeriving, TypeOperators #-}
module Language.Python.Failure
( Failure (..)
, unimplemented
, invariantViolated
, eliminateFailures
) where
import Prelude hiding (fail)
import Control.Effect.Carrier
import Control.Monad.Fail
import Data.Coerce
import Data.Kind
import GHC.Generics (Generic1)
import Syntax.Foldable
import Syntax.Module
import Syntax.Term
import Syntax.Traversable
data Failure (f :: Type -> Type) a
= Unimplemented String
| InvariantViolated String
deriving Generic1
instance Show (Failure f a) where
show (Unimplemented a) = "unimplemented: " <> a
show (InvariantViolated a) = "invariant violated: " <> a
deriving instance Functor (Failure f)
deriving instance Foldable (Failure f)
deriving instance Traversable (Failure f)
instance HFunctor Failure
instance HFoldable Failure
instance HTraversable Failure
instance RightModule Failure where
a >>=* _ = coerce a
unimplemented :: (Show ast, Member Failure sig, Carrier sig m) => ast -> m a
unimplemented = send . Unimplemented . show
invariantViolated :: (Member Failure sig, Carrier sig m) => String -> m a
invariantViolated = send . InvariantViolated
eliminateFailures :: (MonadFail m, HTraversable sig, RightModule sig)
=> Term (Failure :+: sig) a
-> m (Term sig a)
eliminateFailures = Syntax.Term.handle (pure . pure) (fail . show)

View File

@ -31,6 +31,7 @@ import Data.Maybe
import Data.Text (Text)
import GHC.Stack
import qualified Language.Python.Core as Py
import Language.Python.Failure
import Prelude hiding (fail)
import Source.Span
import Streaming
@ -108,6 +109,7 @@ assertEvaluatesTo core k val = do
assertTreeEqual :: Term Core Name -> Term Core Name -> HUnit.Assertion
assertTreeEqual t item = HUnit.assertEqual ("got (pretty)" <> showCore item) t item
checkPythonFile :: HasCallStack => Path.RelFile -> Tasty.TestTree
checkPythonFile fp = HUnit.testCaseSteps (Path.toString fp) $ \step -> withFrozenCallStack $ do
-- Extract the directives and the core associated with the provided file
@ -118,8 +120,10 @@ checkPythonFile fp = HUnit.testCaseSteps (Path.toString fp) $ \step -> withFroze
-- Run the compiler
let coreResult = Control.Effect.run
. runFail
. eliminateFailures
. Control.Effect.run
. runReader @Py.Bindings mempty
. Py.toplevelCompile
. Py.toplevelCompile @(Failure :+: Ann Span :+: Core) @(Term _)
<$> result
-- Dispatch based on the result-directive pair

View File

@ -137,7 +137,7 @@ keywordArgument :: Assignment (Term Loc)
keywordArgument = makeTerm <$> symbol KeywordArgument <*> children (Statement.Assignment [] <$> term expression <*> term expression)
parenthesizedExpression :: Assignment (Term Loc)
parenthesizedExpression = symbol ParenthesizedExpression *> children expressions
parenthesizedExpression = (symbol ParenthesizedExpression <|> symbol ParenthesizedExpression') *> children expressions
parameter :: Assignment (Term Loc)
parameter = makeTerm <$> symbol DefaultParameter <*> children (Statement.Assignment [] <$> term expression <*> term expression)