1
1
mirror of https://github.com/github/semantic.git synced 2025-01-03 21:16:12 +03:00

Merge pull request #227 from github/python-test-programs

Institute semantic-python test suite/address milestone 1
This commit is contained in:
Patrick Thomson 2019-09-09 12:52:19 -04:00 committed by GitHub
commit d9f73b668b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 404 additions and 38 deletions

View File

@ -38,6 +38,7 @@ script:
- cabal new-run semantic:test
- cabal new-run semantic-core:spec
- cabal new-run semantic-core:doctest
- cabal new-run semantic-python:test
# parse-examples is disabled because it slaughters our CI
# - cabal new-run semantic:parse-examples

View File

@ -1,6 +1,7 @@
{-# LANGUAGE FlexibleContexts, OverloadedStrings, RankNTypes, RecordWildCards, TypeApplications, TypeOperators #-}
module Analysis.ScopeGraph
( ScopeGraph(..)
, Ref (..)
, Decl(..)
, scopeGraph
, scopeGraphAnalysis

View File

@ -1,6 +1,7 @@
{-# LANGUAGE FlexibleInstances, GeneralizedNewtypeDeriving, MultiParamTypeClasses, OverloadedStrings, RecordWildCards, TypeOperators, UndecidableInstances #-}
module Data.Loc
( Loc(..)
, interactive
, Span(..)
, emptySpan
, Pos(..)
@ -26,6 +27,8 @@ data Loc = Loc
}
deriving (Eq, Ord, Show)
interactive :: Loc
interactive = Loc "<interactive>" emptySpan
data Span = Span
{ spanStart :: {-# UNPACK #-} !Pos

View File

@ -18,17 +18,14 @@ extra-source-files: README.md
tested-with: GHC == 8.6.4
library
exposed-modules:
Language.Python.Core
-- other-modules:
-- other-extensions:
build-depends: base ^>=4.12.0.0
, semantic-core ^>= 0.0.0.0
, tree-sitter ^>= 0.1.0.0
, tree-sitter-python ^>= 0.1.0.1
hs-source-dirs: src
common haskell
default-language: Haskell2010
build-depends: base ^>=4.12
, fused-effects ^>= 0.5
, semantic-core ^>= 0.0
, tree-sitter ^>= 0.2
, tree-sitter-python ^>= 0.2
ghc-options:
-Weverything
-Wno-missing-local-signatures
@ -41,3 +38,38 @@ library
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
library
import: haskell
exposed-modules: Language.Python.Core
hs-source-dirs: src
test-suite test
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
ghc-options: -threaded
other-modules: Directive
, Instances
build-depends: semantic-python == 0.0.0.0
, aeson ^>= 1.4.4
, aeson-pretty ^>= 0.8.7
, bytestring ^>= 0.10.8.2
, containers ^>= 0.6
, directory ^>= 1.3.3
, exceptions ^>= 0.10.2
, filepath ^>= 1.4.2.1
, process ^>= 1.6.5
, streaming ^>= 0.2.2
, streaming-process ^>= 0.1
, streaming-bytestring ^>= 0.1.6
, tasty ^>= 1.2.3
, tasty-hunit ^>= 0.10.0.2
, text ^>= 1.2.3
, trifecta >= 2 && <3
, unordered-containers ^>= 0.2.10

View File

@ -1,28 +1,40 @@
{-# LANGUAGE DefaultSignatures, DeriveGeneric, FlexibleContexts, FlexibleInstances, RecordWildCards, StandaloneDeriving, TypeOperators #-}
{-# LANGUAGE DefaultSignatures, DisambiguateRecordFields, FlexibleContexts, FlexibleInstances, OverloadedStrings, ScopedTypeVariables, NamedFieldPuns, TypeOperators #-}
module Language.Python.Core
( compile
) where
import Prelude hiding (fail)
import Control.Effect hiding ((:+:))
import Control.Monad.Fail
import Data.Core as Core
import Data.Foldable
import Data.Name as Name
import GHC.Generics
import Prelude hiding (fail)
import qualified TreeSitter.Python.AST as Py
class Compile t where
class Compile py where
-- FIXME: we should really try not to fail
compile :: MonadFail m => t -> m Core
default compile :: (MonadFail m, Show t) => t -> m Core
compile :: (Member Core sig, Carrier sig t, Foldable t, MonadFail m) => py -> m (t Name)
default compile :: (MonadFail m, Show py) => py -> m (t Name)
compile = defaultCompile
defaultCompile :: (MonadFail m, Show t) => t -> m Core
defaultCompile :: (MonadFail m, Show py) => py -> m (t Name)
defaultCompile t = fail $ "compilation unimplemented for " <> show t
instance (Compile l, Compile r) => Compile (Either l r) where compile = compileSum
instance Compile Py.AssertStatement
instance Compile Py.Attribute
instance Compile Py.Assignment where
compile (Py.Assignment (Py.ExpressionList [lhs]) (Just rhs) Nothing) = do
target <- compile lhs
value <- compile rhs
pure (target .= value)
compile other = fail ("Unhandled assignment case: " <> show other)
instance Compile Py.AugmentedAssignment
instance Compile Py.Await
instance Compile Py.BinaryOperator
instance Compile Py.Block
@ -46,9 +58,18 @@ instance Compile Py.ExecStatement
instance Compile Py.Expression where compile = compileSum
instance Compile Py.ExpressionStatement
instance Compile Py.ExpressionStatement where
compile (Py.ExpressionStatement children) = do
actions <- traverse compile children
pure $ do' (fmap (Nothing :<-) actions)
instance Compile Py.False where compile _ = pure (Bool False)
instance Compile Py.ExpressionList where
compile (Py.ExpressionList exprs) = do
actions <- traverse compile exprs
pure $ do' (fmap (Nothing :<-) actions)
instance Compile Py.False where compile _ = pure (bool False)
instance Compile Py.Float
instance Compile Py.ForStatement
@ -57,15 +78,15 @@ instance Compile Py.FunctionDefinition where
compile Py.FunctionDefinition
{ name = Py.Identifier name
, parameters = Py.Parameters parameters
, ..
, body
} = do
parameters' <- params
body' <- compile body
pure (Let (User name) := lams parameters' body')
pure (pure name .= lams parameters' body')
where params = case parameters of
Nothing -> pure []
Just p -> traverse param [p] -- FIXME: this is wrong in node-types.json, @p@ should already be a list
param (Right (Right (Right (Left (Py.Identifier name))))) = pure (User name)
param (Right (Right (Right (Left (Py.Identifier name))))) = pure (named' name)
param x = unimplemented x
unimplemented x = fail $ "unimplemented: " <> show x
@ -74,33 +95,39 @@ instance Compile Py.GeneratorExpression
instance Compile Py.GlobalStatement
instance Compile Py.Identifier where
compile (Py.Identifier text) = pure (Var (User text))
compile (Py.Identifier bytes) = pure (pure bytes)
instance Compile Py.IfStatement where
compile Py.IfStatement{..} = If <$> compile condition <*> compile consequence <*> case alternative of
Nothing -> pure Unit
Just clauses -> foldr clause (pure Unit) clauses
where clause (Left Py.ElifClause{..}) rest = If <$> compile condition <*> compile consequence <*> rest
clause (Right Py.ElseClause{..}) _ = compile body
compile Py.IfStatement{ condition, consequence, alternative } =
if' <$> compile condition <*> compile consequence <*> foldr clause (pure unit) alternative
where clause (Right Py.ElseClause{ body }) _ = compile body
clause (Left Py.ElifClause{ condition, consequence }) rest =
if' <$> compile condition <*> compile consequence <*> rest
instance Compile Py.ImportFromStatement
instance Compile Py.ImportStatement
instance Compile Py.Integer
instance Compile Py.KeywordIdentifier
instance Compile Py.Lambda
instance Compile Py.List
instance Compile Py.ListComprehension
instance Compile Py.Module where
compile (Py.Module Nothing) = pure Unit
compile (Py.Module (Just statements)) = block <$> traverse compile statements
compile (Py.Module stmts) = do
-- Buggy and ad-hoc: the toList call promotes too many variables
-- to top-level scope.
res <- traverse compile stmts
let names = concatMap toList res
pure . record $ zip names res
instance Compile Py.NamedExpression
instance Compile Py.None
instance Compile Py.NonlocalStatement
instance Compile Py.NotOperator
instance Compile Py.ParenthesizedExpression
instance Compile Py.PassStatement
instance Compile Py.PassStatement where
compile (Py.PassStatement _) = pure Core.unit
instance Compile Py.PrimaryExpression where compile = compileSum
@ -115,20 +142,24 @@ instance Compile Py.SimpleStatement where compile = compileSum
instance Compile Py.String
instance Compile Py.Subscript
instance Compile Py.True where compile _ = pure (Bool True)
instance Compile Py.True where compile _ = pure (bool True)
instance Compile Py.TryStatement
instance Compile Py.Tuple
instance Compile Py.Tuple where
compile (Py.Tuple []) = pure Core.unit
compile (Py.Tuple t) = fail ("Unimplemented: non-empty tuple " <> show t)
instance Compile Py.UnaryOperator
instance Compile Py.WhileStatement
instance Compile Py.WithStatement
instance Compile Py.Yield
compileSum :: (Generic t, GCompileSum (Rep t), MonadFail m) => t -> m Core
compileSum :: (Generic py, GCompileSum (Rep py), Member Core sig, Foldable t, Carrier sig t, MonadFail m) => py -> m (t Name)
compileSum = gcompileSum . from
class GCompileSum f where
gcompileSum :: MonadFail m => f a -> m Core
gcompileSum :: (Foldable t, Member Core sig, Carrier sig t, MonadFail m) => f a -> m (t Name)
instance GCompileSum f => GCompileSum (M1 D d f) where
gcompileSum (M1 f) = gcompileSum f

View File

@ -0,0 +1,67 @@
module Directive ( Directive (..)
, parseDirectives
, describe
, toProcess
) where
import Control.Applicative
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as ByteString
import Data.List.NonEmpty (NonEmpty)
import Data.Coerce
import System.Process
import qualified Text.Trifecta as Trifecta
{- |
Directives are parsed from magic comments in test files and
describe to the test suite how to query the results of a given test
case. A directive that looks like this:
@
# CHECK-JQ: has("mach")
@
would, after converting the contents of the file to a Core expression,
dump that expression to JSON and pipe said JSON to @jq -e
'has("mach")@, which will return an error code unless the passed JSON
is a hash containing the @"mach"@ key.
This syntax was inspired by LLVM's
[FileCheck](https://llvm.org/docs/CommandGuide/FileCheck.html). This
approach is less direct than tests that pattern-match over an AST, but
enable us to keep the text of test cases in close proximity to the
assertions we want to make, which improves maintainability
significantly and has been a successful strategy for the LLVM and Rust
projects.
-}
data Directive = JQ ByteString -- | @# CHECK-JQ: expr@
| Fails -- | @# CHECK-FAILS@ fails unless translation fails.
deriving (Eq, Show)
describe :: Directive -> String
describe Fails = "<expect failure>"
describe (JQ b) = ByteString.unpack b
fails :: Trifecta.Parser Directive
fails = Fails <$ Trifecta.string "# CHECK-FAILS"
jq :: Trifecta.Parser Directive
jq = do
Trifecta.string "# CHECK-JQ: "
JQ . ByteString.pack <$> many (Trifecta.noneOf "\n")
directive :: Trifecta.Parser Directive
directive = fails <|> jq
toplevel :: Trifecta.Parser (NonEmpty Directive)
toplevel = directive `Trifecta.sepEndByNonEmpty` Trifecta.char '\n'
parseDirectives :: ByteString -> Either String (NonEmpty Directive)
parseDirectives = Trifecta.foldResult (Left . show) Right
. Trifecta.parseByteString toplevel mempty
toProcess :: Directive -> CreateProcess
toProcess (JQ d) = proc "jq" ["-e", ByteString.unpack d]
toProcess x = error ("can't call toProcess on " <> show x)

View File

@ -0,0 +1,125 @@
{-# LANGUAGE DeriveAnyClass, DerivingStrategies, GeneralizedNewtypeDeriving, LambdaCase, StandaloneDeriving, FlexibleInstances, NamedFieldPuns, OverloadedStrings, QuantifiedConstraints, TypeOperators, UndecidableInstances, TypeApplications #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Instances () where
-- Testing code depends on certain instances that we don't want to
-- expose in semantic-core proper, yet are important enough that
-- we should keep track of them in a dedicated file.
import Analysis.ScopeGraph
import Control.Effect.Sum
import Data.Aeson
import qualified Data.HashMap.Strict as HashMap
import Data.Loc
import Data.Core (Core, Ann (..))
import qualified Data.Map as Map
import Data.File
import Data.Term
import Data.Text (Text)
import Data.Scope (Scope, Incr)
import qualified Data.Scope as Scope
import Data.Name
instance ToJSON a => ToJSON (Named a) where
toJSON _ = object []
instance ToJSON1 Named where
liftToJSON f _ (Named i a) = object
[ "name" .= i
, "value" .= f a
]
-- Loses information compared to the toJSON instance
-- due to an infelicity in how Aeson's toJSON1 is implemented.
-- The correct thing to do here is to manually munge the bytestring
-- together as a builder, but we don't even hit this code path,
-- so it will do for now.
liftToEncoding f _ (Named name a) = f a
instance ToJSON2 Incr where
liftToJSON2 f _ g _ = \case
Scope.Z a -> f a
Scope.S b -> g b
liftToEncoding2 f _ g _ = \case
Scope.Z a -> f a
Scope.S b -> g b
deriving newtype instance (ToJSON a) => ToJSON (Ignored a)
instance (Functor f, ToJSON1 f, ToJSON a) => ToJSON1 (Scope a f) where
liftToJSON f g (Scope.Scope a) = toJSON1 (fmap (toJSON2 . fmap (liftToJSON f g)) a)
liftToEncoding f g (Scope.Scope a) = liftToEncoding inner outer a where
inner = liftToEncoding2 toEncoding toEncodingList hoist loist
outer = liftToEncodingList2 toEncoding toEncodingList hoist loist
hoist = liftToEncoding f g
loist = liftToEncodingList f g
deriving anyclass instance (Functor f, ToJSON1 f) => ToJSON1 (Core f)
instance (ToJSON1 (sig (Term sig))) => ToJSON1 (Term sig) where
liftToJSON f _ (Var a) = f a
liftToJSON f g (Term s) = liftToJSON f g s
liftToEncoding f _ (Var a) = f a
liftToEncoding f g (Term s) = liftToEncoding f g s
instance (ToJSON1 (f k), ToJSON1 (g k)) => ToJSON1 ((:+:) f g k) where
liftToJSON f g (L h) = liftToJSON f g h
liftToJSON f g (R h) = liftToJSON f g h
liftToEncoding f g (L h) = liftToEncoding f g h
liftToEncoding f g (R h) = liftToEncoding f g h
instance (ToJSON1 f) => ToJSON1 (Ann f) where
liftToJSON f g (Ann loc term) =
let
rest = case liftToJSON f g term of
Object os -> HashMap.toList os
other -> ["value" .= other]
in object (["location" .= loc] <> rest)
-- We default to deriving the default toEncoding definition (that piggybacks
-- off of toJSON) so that we never hit the problematic code paths associated
-- with toEncoding above.
instance ToJSON a => ToJSON (File a) where
toJSON File{fileLoc, fileBody} = object
[ "location" .= fileLoc
, "body" .= fileBody
]
instance ToJSON Span where
toJSON Span{spanStart, spanEnd} = object
[ "kind" .= ("span" :: Text)
, "start" .= spanStart
, "end" .= spanEnd
]
instance ToJSON Pos where
toJSON Pos{posLine, posCol} = object
[ "kind" .= ("pos" :: Text)
, "line" .= posLine
, "column" .= posCol
]
instance ToJSON Loc where
toJSON Loc{locPath, locSpan} = object
[ "kind" .= ("loc" :: Text)
, "path" .= locPath
, "span" .= locSpan
]
instance ToJSON Ref where
toJSON (Ref loc) = object [ "kind" .= ("ref" :: Text)
, "location" .= loc]
instance ToJSON Decl where
toJSON Decl{declSymbol, declLoc} = object
[ "kind" .= ("decl" :: Text)
, "symbol" .= declSymbol
, "location" .= declLoc
]
instance ToJSON ScopeGraph where
toJSON (ScopeGraph sc) = toJSON . Map.mapKeys declSymbol $ sc

View File

@ -0,0 +1,95 @@
{-# LANGUAGE DeriveAnyClass, DerivingStrategies, GeneralizedNewtypeDeriving, OverloadedStrings, TypeApplications, TypeOperators #-}
module Main (main) where
import qualified Analysis.Eval as Eval
import Control.Effect
import Control.Effect.Fail
import Control.Monad hiding (fail)
import Control.Monad.Catch
import Control.Monad.IO.Class
import qualified Data.Aeson as Aeson
import qualified Data.Aeson.Encode.Pretty as Aeson
import qualified Data.ByteString.Char8 as ByteString
import qualified Data.ByteString.Lazy.Char8 as ByteString.Lazy
import qualified Data.ByteString.Streaming.Char8 as ByteStream
import Data.Core
import Data.Core.Pretty
import Data.File
import Data.Foldable
import Data.Function
import Data.List (sort)
import Data.Loc
import Data.Maybe
import Data.Name
import Data.Term
import GHC.Stack
import qualified Language.Python.Core as Py
import Prelude hiding (fail)
import Streaming
import qualified Streaming.Process
import System.Directory
import System.Exit
import System.FilePath
import qualified TreeSitter.Python as TSP
import qualified TreeSitter.Python.AST as TSP
import qualified TreeSitter.Unmarshal as TS
import qualified Test.Tasty as Tasty
import qualified Test.Tasty.HUnit as HUnit
import Analysis.ScopeGraph
import qualified Directive
import Instances ()
assertJQExpressionSucceeds :: Directive.Directive -> Term (Ann :+: Core) Name -> HUnit.Assertion
assertJQExpressionSucceeds directive core = do
bod <- case scopeGraph Eval.eval [File interactive core] of
(heap, [File _ (Right result)]) -> pure $ Aeson.object
[ "scope" Aeson..= heap
, "heap" Aeson..= result
, "tree" Aeson..= Aeson.toJSON1 core
]
_other -> HUnit.assertFailure "Couldn't run scope dumping mechanism; this shouldn't happen"
let ignore = ByteStream.effects . hoist ByteStream.effects
sgJSON = ByteStream.fromLazy $ Aeson.encode bod
jqPipeline = Streaming.Process.withStreamingProcess (Directive.toProcess directive) sgJSON ignore
errorMsg = "jq(1) returned non-zero exit code"
dirMsg = "jq expression: " <> show directive
jsonMsg = "JSON value: " <> ByteString.Lazy.unpack (Aeson.encodePretty bod)
treeMsg = "Core expr: " <> showCore (stripAnnotations core)
catch @_ @Streaming.Process.ProcessExitedUnsuccessfully jqPipeline $ \err -> do
HUnit.assertFailure (unlines [errorMsg, dirMsg, jsonMsg, treeMsg, show err])
fixtureTestTreeForFile :: HasCallStack => FilePath -> Tasty.TestTree
fixtureTestTreeForFile fp = HUnit.testCaseSteps fp $ \step -> withFrozenCallStack $ do
fileContents <- ByteString.readFile ("semantic-python/test/fixtures" </> fp)
directives <- case Directive.parseDirectives fileContents of
Right dir -> pure dir
Left err -> HUnit.assertFailure ("Directive parsing error: " <> err)
result <- TS.parseByteString TSP.tree_sitter_python fileContents
let coreResult = fmap (Control.Effect.run . runFail . Py.compile @TSP.Module @_ @(Term (Ann :+: Core))) result
for_ directives $ \directive -> do
step (Directive.describe directive)
case coreResult of
Left err -> HUnit.assertFailure ("Parsing failed: " <> err)
Right (Left _) | directive == Directive.Fails -> pure ()
Right (Right _) | directive == Directive.Fails -> HUnit.assertFailure ("Expected translation to fail")
Right (Right item) -> assertJQExpressionSucceeds directive item
Right (Left err) -> HUnit.assertFailure ("Compilation failed: " <> err)
milestoneFixtures :: IO Tasty.TestTree
milestoneFixtures = do
files <- liftIO (listDirectory "semantic-python/test/fixtures")
let pythons = sort (filter ("py" `isExtensionOf`) files)
pure $ Tasty.testGroup "Translation" (fmap fixtureTestTreeForFile pythons)
main :: IO ()
main = do
jq <- findExecutable "jq"
when (isNothing jq) (die "Error: jq(1) not found in $PATH.")
milestoneFixtures >>= Tasty.defaultMain

View File

@ -0,0 +1,2 @@
# CHECK-JQ: .scope == {}
# CHECK-JQ: .heap == {}

View File

@ -0,0 +1,2 @@
# CHECK-JQ: .scope == {}
pass

View File

@ -0,0 +1,2 @@
# CHECK-JQ: .scope == {} and .tree.contents == []
()

View File

@ -0,0 +1,3 @@
# CHECK-JQ: .scope | has("hello") and has("goodbye")
hello = ()
goodbye = ()

View File

@ -0,0 +1,2 @@
# CHECK-FAILS
eval("'We will never support eval.'")

View File

@ -58,7 +58,7 @@ common dependencies
, fused-effects ^>= 0.5.0.0
, fused-effects-exceptions ^>= 0.2.0.0
, hashable ^>= 1.2.7.0
, tree-sitter ^>= 0.1.0.0
, tree-sitter ^>= 0.2
, mtl ^>= 2.2.2
, network ^>= 2.8.0.0
, process ^>= 1.6.3.0
@ -323,7 +323,7 @@ library
, tree-sitter-haskell ^>= 0.1.0.0
, tree-sitter-json ^>= 0.1.0.0
, tree-sitter-php ^>= 0.1.0.0
, tree-sitter-python ^>= 0.1.0.1
, tree-sitter-python ^>= 0.2.0.0
, tree-sitter-ruby ^>= 0.1.0.0
, tree-sitter-typescript ^>= 0.1.0.0
, tree-sitter-tsx ^>= 0.1.0.0