diff --git a/.gitignore b/.gitignore
index d267679dd..9352acf31 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ cabal.project.local*
 dist
 dist-newstyle
 .ghc.environment.*
+.ghci_history
 tmp/
 /bin/
diff --git a/bench/evaluation/Main.hs b/bench/evaluation/Main.hs
index 2eb122822..405cfe423 100644
--- a/bench/evaluation/Main.hs
+++ b/bench/evaluation/Main.hs
@@ -1,50 +1,85 @@
-{-# LANGUAGE DataKinds, FlexibleContexts, TypeFamilies, TypeApplications #-}
+{-# LANGUAGE DataKinds, FlexibleContexts, PackageImports, PartialTypeSignatures, TypeApplications, TypeFamilies #-}
 module Main where

+import Algebra.Graph
 import Control.Monad
-import Criterion.Main
+import Data.Abstract.Evaluatable
+import Data.Abstract.FreeVariables
+import Data.Blob
+import Data.Blob.IO (readBlobFromFile')
+import Data.Bifunctor
+import Data.Functor.Classes
+import "semantic" Data.Graph (Graph (..), topologicalSort)
+import Data.Graph.ControlFlowVertex
 import qualified Data.Language as Language
+import Data.Project
 import Data.Proxy
+import Data.Term
+import Gauge.Main
 import Parsing.Parser
 import Semantic.Config (defaultOptions)
-import Semantic.Task (withOptions)
-import Semantic.Util hiding (evalRubyProject, evalPythonProject, evaluateProject)
+import Semantic.Graph
+import Semantic.Task (SomeException, TaskSession (..), runTask, withOptions)
+import Semantic.Util hiding (evalPythonProject, evalRubyProject, evaluateProject)
+import Source.Loc
+import qualified System.Path as Path
+import System.Path ((</>))

 -- Duplicating this stuff from Util to shut off the logging
+
+callGraphProject' :: ( Language.SLanguage lang
+                     , Ord1 syntax
+                     , Declarations1 syntax
+                     , Evaluatable syntax
+                     , FreeVariables1 syntax
+                     , AccessControls1 syntax
+                     , HasPrelude lang
+                     , Functor syntax
+                     , VertexDeclarationWithStrategy (VertexDeclarationStrategy syntax) syntax syntax
+                     )
+                  => TaskSession
+                  -> Proxy lang
+                  -> Parser (Term syntax Loc)
+                  -> Path.RelFile
+                  -> IO (Either String (Data.Graph.Graph ControlFlowVertex))
+callGraphProject' session proxy parser path = fmap (first show) . runTask session $ do
+  blob <- readBlobFromFile' (fileForRelPath path)
+  package <- fmap snd <$> parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] (Language.reflect proxy) [])
+  modules <- topologicalSort <$> runImportGraphToModules proxy package
+  runCallGraph proxy False modules package
+
+callGraphProject proxy parser paths = withOptions defaultOptions $ \ config logger statter ->
+  callGraphProject' (TaskSession config "" False logger statter) proxy parser paths
+
 evalRubyProject = justEvaluating <=< evaluateProject (Proxy :: Proxy 'Language.Ruby) rubyParser
 evalPythonProject = justEvaluating <=< evaluateProject (Proxy :: Proxy 'Language.Python) pythonParser

-evaluateProject proxy parser paths = withOptions defaultOptions $ \ config logger statter ->
-  evaluateProject' (TaskConfig config logger statter) proxy parser paths
+evaluateProject proxy parser path = withOptions defaultOptions $ \ config logger statter ->
+  evaluateProject' (TaskSession config "" False logger statter) proxy parser [Path.toString path]

--- We use `fmap show` to ensure that all the parts of the result of evaluation are
--- evaluated themselves. While an NFData instance is the most morally correct way
--- to do this, I'm reluctant to add NFData instances to every single datatype in the
--- project—coercing the result into a string will suffice, though it throws off the
--- memory allocation results a bit.
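The comment removed above explains why these benchmarks force results via `show` rather than `NFData`. A minimal sketch of that trade-off, using the Gauge API this patch switches to (the `evalSomething` action below is a hypothetical stand-in, not part of this change):

```haskell
import Gauge.Main (Benchmarkable, nfIO)

-- Stand-in for an evaluator such as evalPythonProject; any IO action will do.
evalSomething :: IO [Int]
evalSomething = pure [1, 2, 3]

-- Coercing the result to a String forces every part of it without writing an
-- NFData instance, at the cost of charging the extra allocation to the benchmark.
forcedViaShow :: Benchmarkable
forcedViaShow = nfIO (show <$> evalSomething)

-- When an NFData instance already exists (lists of Int have one), nfIO can
-- force the structure directly, keeping the allocation figures honest.
forcedViaNFData :: Benchmarkable
forcedViaNFData = nfIO evalSomething
```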
-pyEval :: FilePath -> Benchmarkable
-pyEval p = nfIO . evalPythonProject $ ["bench/bench-fixtures/python/" <> p]
+pyEval :: Path.RelFile -> Benchmarkable
+pyEval p = nfIO $ evalPythonProject (Path.relDir "bench/bench-fixtures/python" </> p)

-rbEval :: FilePath -> Benchmarkable
-rbEval p = nfIO . evalRubyProject $ ["bench/bench-fixtures/ruby/" <> p]
+rbEval :: Path.RelFile -> Benchmarkable
+rbEval p = nfIO $ evalRubyProject (Path.relDir "bench/bench-fixtures/ruby" </> p)

-pyCall :: FilePath -> Benchmarkable
-pyCall p = nfIO $ callGraphProject pythonParser (Proxy @'Language.Python) defaultOptions ["bench/bench-fixtures/python/" <> p]
+pyCall :: Path.RelFile -> Benchmarkable
+pyCall p = nfIO $ callGraphProject (Proxy @'Language.Python) pythonParser (Path.relDir "bench/bench-fixtures/python/" </> p)

-rbCall :: FilePath -> Benchmarkable
-rbCall p = nfIO $ callGraphProject rubyParser (Proxy @'Language.Ruby) defaultOptions ["bench/bench-fixtures/ruby/" <> p]
+rbCall :: Path.RelFile -> Benchmarkable
+rbCall p = nfIO $ callGraphProject (Proxy @'Language.Ruby) rubyParser $ (Path.relDir "bench/bench-fixtures/ruby" </> p)

 main :: IO ()
 main = defaultMain
-  [ bgroup "python" [ bench "assignment" $ pyEval "simple-assignment.py"
-                    , bench "function def" $ pyEval "function-definition.py"
-                    , bench "if + function calls" $ pyEval "if-statement-functions.py"
-                    , bench "call graph" $ pyCall "if-statement-functions.py"
+  [ bgroup "python" [ bench "assignment" . pyEval $ Path.relFile "simple-assignment.py"
+                    , bench "function def" . pyEval $ Path.relFile "function-definition.py"
+                    , bench "if + function calls" . pyCall . Path.relFile $ "if-statement-functions.py"
+                    , bench "call graph" $ pyCall . Path.relFile $ "if-statement-functions.py"
                     ]
-  , bgroup "ruby" [ bench "assignment" $ rbEval "simple-assignment.rb"
-                  , bench "function def" $ rbEval "function-definition.rb"
-                  , bench "if + function calls" $ rbEval "if-statement-functions.rb"
-                  , bench "call graph" $ rbCall "if-statement-functions.rb"
+  , bgroup "ruby" [ bench "assignment" . rbEval $ Path.relFile "simple-assignment.rb"
+                  , bench "function def" . rbEval . Path.relFile $ "function-definition.rb"
+                  , bench "if + function calls" . rbCall $ Path.relFile "if-statement-functions.rb"
+                  , bench "call graph" $ rbCall $ Path.relFile "if-statement-functions.rb"
                   ]
   ]
diff --git a/cabal.project b/cabal.project
index 7c80b50dd..ab843049c 100644
--- a/cabal.project
+++ b/cabal.project
@@ -1,4 +1,4 @@
-packages: . semantic-core semantic-python semantic-source
+packages: . semantic-core semantic-python semantic-source semantic-tags

 jobs: $ncpus
diff --git a/semantic-python/semantic-python.cabal b/semantic-python/semantic-python.cabal
index 83d9bf52c..a5cf77a90 100644
--- a/semantic-python/semantic-python.cabal
+++ b/semantic-python/semantic-python.cabal
@@ -23,9 +23,10 @@ common haskell
   build-depends: base ^>=4.12
                , fused-effects ^>= 0.5
                , semantic-core ^>= 0.0
+               , semantic-source ^>= 0.0
                , text ^>= 1.2.3
-               , tree-sitter == 0.3.0.0
-               , tree-sitter-python == 0.4.0.0
+               , tree-sitter ^>= 0.4
+               , tree-sitter-python ^>= 0.5
   ghc-options: -Weverything
diff --git a/semantic-python/src/Language/Python/Core.hs b/semantic-python/src/Language/Python/Core.hs
index b259be847..fcb71517b 100644
--- a/semantic-python/src/Language/Python/Core.hs
+++ b/semantic-python/src/Language/Python/Core.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE ConstraintKinds, DataKinds, DefaultSignatures, DeriveAnyClass, DeriveGeneric, DerivingStrategies, DerivingVia, DisambiguateRecordFields, FlexibleContexts, FlexibleInstances, GeneralizedNewtypeDeriving,
-             NamedFieldPuns, OverloadedLists, OverloadedStrings, ScopedTypeVariables, StandaloneDeriving,
-             TypeApplications, TypeOperators, UndecidableInstances #-}
+             LambdaCase, NamedFieldPuns, OverloadedLists, OverloadedStrings, PatternSynonyms, ScopedTypeVariables,
+             StandaloneDeriving, TypeApplications, TypeOperators, UndecidableInstances, ViewPatterns #-}
 module Language.Python.Core
 ( compile
@@ -17,6 +17,7 @@ import Control.Monad.Fail
 import Data.Coerce
 import Data.Core as Core
 import Data.Foldable
+import Data.Loc (Loc)
 import qualified Data.Loc
 import Data.Name as Name
 import Data.Stack (Stack)
@@ -25,17 +26,18 @@ import Data.String (IsString)
 import Data.Text (Text)
 import GHC.Generics
 import GHC.Records
+import Source.Span (Span)
+import qualified Source.Span as Source
 import qualified TreeSitter.Python.AST as Py
-import TreeSitter.Span (Span)
-import qualified TreeSitter.Span as TreeSitter

+-- | Access to the current filename as Text to stick into location annotations.
 newtype SourcePath = SourcePath { rawPath :: Text }
   deriving stock (Eq, Show)
   deriving newtype IsString

--- Keeps track of the current scope's bindings (so that we can, when
--- compiling a class or module, return the list of bound variables
--- as a Core record so that all immediate definitions are exposed)
+-- | Keeps track of the current scope's bindings (so that we can, when
+-- compiling a class or module, return the list of bound variables as
+-- a Core record so that all immediate definitions are exposed)
 newtype Bindings = Bindings { unBindings :: Stack Name }
   deriving stock (Eq, Show)
   deriving newtype (Semigroup, Monoid)
@@ -43,6 +45,17 @@ newtype Bindings = Bindings { unBindings :: Stack Name }
 def :: Name -> Bindings -> Bindings
 def n = coerce (Stack.:> n)

+-- | Useful pattern synonym for extracting a single identifier from
+-- a Python ExpressionList. Easier than pattern-matching every time.
+-- TODO: when this is finished, we won't need this pattern, as we'll
+-- handle ExpressionLists the smart way every time.
+pattern SingleIdentifier :: Name -> Py.ExpressionList a
+pattern SingleIdentifier name <- Py.ExpressionList
+  { Py.extraChildren =
+    [ Py.PrimaryExpressionExpression (Py.IdentifierPrimaryExpression (Py.Identifier { bytes = name }))
+    ]
+  }
+
 -- We leave the representation of Core syntax abstract so that it's not
 -- possible for us to 'cheat' by pattern-matching on or eliminating a
 -- compiled term.
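For readers skimming the hunk above: the new SingleIdentifier synonym collapses the three-constructor nested match on a Python expression list into a single pattern. A hypothetical usage sketch, assuming the synonym and the tree-sitter Python AST types are in scope (the helper name is invented for illustration):

```haskell
{-# LANGUAGE PatternSynonyms #-}
import           Data.Name (Name)
import qualified TreeSitter.Python.AST as Py

-- With SingleIdentifier in scope, "exactly one bare identifier" is a one-line
-- match instead of a deeply nested record pattern.
nameOfSingleTarget :: Py.ExpressionList a -> Maybe Name
nameOfSingleTarget (SingleIdentifier name) = Just name
nameOfSingleTarget _                       = Nothing
```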
@@ -83,17 +96,18 @@ compile :: ( Compile py
   => py -> m (t Name)
 compile t = compileCC t (pure none)

+locFromTSSpan :: SourcePath -> Source.Span -> Loc
+locFromTSSpan fp (Source.Span (Source.Pos a b) (Source.Pos c d))
+  = Data.Loc.Loc (rawPath fp) (Data.Loc.Span (Data.Loc.Pos a b) (Data.Loc.Pos c d))
+
 locate :: ( HasField "ann" syntax Span
           , CoreSyntax syn t
           , Member (Reader SourcePath) sig
           , Carrier sig m
           ) => syntax -> t a -> m (t a)
 locate syn item = do
-  fp <- asks @SourcePath rawPath
-  let locFromTSSpan (TreeSitter.Span (TreeSitter.Pos a b) (TreeSitter.Pos c d))
-        = Data.Loc.Loc fp (Data.Loc.Span (Data.Loc.Pos a b) (Data.Loc.Pos c d))
-
-  pure (Core.annAt (locFromTSSpan (getField @"ann" syn)) item)
+  fp <- ask @SourcePath
+  pure (Core.annAt (locFromTSSpan fp (getField @"ann" syn)) item)

 defaultCompile :: (MonadFail m, Show py) => py -> m (t Name)
 defaultCompile t = fail $ "compilation unimplemented for " <> show t
@@ -103,25 +117,79 @@ newtype CompileSum py = CompileSum py
 instance (Generic py, GCompileSum (Rep py)) => Compile (CompileSum py) where
   compileCC (CompileSum a) cc = gcompileCCSum (from a) cc

-deriving via CompileSum (Either l r) instance (Compile l, Compile r) => Compile (Either l r)
+deriving via CompileSum ((l :+: r) Span) instance (Compile (l Span), Compile (r Span)) => Compile ((l :+: r) Span)

 instance Compile (Py.AssertStatement Span)
 instance Compile (Py.Attribute Span)

+-- Assignment compilation. Assignments are an uneasy hybrid of expressions
+-- (since they appear to have values, i.e. `a = b = c`) and statements (because
+-- they introduce bindings). For that reason, they deserve special attention.
+--
+-- The correct desugaring for the expression above looks like, given a continuation @cont@:
+-- @
+-- (b :<- c) >>>= (a :<- b) >>>= cont
+-- @
+-- The tree structure that we get out of tree-sitter is not particularly conducive to expressing
+-- this naturally, so we engage in a small desugaring step so that we can turn a list [a, b, c]
+-- into a sequenced Core expression using >>>= and a left fold. (It's a left fold that has
+-- information—specifically the LHS to assign—flowing through it rightward.)

+-- RHS represents the right-hand side of an assignment that we get out of tree-sitter.
+-- Desugared is the "terminal" node in a sequence of assignments, i.e. given a = b = c,
+-- c will be the terminal node. It is never an assignment.
+type RHS = Py.Assignment :+: Py.AugmentedAssignment :+: Desugared
+type Desugared = Py.ExpressionList :+: Py.Yield

+-- We have to pair locations and names, and tuple syntax is harder to
+-- read in this case than a happy little constructor.
+data Located a = Located Loc a

+-- Desugaring an RHS involves walking as deeply as possible into an
+-- assignment, storing the names we encounter as we go and eventually
+-- returning a terminal expression. We have to keep track of which
+-- names we encounter on the way down, which is what the accumulator is for.
+desugar :: (Member (Reader SourcePath) sig, Carrier sig m, MonadFail m)
+        => [Located Name]
+        -> RHS Span
+        -> m ([Located Name], Desugared Span)
+desugar acc = \case
+  L1 Py.Assignment { left = SingleIdentifier name, right = Just rhs, ann} -> do
+    loc <- locFromTSSpan <$> ask <*> pure ann
+    let cons = (Located loc name :)
+    desugar (cons acc) rhs
+  R1 (R1 any) -> pure (acc, any)
+  other -> fail ("desugar: couldn't desugar RHS " <> show other)
+
+-- This is an algebra that is invoked from a left fold but that
+-- returns a function (the 'difference' pattern) so that we can pass
+-- information about what RHS we need down the chain: unlike most fold
+-- functions, it has four parameters, not three (since our fold
+-- returns a function). There's some pun to be made on "collapsing
+-- sugar", like "icing" or "sugar water" but I'll leave that as an
+-- exercise to the reader.
+collapseDesugared :: (CoreSyntax syn t, Member (Reader Bindings) sig, Carrier sig m)
+                  => Located Name -- The current LHS to which to assign
+                  -> (t Name -> m (t Name)) -- A meta-continuation: it takes a name and returns a continuation
+                  -> t Name -- The current RHS to which to assign, yielded from an outer continuation
+                  -> m (t Name) -- The properly-sequenced result
+collapseDesugared (Located loc n) cont rem =
+  let assigning = fmap (Core.annAt loc . ((Name.named' n :<- rem) >>>=))
+  in assigning (local (def n) (cont (pure n))) -- gotta call local here to record this assignment
+
 instance Compile (Py.Assignment Span) where
   compileCC it@Py.Assignment
-    { Py.left = Py.ExpressionList
-      { Py.extraChildren =
-        [ Py.PrimaryExpressionExpression (Py.IdentifierPrimaryExpression (Py.Identifier { Py.bytes = name }))
-        ]
-      }
-    , Py.right = Just rhs
+    { left = SingleIdentifier name
+    , right = Just rhs
+    , ann
     } cc = do
-      value <- compile rhs
-      let assigning n = (Name.named' name :<- value) >>>= n
-      locate it =<< assigning <$> local (def name) cc
+      p <- ask @SourcePath
+      (names, val) <- desugar [Located (locFromTSSpan p ann) name] rhs
+      compile val >>= foldr collapseDesugared (const cc) names >>= locate it
+  compileCC other _ = fail ("Unhandled assignment case: " <> show other)
+-- End assignment compilation
+
 instance Compile (Py.AugmentedAssignment Span)
 instance Compile (Py.Await Span)
 instance Compile (Py.BinaryOperator Span)
@@ -198,8 +266,8 @@ instance Compile (Py.Identifier Span) where
 instance Compile (Py.IfStatement Span) where
   compileCC it@Py.IfStatement{ condition, consequence, alternative} cc =
     locate it =<< (if' <$> compile condition <*> compileCC consequence cc <*> foldr clause cc alternative)
-    where clause (Right Py.ElseClause{ body }) _ = compileCC body cc
-          clause (Left Py.ElifClause{ condition, consequence }) rest =
+    where clause (R1 Py.ElseClause{ body }) _ = compileCC body cc
+          clause (L1 Py.ElifClause{ condition, consequence }) rest =
             if' <$> compile condition <*> compileCC consequence cc <*> rest
diff --git a/semantic-python/test/Test.hs b/semantic-python/test/Test.hs
index 540f37a5b..af691d9f7 100644
--- a/semantic-python/test/Test.hs
+++ b/semantic-python/test/Test.hs
@@ -29,12 +29,12 @@ import Data.String (fromString)
 import GHC.Stack
 import qualified Language.Python.Core as Py
 import Prelude hiding (fail)
+import qualified Source.Span as Source (Span)
 import Streaming
 import qualified Streaming.Prelude as Stream
 import qualified Streaming.Process
 import System.Directory
 import System.Exit
-import qualified TreeSitter.Span as TS (Span)
 import qualified TreeSitter.Python
as TSP import qualified TreeSitter.Python.AST as TSP import qualified TreeSitter.Unmarshal as TS @@ -100,7 +100,7 @@ fixtureTestTreeForFile fp = HUnit.testCaseSteps (Path.toString fp) $ \step -> wi . runFail . runReader (fromString @Py.SourcePath . Path.toString $ fp) . runReader @Py.Bindings mempty - . Py.compile @(TSP.Module TS.Span) @_ @(Term (Ann :+: Core)) + . Py.compile @(TSP.Module Source.Span) @_ @(Term (Ann :+: Core)) <$> result for_ directives $ \directive -> do diff --git a/semantic-python/test/fixtures/2-04-multiple-assign.py b/semantic-python/test/fixtures/2-04-multiple-assign.py new file mode 100644 index 000000000..0581fd874 --- /dev/null +++ b/semantic-python/test/fixtures/2-04-multiple-assign.py @@ -0,0 +1,2 @@ +# CHECK-TREE: { z <- #true; y <- z; x <- y; #record { z : z, y : y, x : x }} +x = y = z = True diff --git a/semantic-tags/LICENSE b/semantic-tags/LICENSE new file mode 100644 index 000000000..331b241b3 --- /dev/null +++ b/semantic-tags/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 GitHub + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/semantic-tags/README.md b/semantic-tags/README.md new file mode 100644 index 000000000..1a0662907 --- /dev/null +++ b/semantic-tags/README.md @@ -0,0 +1,18 @@ +# semantic-tags + +Tags computation over ASTs. + + +## Development + +This project consists of a Haskell package named `semantic-tags`. The library’s sources are in [`src`][]. + +Development of `semantic-tags` is typically done using `cabal v2-build`: + +```shell +cabal v2-build # build the library +cabal v2-repl # load the package into ghci +cabal v2-test # build and run the doctests +``` + +[`src`]: https://github.com/github/semantic/tree/master/semantic-tags/src diff --git a/semantic-tags/Setup.hs b/semantic-tags/Setup.hs new file mode 100644 index 000000000..9a994af67 --- /dev/null +++ b/semantic-tags/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/semantic-tags/semantic-tags.cabal b/semantic-tags/semantic-tags.cabal new file mode 100644 index 000000000..3731d929f --- /dev/null +++ b/semantic-tags/semantic-tags.cabal @@ -0,0 +1,59 @@ +cabal-version: 2.4 + +name: semantic-tags +version: 0.0.0.0 +synopsis: Tags computation +description: Tags computation for ASTs derived from tree-sitter grammars. 
+homepage: https://github.com/github/semantic/tree/master/semantic-core#readme +bug-reports: https://github.com/github/semantic/issues +license: MIT +license-file: LICENSE +author: The Semantic authors +maintainer: opensource+semantic@github.com +copyright: (c) 2019 GitHub, Inc. +category: Language +build-type: Simple +stability: alpha +extra-source-files: README.md + +tested-with: GHC == 8.6.5 + +library + exposed-modules: + Language.Python + Language.Python.Tags + Tags.Tag + Tags.Tagging.Precise + -- other-modules: + -- other-extensions: + build-depends: + base >= 4.12 && < 5 + , bytestring ^>= 0.10.8.2 + , fused-effects ^>= 0.5 + , semantic-source ^>= 0.0 + , text ^>= 1.2.3.1 + , tree-sitter ^>= 0.4 + , tree-sitter-python ^>= 0.5 + hs-source-dirs: src + default-language: Haskell2010 + ghc-options: + -Weverything + -Wno-missing-local-signatures + -Wno-missing-import-lists + -Wno-implicit-prelude + -Wno-safe + -Wno-unsafe + -Wno-name-shadowing + -Wno-monomorphism-restriction + -Wno-missed-specialisations + -Wno-all-missed-specialisations + -Wno-star-is-type + +test-suite doctest + type: exitcode-stdio-1.0 + main-is: Doctest.hs + build-depends: base + , doctest >=0.7 && <1.0 + , semantic-tags + hs-source-dirs: test + default-language: Haskell2010 diff --git a/semantic-tags/src/Language/Python.hs b/semantic-tags/src/Language/Python.hs new file mode 100644 index 000000000..66c5803a1 --- /dev/null +++ b/semantic-tags/src/Language/Python.hs @@ -0,0 +1,17 @@ +-- | Semantic functionality for Python programs. +module Language.Python +( Term(..) +) where + +import qualified Language.Python.Tags as PyTags +import qualified Tags.Tagging.Precise as Tags +import qualified TreeSitter.Python.AST as Py +import qualified TreeSitter.Unmarshal as TS + +newtype Term a = Term { getTerm :: Py.Module a } + +instance TS.Unmarshal Term where + unmarshalNode node = Term <$> TS.unmarshalNode node + +instance Tags.ToTags Term where + tags src = Tags.runTagging src . PyTags.tags . getTerm diff --git a/semantic-tags/src/Language/Python/Tags.hs b/semantic-tags/src/Language/Python/Tags.hs new file mode 100644 index 000000000..7aea8809c --- /dev/null +++ b/semantic-tags/src/Language/Python/Tags.hs @@ -0,0 +1,109 @@ +{-# LANGUAGE AllowAmbiguousTypes, DataKinds, DisambiguateRecordFields, FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, NamedFieldPuns, ScopedTypeVariables, TypeApplications, TypeFamilies, TypeOperators, UndecidableInstances #-} +module Language.Python.Tags +( ToTags(..) 
+) where + +import Control.Effect.Reader +import Control.Effect.Writer +import Data.Foldable (traverse_) +import Data.Maybe (listToMaybe) +import Data.Monoid (Ap(..)) +import Data.List.NonEmpty (NonEmpty(..)) +import Data.Text as Text +import GHC.Generics +import Source.Loc +import Source.Range +import Source.Source as Source +import Tags.Tag +import qualified Tags.Tagging.Precise as Tags +import qualified TreeSitter.Python.AST as Py + +class ToTags t where + tags + :: ( Carrier sig m + , Member (Reader Source) sig + , Member (Writer Tags.Tags) sig + ) + => t Loc + -> m () + +instance (ToTagsBy strategy t, strategy ~ ToTagsInstance t) => ToTags t where + tags = tags' @strategy + + +class ToTagsBy (strategy :: Strategy) t where + tags' + :: ( Carrier sig m + , Member (Reader Source) sig + , Member (Writer Tags.Tags) sig + ) + => t Loc + -> m () + + +data Strategy = Generic | Custom + +type family ToTagsInstance t :: Strategy where + ToTagsInstance (_ :+: _) = 'Custom + ToTagsInstance Py.FunctionDefinition = 'Custom + ToTagsInstance Py.ClassDefinition = 'Custom + ToTagsInstance Py.Call = 'Custom + ToTagsInstance _ = 'Generic + + +instance (ToTags l, ToTags r) => ToTagsBy 'Custom (l :+: r) where + tags' (L1 l) = tags l + tags' (R1 r) = tags r + +instance ToTagsBy 'Custom Py.FunctionDefinition where + tags' Py.FunctionDefinition + { ann = Loc Range { start } span + , name = Py.Identifier { bytes = name } + , parameters + , returnType + , body = Py.Block { ann = Loc Range { start = end } _, extraChildren } + } = do + src <- ask @Source + let docs = listToMaybe extraChildren >>= docComment src + sliced = slice src (Range start end) + Tags.yield (Tag name Function span (firstLine sliced) docs) + tags parameters + traverse_ tags returnType + traverse_ tags extraChildren + +instance ToTagsBy 'Custom Py.ClassDefinition where + tags' Py.ClassDefinition + { ann = Loc Range { start } span + , name = Py.Identifier { bytes = name } + , superclasses + , body = Py.Block { ann = Loc Range { start = end } _, extraChildren } + } = do + src <- ask @Source + let docs = listToMaybe extraChildren >>= docComment src + sliced = slice src (Range start end) + Tags.yield (Tag name Class span (firstLine sliced) docs) + traverse_ tags superclasses + traverse_ tags extraChildren + +instance ToTagsBy 'Custom Py.Call where + tags' Py.Call + { ann = Loc range span + , function = Py.IdentifierPrimaryExpression Py.Identifier { bytes = name } + , arguments + } = do + src <- ask @Source + let sliced = slice src range + Tags.yield (Tag name Call span (firstLine sliced) Nothing) + tags arguments + tags' Py.Call { function, arguments } = tags function >> tags arguments + +docComment :: Source -> (Py.CompoundStatement :+: Py.SimpleStatement) Loc -> Maybe Text +docComment src (R1 (Py.ExpressionStatementSimpleStatement (Py.ExpressionStatement { extraChildren = L1 (Py.PrimaryExpressionExpression (Py.StringPrimaryExpression Py.String { ann })) :|_ }))) = Just (toText (slice src (byteRange ann))) +docComment _ _ = Nothing + +firstLine :: Source -> Text +firstLine = Text.takeWhile (/= '\n') . toText . Source.take 180 + + +instance (Generic1 t, Tags.GFoldable1 ToTags (Rep1 t)) => ToTagsBy 'Generic t where + tags' = getAp . Tags.gfoldMap1 @ToTags (Ap . tags) . from1 diff --git a/semantic-tags/src/Tags/Tag.hs b/semantic-tags/src/Tags/Tag.hs new file mode 100644 index 000000000..b8561255d --- /dev/null +++ b/semantic-tags/src/Tags/Tag.hs @@ -0,0 +1,24 @@ +module Tags.Tag +( Tag(..) +, Kind(..) 
+) where + +import Data.Text (Text) +import Source.Span + +data Tag = Tag + { name :: Text + , kind :: Kind + , span :: Span + , line :: Text + , docs :: Maybe Text + } + deriving (Eq, Show) + +data Kind + = Function + | Method + | Class + | Module + | Call + deriving (Bounded, Enum, Eq, Show) diff --git a/semantic-tags/src/Tags/Tagging/Precise.hs b/semantic-tags/src/Tags/Tagging/Precise.hs new file mode 100644 index 000000000..e9d0c1d47 --- /dev/null +++ b/semantic-tags/src/Tags/Tagging/Precise.hs @@ -0,0 +1,73 @@ +{-# LANGUAGE AllowAmbiguousTypes, ConstraintKinds, FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, RankNTypes, ScopedTypeVariables, TypeApplications, TypeOperators #-} +module Tags.Tagging.Precise +( Tags +, ToTags(..) +, yield +, runTagging +, GFoldable1(..) +) where + +import Control.Effect.Pure +import Control.Effect.Reader +import Control.Effect.Writer +import Data.Monoid (Endo(..)) +import GHC.Generics +import Prelude hiding (span) +import Source.Loc (Loc) +import Source.Span +import Source.Source +import Tags.Tag + +type Tags = Endo [Tag] + +class ToTags t where + tags :: Source -> t Loc -> [Tag] + + +yield :: (Carrier sig m, Member (Writer Tags) sig) => Tag -> m () +yield = tell . Endo . (:) . modSpan toOneIndexed where + modSpan f t@Tag{ span = s } = t { span = f s } + toOneIndexed (Span (Pos l1 c1) (Pos l2 c2)) = Span (Pos (l1 + 1) (c1 + 1)) (Pos (l2 + 1) (c2 + 1)) + +runTagging :: Source -> ReaderC Source (WriterC Tags PureC) () -> [Tag] +runTagging source + = ($ []) + . appEndo + . run + . execWriter + . runReader source + + +-- FIXME: move GFoldable1 into semantic-ast. +class GFoldable1 c t where + -- | Generically map functions over fields of kind @* -> *@, monoidally combining the results. + gfoldMap1 + :: Monoid b + => (forall f . c f => f a -> b) + -> t a + -> b + +instance GFoldable1 c f => GFoldable1 c (M1 i c' f) where + gfoldMap1 alg = gfoldMap1 @c alg . unM1 + +instance (GFoldable1 c f, GFoldable1 c g) => GFoldable1 c (f :*: g) where + gfoldMap1 alg (f :*: g) = gfoldMap1 @c alg f <> gfoldMap1 @c alg g + +instance (GFoldable1 c f, GFoldable1 c g) => GFoldable1 c (f :+: g) where + gfoldMap1 alg (L1 l) = gfoldMap1 @c alg l + gfoldMap1 alg (R1 r) = gfoldMap1 @c alg r + +instance GFoldable1 c (K1 R t) where + gfoldMap1 _ _ = mempty + +instance GFoldable1 c Par1 where + gfoldMap1 _ _ = mempty + +instance c t => GFoldable1 c (Rec1 t) where + gfoldMap1 alg (Rec1 t) = alg t + +instance (Foldable f, GFoldable1 c g) => GFoldable1 c (f :.: g) where + gfoldMap1 alg = foldMap (gfoldMap1 @c alg) . unComp1 + +instance GFoldable1 c U1 where + gfoldMap1 _ _ = mempty diff --git a/semantic-tags/test/Doctest.hs b/semantic-tags/test/Doctest.hs new file mode 100644 index 000000000..ed2c0d09c --- /dev/null +++ b/semantic-tags/test/Doctest.hs @@ -0,0 +1,12 @@ +module Main +( main +) where + +import System.Environment +import Test.DocTest + +main :: IO () +main = do + args <- getArgs + autogen <- fmap (<> "/build/doctest/autogen") <$> lookupEnv "HASKELL_DIST_DIR" + doctest (maybe id ((:) . 
("-i" <>)) autogen ("-isemantic-tags/src" : "--fast" : if null args then ["semantic-tags/src"] else args)) diff --git a/semantic.cabal b/semantic.cabal index 337c73214..c06ff04d7 100644 --- a/semantic.cabal +++ b/semantic.cabal @@ -56,7 +56,7 @@ common dependencies , fused-effects ^>= 0.5.0.0 , fused-effects-exceptions ^>= 0.2.0.0 , hashable ^>= 1.2.7.0 - , tree-sitter == 0.3.0.0 + , tree-sitter ^>= 0.4 , mtl ^>= 2.2.2 , network ^>= 2.8.0.0 , pathtype ^>= 0.8.1 @@ -180,7 +180,6 @@ library , Data.Syntax.Literal , Data.Syntax.Statement , Data.Syntax.Type - , Data.Tag , Data.Term -- Diffing algorithms & interpretation thereof , Diffing.Algorithm @@ -304,6 +303,7 @@ library , pretty-show ^>= 1.9.5 , profunctors ^>= 5.3 , reducers ^>= 3.12.3 + , semantic-tags ^>= 0 , semigroupoids ^>= 5.3.2 , split ^>= 0.2.3.3 , stm-chans ^>= 3.0.0.4 @@ -312,15 +312,15 @@ library , unliftio-core ^>= 0.1.2.0 , unordered-containers ^>= 0.2.9.0 , vector ^>= 0.12.0.2 - , tree-sitter-go == 0.2.0.0 - , tree-sitter-haskell == 0.2.0.0 - , tree-sitter-json == 0.2.0.0 - , tree-sitter-php == 0.2.0.0 - , tree-sitter-python == 0.4.0.0 - , tree-sitter-ruby == 0.2.0.0 - , tree-sitter-typescript == 0.2.1.0 - , tree-sitter-tsx == 0.2.1.0 - , tree-sitter-java == 0.2.0.0 + , tree-sitter-go ^>= 0.2 + , tree-sitter-haskell ^>= 0.2 + , tree-sitter-json ^>= 0.2 + , tree-sitter-php ^>= 0.2 + , tree-sitter-python ^>= 0.5 + , tree-sitter-ruby ^>= 0.2 + , tree-sitter-typescript ^>= 0.2.1 + , tree-sitter-tsx ^>= 0.2.1 + , tree-sitter-java ^>= 0.2 if flag(release) cpp-options: -DCOMPUTE_GIT_SHA @@ -410,14 +410,16 @@ test-suite parse-examples , tasty-hunit benchmark evaluation - import: haskell, executable-flags + import: haskell, dependencies, executable-flags hs-source-dirs: bench/evaluation type: exitcode-stdio-1.0 main-is: Main.hs ghc-options: -static build-depends: base - , criterion ^>= 1.5 + , algebraic-graphs + , gauge ^>= 0.2.5 , semantic + , semantic-source source-repository head type: git diff --git a/src/Analysis/Abstract/Caching/FlowInsensitive.hs b/src/Analysis/Abstract/Caching/FlowInsensitive.hs index c5670e6bc..0c10a265a 100644 --- a/src/Analysis/Abstract/Caching/FlowInsensitive.hs +++ b/src/Analysis/Abstract/Caching/FlowInsensitive.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GADTs, GeneralizedNewtypeDeriving, TypeOperators #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, TypeOperators #-} module Analysis.Abstract.Caching.FlowInsensitive ( cachingTerms , convergingModules diff --git a/src/Data/Language.hs b/src/Data/Language.hs index b1a5892a1..21b81ef84 100644 --- a/src/Data/Language.hs +++ b/src/Data/Language.hs @@ -10,6 +10,9 @@ module Data.Language , codeNavLanguages , textToLanguage , languageToText + , PerLanguageModes(..) + , LanguageMode(..) + , modeForLanguage ) where import Data.Aeson @@ -137,3 +140,19 @@ textToLanguage = \case "TSX" -> TSX "PHP" -> PHP _ -> Unknown + + +newtype PerLanguageModes = PerLanguageModes + { pythonMode :: LanguageMode + } + deriving (Eq, Ord, Show) + +data LanguageMode + = ALaCarte + | Precise + deriving (Bounded, Enum, Eq, Ord, Read, Show) + +modeForLanguage :: PerLanguageModes -> Language -> LanguageMode +modeForLanguage modes = \case + Python -> pythonMode modes + _ -> ALaCarte diff --git a/src/Data/Tag.hs b/src/Data/Tag.hs deleted file mode 100644 index 83e390a5a..000000000 --- a/src/Data/Tag.hs +++ /dev/null @@ -1,28 +0,0 @@ -{-# LANGUAGE DeriveAnyClass #-} - -module Data.Tag - ( Tag (..) 
- ) where - -import Prelude hiding (span) -import Prologue - -import Data.Aeson -import Control.Lens.Lens - -import Source.Span - --- | These selectors aren't prefixed with @tag@ for reasons of JSON --- backwards compatibility. -data Tag = Tag - { name :: Text - , kind :: Text - , span :: Span - , context :: [Text] - , line :: Maybe Text - , docs :: Maybe Text - } deriving (Eq, Show, Generic, ToJSON) - -instance HasSpan Tag where - span_ = lens span (\t s -> t { span = s }) - {-# INLINE span_ #-} diff --git a/src/Parsing/Parser.hs b/src/Parsing/Parser.hs index ef5735673..c287bdb78 100644 --- a/src/Parsing/Parser.hs +++ b/src/Parsing/Parser.hs @@ -8,7 +8,6 @@ module Parsing.Parser , someASTParser , someAnalysisParser , ApplyAll -, ApplyAll' -- À la carte parsers , goParser , goASTParser @@ -24,6 +23,8 @@ module Parsing.Parser , phpParser , phpASTParser , haskellParser + -- Precise parsers +, precisePythonParser ) where import Assigning.Assignment @@ -43,10 +44,12 @@ import qualified Language.Haskell.Assignment as Haskell import qualified Language.JSON.Assignment as JSON import qualified Language.Markdown.Assignment as Markdown import qualified Language.PHP.Assignment as PHP +import qualified Language.Python as Py import qualified Language.Python.Assignment as Python import qualified Language.Ruby.Assignment as Ruby import qualified Language.TSX.Assignment as TSX import qualified Language.TypeScript.Assignment as TypeScript +import Prelude hiding (fail) import Prologue import TreeSitter.Go import TreeSitter.Haskell @@ -54,20 +57,16 @@ import TreeSitter.JSON import qualified TreeSitter.Language as TS (Language, Symbol) import TreeSitter.PHP import TreeSitter.Python -import TreeSitter.Ruby +import TreeSitter.Ruby (tree_sitter_ruby) import TreeSitter.TSX import TreeSitter.TypeScript +import TreeSitter.Unmarshal -type family ApplyAll' (typeclasses :: [(* -> *) -> Constraint]) (fs :: [* -> *]) :: Constraint where - ApplyAll' (typeclass ': typeclasses) fs = (Apply typeclass fs, ApplyAll' typeclasses fs) - ApplyAll' '[] fs = () - -- | A parser, suitable for program analysis, for some specific language, producing 'Term's whose syntax satisfies a list of typeclass constraints. data SomeAnalysisParser typeclasses ann where - SomeAnalysisParser :: ( ApplyAll' typeclasses fs + SomeAnalysisParser :: ( ApplyAll typeclasses (Sum fs) , Apply (VertexDeclaration' (Sum fs)) fs - , Element Syntax.Identifier fs , HasPrelude lang ) => Parser (Term (Sum fs) ann) @@ -75,24 +74,24 @@ data SomeAnalysisParser typeclasses ann where -> SomeAnalysisParser typeclasses ann -- | A parser for some specific language, producing 'Term's whose syntax satisfies a list of typeclass constraints. -someAnalysisParser :: ( ApplyAll' typeclasses Go.Syntax - , ApplyAll' typeclasses PHP.Syntax - , ApplyAll' typeclasses Python.Syntax - , ApplyAll' typeclasses Ruby.Syntax - , ApplyAll' typeclasses TypeScript.Syntax - , ApplyAll' typeclasses Haskell.Syntax +someAnalysisParser :: ( ApplyAll typeclasses (Sum Go.Syntax) + , ApplyAll typeclasses (Sum PHP.Syntax) + , ApplyAll typeclasses (Sum Python.Syntax) + , ApplyAll typeclasses (Sum Ruby.Syntax) + , ApplyAll typeclasses (Sum TypeScript.Syntax) + , ApplyAll typeclasses (Sum Haskell.Syntax) ) - => proxy typeclasses -- ^ A proxy for the list of typeclasses required, e.g. @(Proxy :: Proxy '[Show1])@. - -> Language -- ^ The 'Language' to select. + => proxy typeclasses -- ^ A proxy for the list of typeclasses required, e.g. @(Proxy :: Proxy '[Show1])@. + -> Language -- ^ The 'Language' to select. 
-> SomeAnalysisParser typeclasses Loc -- ^ A 'SomeAnalysisParser' abstracting the syntax type to be produced. -someAnalysisParser _ Go = SomeAnalysisParser goParser (Proxy :: Proxy 'Go) -someAnalysisParser _ Haskell = SomeAnalysisParser haskellParser (Proxy :: Proxy 'Haskell) -someAnalysisParser _ JavaScript = SomeAnalysisParser typescriptParser (Proxy :: Proxy 'JavaScript) -someAnalysisParser _ PHP = SomeAnalysisParser phpParser (Proxy :: Proxy 'PHP) -someAnalysisParser _ Python = SomeAnalysisParser pythonParser (Proxy :: Proxy 'Python) -someAnalysisParser _ Ruby = SomeAnalysisParser rubyParser (Proxy :: Proxy 'Ruby) -someAnalysisParser _ TypeScript = SomeAnalysisParser typescriptParser (Proxy :: Proxy 'TypeScript) -someAnalysisParser _ TSX = SomeAnalysisParser typescriptParser (Proxy :: Proxy 'TSX) +someAnalysisParser _ Go = SomeAnalysisParser goParser (Proxy @'Go) +someAnalysisParser _ Haskell = SomeAnalysisParser haskellParser (Proxy @'Haskell) +someAnalysisParser _ JavaScript = SomeAnalysisParser typescriptParser (Proxy @'JavaScript) +someAnalysisParser _ PHP = SomeAnalysisParser phpParser (Proxy @'PHP) +someAnalysisParser _ Python = SomeAnalysisParser pythonParser (Proxy @'Python) +someAnalysisParser _ Ruby = SomeAnalysisParser rubyParser (Proxy @'Ruby) +someAnalysisParser _ TypeScript = SomeAnalysisParser typescriptParser (Proxy @'TypeScript) +someAnalysisParser _ TSX = SomeAnalysisParser typescriptParser (Proxy @'TSX) someAnalysisParser _ l = error $ "Analysis not supported for: " <> show l @@ -100,6 +99,8 @@ someAnalysisParser _ l = error $ "Analysis not supported for: " <> show data Parser term where -- | A parser producing 'AST' using a 'TS.Language'. ASTParser :: (Bounded grammar, Enum grammar, Show grammar) => Ptr TS.Language -> Parser (AST [] grammar) + -- | A parser 'Unmarshal'ing to a precise AST type using a 'TS.Language'. + UnmarshalParser :: Unmarshal t => Ptr TS.Language -> Parser (t Loc) -- | A parser producing an à la carte term given an 'AST'-producing parser and an 'Assignment' onto 'Term's in some syntax type. AssignmentParser :: (Enum grammar, Ix grammar, Show grammar, TS.Symbol grammar, Syntax.Error :< fs, Eq1 ast, Apply Foldable fs, Apply Functor fs, Foldable ast, Functor ast) => Parser (Term ast (Node grammar)) -- ^ A parser producing AST. @@ -163,6 +164,10 @@ markdownParser :: Parser Markdown.Term markdownParser = AssignmentParser MarkdownParser Markdown.assignment +precisePythonParser :: Parser (Py.Term Loc) +precisePythonParser = UnmarshalParser tree_sitter_python + + data SomeTerm typeclasses ann where SomeTerm :: ApplyAll typeclasses syntax => Term syntax ann -> SomeTerm typeclasses ann diff --git a/src/Parsing/TreeSitter.hs b/src/Parsing/TreeSitter.hs index 8c9e0b16e..2769241c0 100644 --- a/src/Parsing/TreeSitter.hs +++ b/src/Parsing/TreeSitter.hs @@ -2,14 +2,15 @@ module Parsing.TreeSitter ( Duration(..) 
, parseToAST +, parseToPreciseAST ) where -import Prologue hiding (bracket) +import Prologue -import Control.Effect.Resource +import Control.Effect.Fail +import Control.Effect.Lift +import Control.Effect.Reader import Control.Effect.Trace -import qualified Control.Exception as Exc (bracket) -import Data.ByteString.Unsafe (unsafeUseAsCStringLen) import Foreign import Foreign.C.Types (CBool (..)) import Foreign.Marshal.Array (allocaArray) @@ -19,45 +20,21 @@ import Data.Blob import Data.Duration import Data.Term import Source.Loc -import Source.Source (Source) import qualified Source.Source as Source import Source.Span +import qualified TreeSitter.Cursor as TS import qualified TreeSitter.Language as TS import qualified TreeSitter.Node as TS import qualified TreeSitter.Parser as TS import qualified TreeSitter.Tree as TS +import qualified TreeSitter.Unmarshal as TS -data Result grammar - = Failed - | Succeeded (AST [] grammar) - -runParser :: (Enum grammar, Bounded grammar) => Ptr TS.Parser -> Source -> IO (Result grammar) -runParser parser blobSource = unsafeUseAsCStringLen (Source.bytes blobSource) $ \ (source, len) -> do - alloca (\ rootPtr -> do - let acquire = do - -- Change this to TS.ts_parser_loop_until_cancelled if you want to test out cancellation - TS.ts_parser_parse_string parser nullPtr source len - - let release t - | t == nullPtr = pure () - | otherwise = TS.ts_tree_delete t - - let go treePtr = do - if treePtr == nullPtr - then pure Failed - else do - TS.ts_tree_root_node_p treePtr rootPtr - ptr <- peek rootPtr - Succeeded <$> anaM toAST ptr - Exc.bracket acquire release go) - --- | Parse 'Source' with the given 'TS.Language' and return its AST. --- Returns Nothing if the operation timed out. +-- | Parse a 'Blob' with the given 'TS.Language' and return its AST. +-- Returns 'Nothing' if the operation timed out. parseToAST :: ( Bounded grammar , Carrier sig m , Enum grammar - , Member Resource sig , Member Trace sig , MonadIO m ) @@ -65,19 +42,49 @@ parseToAST :: ( Bounded grammar -> Ptr TS.Language -> Blob -> m (Maybe (AST [] grammar)) -parseToAST parseTimeout language b@Blob{..} = bracket (liftIO TS.ts_parser_new) (liftIO . TS.ts_parser_delete) $ \ parser -> do - compatible <- liftIO $ do +parseToAST parseTimeout language blob = runParse parseTimeout language blob (fmap Right . anaM toAST <=< peek) + +parseToPreciseAST + :: ( Carrier sig m + , Member Trace sig + , MonadIO m + , TS.Unmarshal t + ) + => Duration + -> Ptr TS.Language + -> Blob + -> m (Maybe (t Loc)) +parseToPreciseAST parseTimeout language blob = runParse parseTimeout language blob $ \ rootPtr -> + TS.withCursor (castPtr rootPtr) $ \ cursor -> + runM (runFail (runReader cursor (runReader (Source.bytes (blobSource blob)) (TS.peekNode >>= TS.unmarshalNode)))) + +runParse + :: ( Carrier sig m + , Member Trace sig + , MonadIO m + ) + => Duration + -> Ptr TS.Language + -> Blob + -> (Ptr TS.Node -> IO (Either String a)) + -> m (Maybe a) +runParse parseTimeout language b@Blob{..} action = do + result <- liftIO . 
TS.withParser language $ \ parser -> do let timeoutMicros = fromIntegral $ toMicroseconds parseTimeout TS.ts_parser_set_timeout_micros parser timeoutMicros TS.ts_parser_halt_on_error parser (CBool 1) - TS.ts_parser_set_language parser language - result <- if compatible then - liftIO $ runParser parser blobSource - else - Failed <$ trace "tree-sitter: incompatible versions" + compatible <- TS.ts_parser_set_language parser language + if compatible then + TS.withParseTree parser (Source.bytes blobSource) $ \ treePtr -> do + if treePtr == nullPtr then + pure (Left "tree-sitter: null root node") + else + TS.withRootNode treePtr action + else + pure (Left "tree-sitter: incompatible versions") case result of - Failed -> Nothing <$ trace ("tree-sitter: parsing failed " <> blobPath b) - (Succeeded ast) -> Just ast <$ trace ("tree-sitter: parsing succeeded " <> blobPath b) + Left err -> Nothing <$ trace err <* trace ("tree-sitter: parsing failed " <> blobPath b) + Right ast -> Just ast <$ trace ("tree-sitter: parsing succeeded " <> blobPath b) toAST :: forall grammar . (Bounded grammar, Enum grammar) => TS.Node -> IO (Base (AST [] grammar) TS.Node) toAST node@TS.Node{..} = do diff --git a/src/Semantic/Api/Symbols.hs b/src/Semantic/Api/Symbols.hs index 7f36812d6..dbd44c1f0 100644 --- a/src/Semantic/Api/Symbols.hs +++ b/src/Semantic/Api/Symbols.hs @@ -6,13 +6,15 @@ module Semantic.Api.Symbols ) where import Control.Effect.Error +import Control.Effect.Reader import Control.Exception import Data.Blob hiding (File (..)) import Data.ByteString.Builder -import Data.Maybe +import Data.Language import Data.Term import qualified Data.Text as T import Data.Text (pack) +import qualified Language.Python as Py import Parsing.Parser import Prologue import Semantic.Api.Bridge @@ -23,6 +25,7 @@ import Serializing.Format import Source.Loc import Tags.Taggable import Tags.Tagging +import qualified Tags.Tagging.Precise as Precise import Control.Lens import Data.ProtoLens (defMessage) @@ -32,7 +35,7 @@ import Proto.Semantic_Fields as P legacyParseSymbols :: (Member Distribute sig, ParseEffects sig m, Traversable t) => t Blob -> m Legacy.ParseTreeSymbolResponse legacyParseSymbols blobs = Legacy.ParseTreeSymbolResponse <$> distributeFoldMap go blobs where - go :: (Member (Error SomeException) sig, Member Task sig, Carrier sig m) => Blob -> m [Legacy.File] + go :: ParseEffects sig m => Blob -> m [Legacy.File] go blob@Blob{..} = (doParse blob >>= withSomeTerm renderToSymbols) `catchError` (\(SomeException _) -> pure (pure emptyFile)) where emptyFile = tagsToFile [] @@ -51,8 +54,8 @@ legacyParseSymbols blobs = Legacy.ParseTreeSymbolResponse <$> distributeFoldMap tagToSymbol Tag{..} = Legacy.Symbol { symbolName = name - , symbolKind = kind - , symbolLine = fromMaybe mempty line + , symbolKind = pack (show kind) + , symbolLine = line , symbolSpan = converting #? span } @@ -61,26 +64,32 @@ parseSymbolsBuilder format blobs = parseSymbols blobs >>= serialize format parseSymbols :: (Member Distribute sig, ParseEffects sig m, Traversable t) => t Blob -> m ParseTreeSymbolResponse parseSymbols blobs = do -- ParseTreeSymbolResponse . V.fromList . 
toList <$> distributeFor blobs go - terms <- distributeFor blobs go + modes <- ask + terms <- distributeFor blobs (go modes) pure $ defMessage & P.files .~ toList terms where - go :: (Member (Error SomeException) sig, Member Task sig, Carrier sig m) => Blob -> m File - go blob@Blob{..} = (doParse blob >>= withSomeTerm renderToSymbols) `catchError` (\(SomeException e) -> pure $ errorFile (show e)) + go :: ParseEffects sig m => PerLanguageModes -> Blob -> m File + go modes blob@Blob{..} + | Precise <- pythonMode modes + , Python <- blobLanguage' + = catching $ renderPreciseToSymbols <$> parse precisePythonParser blob + | otherwise = catching $ withSomeTerm renderToSymbols <$> doParse blob where + catching m = m `catchError` (\(SomeException e) -> pure $ errorFile (show e)) blobLanguage' = blobLanguage blob blobPath' = pack $ blobPath blob errorFile e = defMessage & P.path .~ blobPath' - & P.language .~ (bridging # blobLanguage blob) + & P.language .~ (bridging # blobLanguage') & P.symbols .~ mempty & P.errors .~ [defMessage & P.error .~ T.pack e] & P.blobOid .~ blobOid - symbolsToSummarize :: [Text] - symbolsToSummarize = ["Function", "Method", "Class", "Module", "Call", "Send"] + renderToSymbols :: IsTaggable f => Term f Loc -> File + renderToSymbols term = tagsToFile (runTagging blob symbolsToSummarize term) - renderToSymbols :: (IsTaggable f, Applicative m) => Term f Loc -> m File - renderToSymbols term = pure $ tagsToFile (runTagging blob symbolsToSummarize term) + renderPreciseToSymbols :: Py.Term Loc -> File + renderPreciseToSymbols term = tagsToFile (Precise.tags blobSource term) tagsToFile :: [Tag] -> File tagsToFile tags = defMessage @@ -93,9 +102,12 @@ parseSymbols blobs = do -- ParseTreeSymbolResponse . V.fromList . toList <$> dis tagToSymbol :: Tag -> Symbol tagToSymbol Tag{..} = defMessage & P.symbol .~ name - & P.kind .~ kind - & P.line .~ fromMaybe mempty line + & P.kind .~ pack (show kind) + & P.line .~ line & P.maybe'span .~ converting #? 
span & P.maybe'docs .~ case docs of Just d -> Just (defMessage & P.docstring .~ d) Nothing -> Nothing + + symbolsToSummarize :: [Text] + symbolsToSummarize = ["Function", "Method", "Class", "Module", "Call", "Send"] diff --git a/src/Semantic/Api/Terms.hs b/src/Semantic/Api/Terms.hs index a73c5d501..12a9c2ac4 100644 --- a/src/Semantic/Api/Terms.hs +++ b/src/Semantic/Api/Terms.hs @@ -16,6 +16,8 @@ module Semantic.Api.Terms import Analysis.ConstructorName (ConstructorName) import Control.Effect.Error +import Control.Effect.Reader +import Control.Lens import Control.Monad import Control.Monad.IO.Class import Data.Abstract.Declarations @@ -25,11 +27,14 @@ import Data.Either import Data.Graph import Data.JSON.Fields import Data.Language +import Data.ProtoLens (defMessage) import Data.Quieterm import Data.Term import qualified Data.Text as T import Parsing.Parser import Prologue +import Proto.Semantic as P hiding (Blob) +import Proto.Semantic_Fields as P import Rendering.Graph import Rendering.JSON hiding (JSON) import qualified Rendering.JSON @@ -40,10 +45,6 @@ import qualified Serializing.Format as Format import Source.Loc import Tags.Taggable -import Control.Lens -import Data.ProtoLens (defMessage) -import Proto.Semantic as P hiding (Blob) -import Proto.Semantic_Fields as P termGraph :: (Traversable t, Member Distribute sig, ParseEffects sig m) => t Blob -> m ParseTreeGraphResponse termGraph blobs = do @@ -84,7 +85,7 @@ data TermOutputFormat deriving (Eq, Show) parseTermBuilder :: (Traversable t, Member Distribute sig, ParseEffects sig m, MonadIO m) - => TermOutputFormat-> t Blob -> m Builder + => TermOutputFormat -> t Blob -> m Builder parseTermBuilder TermJSONTree = distributeFoldMap jsonTerm >=> serialize Format.JSON -- NB: Serialize happens at the top level for these two JSON formats to collect results of multiple blobs. 
parseTermBuilder TermJSONGraph = termGraph >=> serialize Format.JSONPB parseTermBuilder TermSExpression = distributeFoldMap sexpTerm @@ -116,7 +117,7 @@ quietTerm blob = showTiming blob <$> time' ( (doParse blob >>= withSomeTerm (fma in stringUtf8 (status <> "\t" <> show (blobLanguage blob) <> "\t" <> blobPath blob <> "\t" <> show duration <> " ms\n") -type ParseEffects sig m = (Member (Error SomeException) sig, Member Task sig, Carrier sig m) +type ParseEffects sig m = (Member (Error SomeException) sig, Member (Reader PerLanguageModes) sig, Member Task sig, Carrier sig m) type TermConstraints = '[ Taggable diff --git a/src/Semantic/CLI.hs b/src/Semantic/CLI.hs index ea8f6b78d..7d761e04e 100644 --- a/src/Semantic/CLI.hs +++ b/src/Semantic/CLI.hs @@ -1,6 +1,7 @@ {-# LANGUAGE ApplicativeDo #-} module Semantic.CLI (main) where +import Control.Effect.Reader import Control.Exception as Exc (displayException) import Data.Blob import Data.Blob.IO @@ -101,15 +102,39 @@ parseCommand :: Mod CommandFields (Task.TaskEff Builder) parseCommand = command "parse" (info parseArgumentsParser (progDesc "Generate parse trees for path(s)")) where parseArgumentsParser = do - renderer <- flag (parseTermBuilder TermSExpression) (parseTermBuilder TermSExpression) (long "sexpression" <> help "Output s-expression parse trees (default)") - <|> flag' (parseTermBuilder TermJSONTree) (long "json" <> help "Output JSON parse trees") - <|> flag' (parseTermBuilder TermJSONGraph) (long "json-graph" <> help "Output JSON adjacency list") - <|> flag' (parseSymbolsBuilder JSONPB) (long "symbols" <> help "Output JSON symbol list") - <|> flag' (parseSymbolsBuilder JSONPB) (long "json-symbols" <> help "Output JSON symbol list") - <|> flag' (parseSymbolsBuilder Proto) (long "proto-symbols" <> help "Output JSON symbol list") - <|> flag' (parseTermBuilder TermDotGraph) (long "dot" <> help "Output DOT graph parse trees") - <|> flag' (parseTermBuilder TermShow) (long "show" <> help "Output using the Show instance (debug only, format subject to change without notice)") - <|> flag' (parseTermBuilder TermQuiet) (long "quiet" <> help "Don't produce output, but show timing stats") + languageModes <- Language.PerLanguageModes + <$> option auto ( long "python-mode" + <> help "The AST representation to use for Python sources" + <> metavar "ALaCarte|Precise" + <> value Language.ALaCarte + <> showDefault) + renderer + <- flag (parseTermBuilder TermSExpression) + (parseTermBuilder TermSExpression) + ( long "sexpression" + <> help "Output s-expression parse trees (default)") + <|> flag' (parseTermBuilder TermJSONTree) + ( long "json" + <> help "Output JSON parse trees") + <|> flag' (parseTermBuilder TermJSONGraph) + ( long "json-graph" + <> help "Output JSON adjacency list") + <|> flag' (parseSymbolsBuilder JSONPB) + ( long "symbols" + <> long "json-symbols" + <> help "Output JSON symbol list") + <|> flag' (parseSymbolsBuilder Proto) + ( long "proto-symbols" + <> help "Output protobufs symbol list") + <|> flag' (parseTermBuilder TermDotGraph) + ( long "dot" + <> help "Output DOT graph parse trees") + <|> flag' (parseTermBuilder TermShow) + ( long "show" + <> help "Output using the Show instance (debug only, format subject to change without notice)") + <|> flag' (parseTermBuilder TermQuiet) + ( long "quiet" + <> help "Don't produce output, but show timing stats") filesOrStdin <- FilesFromGitRepo <$> option str (long "gitDir" <> help "A .git directory to read from") <*> option shaReader (long "sha" <> help "The commit SHA1 to read from") @@ -119,7 
+144,7 @@ parseCommand = command "parse" (info parseArgumentsParser (progDesc "Generate pa <|> IncludePathsFromHandle <$> flag' stdin (long "only-stdin" <> help "Include only the paths given to stdin")) <|> FilesFromPaths <$> some (argument filePathReader (metavar "FILES...")) <|> pure (FilesFromHandle stdin) - pure $ Task.readBlobs filesOrStdin >>= renderer + pure $ Task.readBlobs filesOrStdin >>= runReader languageModes . renderer tsParseCommand :: Mod CommandFields (Task.TaskEff Builder) tsParseCommand = command "ts-parse" (info tsParseArgumentsParser (progDesc "Generate raw tree-sitter parse trees for path(s)")) diff --git a/src/Semantic/Git.hs b/src/Semantic/Git.hs index 903023c52..954365184 100644 --- a/src/Semantic/Git.hs +++ b/src/Semantic/Git.hs @@ -22,7 +22,6 @@ import Data.Char import Data.Either (fromRight) import Data.Text as Text import Shelly hiding (FilePath) -import System.IO (hSetBinaryMode) -- | git clone --bare clone :: Text -> FilePath -> IO () @@ -39,7 +38,7 @@ lsTree :: FilePath -> OID -> IO [TreeEntry] lsTree gitDir (OID sha) = sh $ parseEntries <$> run "git" ["-C", pack gitDir, "ls-tree", "-rz", sha] sh :: MonadIO m => Sh a -> m a -sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` True)) +sh = shelly . silently -- | Parses an list of entries separated by \NUL, and on failure return [] parseEntries :: Text -> [TreeEntry] diff --git a/src/Semantic/Task.hs b/src/Semantic/Task.hs index 124d5c1c0..929793a95 100644 --- a/src/Semantic/Task.hs +++ b/src/Semantic/Task.hs @@ -269,6 +269,12 @@ runParser blob@Blob{..} parser = case parser of parseToAST (configTreeSitterParseTimeout config) language blob >>= maybeM (throwError (SomeException ParserTimedOut)) + UnmarshalParser language -> + time "parse.tree_sitter_ast_parse" languageTag $ do + config <- asks config + parseToPreciseAST (configTreeSitterParseTimeout config) language blob + >>= maybeM (throwError (SomeException ParserTimedOut)) + AssignmentParser parser assignment -> runAssignment Assignment.assign parser assignment DeterministicParser parser assignment -> runAssignment Deterministic.assign parser assignment diff --git a/src/Semantic/Util.hs b/src/Semantic/Util.hs index bd569d3fc..b5de97011 100644 --- a/src/Semantic/Util.hs +++ b/src/Semantic/Util.hs @@ -8,6 +8,7 @@ module Semantic.Util , evalRubyProject , evalTypeScriptProject , evaluateProject' + , justEvaluating , mergeErrors , reassociate , parseFile diff --git a/src/Tags/Taggable.hs b/src/Tags/Taggable.hs index 96e4e876d..8010186ce 100644 --- a/src/Tags/Taggable.hs +++ b/src/Tags/Taggable.hs @@ -48,14 +48,14 @@ import qualified Language.TypeScript.Syntax as TypeScript -- TODO: Move to src/Data data Token - = Enter { tokenName :: Text, tokenSnippetRange :: Maybe Range } - | Exit { tokenName :: Text, tokenSnippetRange :: Maybe Range} + = Enter { tokenName :: Text, tokenSnippetRange :: Range } + | Exit { tokenName :: Text, tokenSnippetRange :: Range} | Iden { identifierName :: Text, tokenSpan :: Span, docsLiteralRange :: Maybe Range } deriving (Eq, Show) type Tagger = Stream (Of Token) -enter, exit :: Monad m => String -> Maybe Range -> Tagger m () +enter, exit :: Monad m => String -> Range -> Tagger m () enter c = yield . Enter (pack c) exit c = yield . 
diff --git a/src/Tags/Taggable.hs b/src/Tags/Taggable.hs
index 96e4e876d..8010186ce 100644
--- a/src/Tags/Taggable.hs
+++ b/src/Tags/Taggable.hs
@@ -48,14 +48,14 @@ import qualified Language.TypeScript.Syntax as TypeScript
 
 -- TODO: Move to src/Data
 data Token
-  = Enter { tokenName :: Text, tokenSnippetRange :: Maybe Range }
-  | Exit { tokenName :: Text, tokenSnippetRange :: Maybe Range}
+  = Enter { tokenName :: Text, tokenSnippetRange :: Range }
+  | Exit { tokenName :: Text, tokenSnippetRange :: Range}
   | Iden { identifierName :: Text, tokenSpan :: Span, docsLiteralRange :: Maybe Range }
   deriving (Eq, Show)
 
 type Tagger = Stream (Of Token)
 
-enter, exit :: Monad m => String -> Maybe Range -> Tagger m ()
+enter, exit :: Monad m => String -> Range -> Tagger m ()
 enter c = yield . Enter (pack c)
 exit c = yield . Exit (pack c)
 
@@ -69,7 +69,7 @@ class Taggable constr where
                  )
               => Language -> constr (Term syntax Loc) -> Maybe Range
 
-  snippet :: Foldable syntax => Loc -> constr (Term syntax Loc) -> Maybe Range
+  snippet :: Foldable syntax => Loc -> constr (Term syntax Loc) -> Range
 
   symbolName :: Declarations1 syntax => constr (Term syntax Loc) -> Maybe Name
 
@@ -83,8 +83,8 @@ class TaggableBy (strategy :: Strategy) constr where
                 => Language -> constr (Term syntax Loc) -> Maybe Range
   docsLiteral' _ _ = Nothing
 
-  snippet' :: (Foldable syntax) => Loc -> constr (Term syntax Loc) -> Maybe Range
-  snippet' _ _ = Nothing
+  snippet' :: (Foldable syntax) => Loc -> constr (Term syntax Loc) -> Range
+  snippet' ann _ = byteRange ann
 
   symbolName' :: Declarations1 syntax => constr (Term syntax Loc) -> Maybe Name
   symbolName' _ = Nothing
@@ -157,7 +157,7 @@ instance Taggable a => TaggableBy 'Custom (TermF a Loc) where
   symbolName' t = symbolName (termFOut t)
 
 instance TaggableBy 'Custom Syntax.Context where
-  snippet' ann (Syntax.Context _ (Term (In subj _))) = Just (subtractLoc ann subj)
+  snippet' ann (Syntax.Context _ (Term (In subj _))) = subtractLoc ann subj
 
 instance TaggableBy 'Custom Declaration.Function where
   docsLiteral' Python (Declaration.Function _ _ _ (Term (In _ bodyF)))
@@ -165,7 +165,7 @@ instance TaggableBy 'Custom Declaration.Function where
     , isTextElement exprF = Just (byteRange exprAnn)
     | otherwise = Nothing
   docsLiteral' _ _ = Nothing
-  snippet' ann (Declaration.Function _ _ _ (Term (In body _))) = Just $ subtractLoc ann body
+  snippet' ann (Declaration.Function _ _ _ (Term (In body _))) = subtractLoc ann body
   symbolName' = declaredName . Declaration.functionName
 
 instance TaggableBy 'Custom Declaration.Method where
@@ -174,7 +174,7 @@ instance TaggableBy 'Custom Declaration.Method where
     , isTextElement exprF = Just (byteRange exprAnn)
     | otherwise = Nothing
   docsLiteral' _ _ = Nothing
-  snippet' ann (Declaration.Method _ _ _ _ (Term (In body _)) _) = Just $ subtractLoc ann body
+  snippet' ann (Declaration.Method _ _ _ _ (Term (In body _)) _) = subtractLoc ann body
   symbolName' = declaredName . Declaration.methodName
 
 instance TaggableBy 'Custom Declaration.Class where
@@ -183,28 +183,28 @@ instance TaggableBy 'Custom Declaration.Class where
     , isTextElement exprF = Just (byteRange exprAnn)
     | otherwise = Nothing
   docsLiteral' _ _ = Nothing
-  snippet' ann (Declaration.Class _ _ _ (Term (In body _))) = Just $ subtractLoc ann body
+  snippet' ann (Declaration.Class _ _ _ (Term (In body _))) = subtractLoc ann body
   symbolName' = declaredName . Declaration.classIdentifier
 
 instance TaggableBy 'Custom Ruby.Class where
-  snippet' ann (Ruby.Class _ _ (Term (In body _))) = Just $ subtractLoc ann body
+  snippet' ann (Ruby.Class _ _ (Term (In body _))) = subtractLoc ann body
   symbolName' = declaredName . Ruby.classIdentifier
 
 instance TaggableBy 'Custom Ruby.Module where
-  snippet' ann (Ruby.Module _ (Term (In body _):_)) = Just $ subtractLoc ann body
-  snippet' ann (Ruby.Module _ _) = Just $ byteRange ann
+  snippet' ann (Ruby.Module _ (Term (In body _):_)) = subtractLoc ann body
+  snippet' ann (Ruby.Module _ _) = byteRange ann
   symbolName' = declaredName . Ruby.moduleIdentifier
 
 instance TaggableBy 'Custom TypeScript.Module where
-  snippet' ann (TypeScript.Module _ (Term (In body _):_)) = Just $ subtractLoc ann body
-  snippet' ann (TypeScript.Module _ _ ) = Just $ byteRange ann
+  snippet' ann (TypeScript.Module _ (Term (In body _):_)) = subtractLoc ann body
+  snippet' ann (TypeScript.Module _ _ ) = byteRange ann
   symbolName' = declaredName . TypeScript.moduleIdentifier
 
 instance TaggableBy 'Custom Expression.Call where
-  snippet' ann (Expression.Call _ _ _ (Term (In body _))) = Just $ subtractLoc ann body
+  snippet' ann (Expression.Call _ _ _ (Term (In body _))) = subtractLoc ann body
   symbolName' = declaredName . Expression.callFunction
 
 instance TaggableBy 'Custom Ruby.Send where
-  snippet' ann (Ruby.Send _ _ _ (Just (Term (In body _)))) = Just $ subtractLoc ann body
-  snippet' ann _ = Just $ byteRange ann
+  snippet' ann (Ruby.Send _ _ _ (Just (Term (In body _)))) = subtractLoc ann body
+  snippet' ann _ = byteRange ann
   symbolName' Ruby.Send{..} = declaredName =<< sendSelector
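Note (illustration, not part of the patch): `snippet`/`snippet'` are now total, so every Enter/Exit token carries a concrete range; the class default falls back to the node's own `byteRange`, and the instances above simply drop their former `Just` wrappers. A custom instance written against the new signature would look roughly like this (`MySyntax` and `mySyntaxName` are invented names for illustration):

    instance TaggableBy 'Custom MySyntax where
      -- Trim the body's range off the node's range, as the instances above do.
      snippet' ann (MySyntax _ (Term (In body _))) = subtractLoc ann body
      symbolName' = declaredName . mySyntaxName
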
diff --git a/src/Tags/Tagging.hs b/src/Tags/Tagging.hs
index a0a5e602e..71127736a 100644
--- a/src/Tags/Tagging.hs
+++ b/src/Tags/Tagging.hs
@@ -2,6 +2,7 @@ module Tags.Tagging
 ( runTagging
 , Tag(..)
+, Kind(..)
 )
 where
 
@@ -14,10 +15,10 @@ import Streaming
 import qualified Streaming.Prelude as Streaming
 
 import Data.Blob
-import Data.Tag
 import Data.Term
 import Source.Loc
 import qualified Source.Source as Source
+import Tags.Tag
 import Tags.Taggable
 
 runTagging :: (IsTaggable syntax)
@@ -29,30 +30,41 @@ runTagging blob symbolsToSummarize
   = Eff.run
   . evalState @[ContextToken] []
   . Streaming.toList_
-  . contextualizing blob symbolsToSummarize
+  . contextualizing blob toKind
   . tagging blob
+  where
+    toKind x = do
+      guard (x `elem` symbolsToSummarize)
+      case x of
+        "Function" -> Just Function
+        "Method" -> Just Method
+        "Class" -> Just Class
+        "Module" -> Just Module
+        "Call" -> Just Call
+        "Send" -> Just Call -- Ruby’s Send is considered to be a kind of 'Call'
+        _ -> Nothing
 
-type ContextToken = (Text, Maybe Range)
+type ContextToken = (Text, Range)
 
 contextualizing :: ( Member (State [ContextToken]) sig
                    , Carrier sig m
                    )
                 => Blob
-                -> [Text]
+                -> (Text -> Maybe Kind)
                 -> Stream (Of Token) m a
                 -> Stream (Of Tag) m a
-contextualizing Blob{..} symbolsToSummarize = Streaming.mapMaybeM $ \case
+contextualizing Blob{..} toKind = Streaming.mapMaybeM $ \case
   Enter x r -> Nothing <$ enterScope (x, r)
   Exit x r -> Nothing <$ exitScope (x, r)
   Iden iden span docsLiteralRange -> get @[ContextToken] >>= pure . \case
-    ((x, r):("Context", cr):xs) | x `elem` symbolsToSummarize
-      -> Just $ Tag iden x span (fmap fst xs) (firstLine (slice r)) (slice cr)
-    ((x, r):xs) | x `elem` symbolsToSummarize
-      -> Just $ Tag iden x span (fmap fst xs) (firstLine (slice r)) (slice docsLiteralRange)
+    ((x, r):("Context", cr):_) | Just kind <- toKind x
+      -> Just $ Tag iden kind span (firstLine (slice r)) (Just (slice cr))
+    ((x, r):_) | Just kind <- toKind x
+      -> Just $ Tag iden kind span (firstLine (slice r)) (slice <$> docsLiteralRange)
     _ -> Nothing
     where
-      slice = fmap (stripEnd . Source.toText . Source.slice blobSource)
-      firstLine = fmap (T.take 180 . fst . breakOn "\n")
+      slice = stripEnd . Source.toText . Source.slice blobSource
+      firstLine = T.take 180 . fst . breakOn "\n"
 
 enterScope, exitScope :: ( Member (State [ContextToken]) sig
                          , Carrier sig m
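Note (illustration, not part of the patch): for consumers, the `symbolsToSummarize` names are now translated into the closed `Kind` type re-exported from `Tags.Tag`, with Ruby's "Send" folded into `Call`, and a `Tag` carries the kind, the snippet line, and optional docs in place of the old context-path list. Concretely, per the updated expectations in test/Tags/Spec.hs further down:

    -- runTagging blob symbolsToSummarize tree on
    -- test/fixtures/ruby/tags/simple_method.rb now yields values shaped like:
    [ Tag "foo" Method (Span (Pos 1 1) (Pos 4 4)) "def foo" Nothing ]
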
diff --git a/test/Data/Functor/Listable.hs b/test/Data/Functor/Listable.hs
index 30af6c8a1..2a9d2f90e 100644
--- a/test/Data/Functor/Listable.hs
+++ b/test/Data/Functor/Listable.hs
@@ -24,7 +24,6 @@ import Data.Functor.Both
 import qualified Data.Language as Language
 import Data.List.NonEmpty
 import Data.Patch
-import Data.Semigroup.App
 import qualified Data.Syntax as Syntax
 import qualified Data.Syntax.Comment as Comment
 import qualified Data.Syntax.Declaration as Declaration
diff --git a/test/Examples.hs b/test/Examples.hs
index 50b476559..0c49c581d 100644
--- a/test/Examples.hs
+++ b/test/Examples.hs
@@ -3,6 +3,7 @@ module Main (main) where
 
 import Control.Effect
+import Control.Effect.Reader
 import Control.Exception (displayException)
 import qualified Control.Foldl as Foldl
 import Data.Function ((&))
@@ -17,6 +18,7 @@ import qualified Data.ByteString.Char8 as BC
 import qualified Data.ByteString.Lazy.Char8 as BLC
 import qualified Data.ByteString.Streaming.Char8 as ByteStream
 import Data.Either
+import Data.Language (LanguageMode(..), PerLanguageModes(..))
 import Data.Set (Set)
 import Data.Traversable
 import Data.Typeable
@@ -121,4 +123,4 @@ knownFailuresForPath tsDir (Just path)
 
 parseFilePath :: (Member (Error SomeException) sig, Member Distribute sig, Member Task sig, Member Files sig, Carrier sig m, MonadIO m) => Path.RelFile -> m Bool
-parseFilePath path = readBlob (fileForRelPath path) >>= parseTermBuilder @[] TermShow . pure >>= const (pure True)
+parseFilePath path = readBlob (fileForRelPath path) >>= runReader (PerLanguageModes ALaCarte) . parseTermBuilder @[] TermShow . pure >>= const (pure True)
diff --git a/test/Semantic/CLI/Spec.hs b/test/Semantic/CLI/Spec.hs
index 1c9f09c25..ca139c668 100644
--- a/test/Semantic/CLI/Spec.hs
+++ b/test/Semantic/CLI/Spec.hs
@@ -1,5 +1,6 @@
 module Semantic.CLI.Spec (testTree) where
 
+import Control.Effect.Reader
 import Data.ByteString.Builder
 import Semantic.Api hiding (Blob, BlobPair, File)
 import Semantic.Task
@@ -50,23 +51,24 @@ testForParseFixture (format, runParse, files, expected) =
 
 parseFixtures :: [(String, [Blob] -> TaskEff Builder, [File], Path.RelFile)]
 parseFixtures =
-  [ ("s-expression", parseTermBuilder TermSExpression, path, Path.relFile "test/fixtures/ruby/corpus/and-or.parseA.txt")
-  , ("json", parseTermBuilder TermJSONTree, path, prefix Path.file "parse-tree.json")
-  , ("json", parseTermBuilder TermJSONTree, path', prefix Path.file "parse-trees.json")
-  , ("json", parseTermBuilder TermJSONTree, [], prefix Path.file "parse-tree-empty.json")
-  , ("symbols", parseSymbolsBuilder Serializing.Format.JSON, path'', prefix Path.file "parse-tree.symbols.json")
-  , ("protobuf symbols", parseSymbolsBuilder Serializing.Format.Proto, path'', prefix Path.file "parse-tree.symbols.protobuf.bin")
+  [ ("s-expression", run . parseTermBuilder TermSExpression, path, Path.relFile "test/fixtures/ruby/corpus/and-or.parseA.txt")
+  , ("json", run . parseTermBuilder TermJSONTree, path, prefix Path.file "parse-tree.json")
+  , ("json", run . parseTermBuilder TermJSONTree, path', prefix Path.file "parse-trees.json")
+  , ("json", run . parseTermBuilder TermJSONTree, [], prefix Path.file "parse-tree-empty.json")
+  , ("symbols", run . parseSymbolsBuilder Serializing.Format.JSONPB, path'', prefix Path.file "parse-tree.symbols.json")
+  , ("protobuf symbols", run . parseSymbolsBuilder Serializing.Format.Proto, path'', prefix Path.file "parse-tree.symbols.protobuf.bin")
   ]
   where path = [File "test/fixtures/ruby/corpus/and-or.A.rb" Ruby]
         path' = [File "test/fixtures/ruby/corpus/and-or.A.rb" Ruby, File "test/fixtures/ruby/corpus/and-or.B.rb" Ruby]
         path'' = [File "test/fixtures/ruby/corpus/method-declaration.A.rb" Ruby]
         prefix = Path.relDir "test/fixtures/cli"
+        run = runReader (PerLanguageModes ALaCarte)
 
 diffFixtures :: [(String, [BlobPair] -> TaskEff Builder, [Both File], Path.RelFile)]
 diffFixtures =
   [ ("json diff", parseDiffBuilder DiffJSONTree, pathMode, prefix Path.file "diff-tree.json")
   , ("s-expression diff", parseDiffBuilder DiffSExpression, pathMode, Path.relFile "test/fixtures/ruby/corpus/method-declaration.diffA-B.txt")
-  , ("toc summaries diff", diffSummaryBuilder Serializing.Format.JSON, pathMode, prefix Path.file "diff-tree.toc.json")
+  , ("toc summaries diff", diffSummaryBuilder Serializing.Format.JSONPB, pathMode, prefix Path.file "diff-tree.toc.json")
   , ("protobuf diff", diffSummaryBuilder Serializing.Format.Proto, pathMode, prefix Path.file "diff-tree.toc.protobuf.bin")
   ]
   where pathMode = [Both (File "test/fixtures/ruby/corpus/method-declaration.A.rb" Ruby) (File "test/fixtures/ruby/corpus/method-declaration.B.rb" Ruby)]
diff --git a/test/Semantic/IO/Spec.hs b/test/Semantic/IO/Spec.hs
index 767c0da9f..35ccc66c5 100644
--- a/test/Semantic/IO/Spec.hs
+++ b/test/Semantic/IO/Spec.hs
@@ -1,78 +1,87 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Semantic.IO.Spec (spec) where
 
 import Prelude hiding (readFile)
 
-import Control.Monad.IO.Class
-import Data.List
-import System.Directory
-import System.IO.Temp
-import Data.String
+import           Control.Monad.IO.Class
+import           Data.List
+import           Data.String
+import qualified Data.Text as Text
+import           System.Directory
+import           System.IO.Temp
 
-import Data.Blob
-import Data.Handle
-import SpecHelpers hiding (readFile)
+import           Data.Blob
+import           Data.Handle
 import qualified Semantic.Git as Git
-import Shelly (shelly, silently, cd, run_)
+import           Shelly (cd, run_, shelly, silently, touchfile, writefile)
+import           SpecHelpers hiding (readFile)
+import           System.Path ((</>))
 import qualified System.Path as Path
-import System.Path ((</>))
+
+
+makeGitRepo :: FilePath -> IO ()
+makeGitRepo dir = shelly . silently $ do
+  cd (fromString dir)
+  let git = run_ "git"
+  git ["init"]
+  touchfile "bar.py"
+  writefile "日本語.rb" "# coding: utf-8\n日本語 = 'hello'"
+  git ["add", "日本語.rb", "bar.py"]
+  git ["config", "user.name", "'Test'"]
+  git ["config", "user.email", "'test@test.test'"]
+  git ["commit", "-am", "'test commit'"]
 
 spec :: Spec
 spec = do
+  describe "catFile" $ do
+    hasGit <- runIO $ isJust <$> findExecutable "git"
+    when hasGit . it "should not corrupt the output of files with UTF-8 identifiers" $ do
+      result <- liftIO . withSystemTempDirectory "semantic-temp-git-repo" $ \dir -> do
+        makeGitRepo dir
+        trees <- Git.lsTree (dir <> "/.git") (Git.OID "HEAD")
+        Just it <- pure $ find (\p -> "日本語" `isInfixOf` Git.treeEntryPath p) trees
+        Git.catFile (dir <> "/.git") (Git.treeEntryOid it)
+      ("日本語" `Text.isInfixOf` result) `shouldBe` True
+
+  describe "lsTree" $ do
+    hasGit <- runIO $ isJust <$> findExecutable "git"
+    when hasGit . it "should read all tree entries from a repo" $ do
+      items <- liftIO . withSystemTempDirectory "semantic-temp-git-repo" $ \dir -> do
+        makeGitRepo dir
+        Git.lsTree dir (Git.OID "HEAD")
+
+      length items `shouldBe` 2
+
   describe "readBlobsFromGitRepo" $ do
     hasGit <- runIO $ isJust <$> findExecutable "git"
    when hasGit . it "should read from a git directory" $ do
      -- This temporary directory will be cleaned after use.
      blobs <- liftIO . withSystemTempDirectory "semantic-temp-git-repo" $ \dir -> do
-        shelly $ silently $ do
-          cd (fromString dir)
-          let git = run_ "git"
-          git ["init"]
-          run_ "touch" ["foo.py", "bar.rb"]
-          git ["add", "foo.py", "bar.rb"]
-          git ["config", "user.name", "'Test'"]
-          git ["config", "user.email", "'test@test.test'"]
-          git ["commit", "-am", "'test commit'"]
-
+        makeGitRepo dir
         readBlobsFromGitRepoPath (Path.absDir dir </> Path.relDir ".git") (Git.OID "HEAD") [] []
      let files = sortOn fileLanguage (blobFile <$> blobs)
-      files `shouldBe` [ File "foo.py" Python
-                       , File "bar.rb" Ruby
+      files `shouldBe` [ File "bar.py" Python
+                       , File "日本語.rb" Ruby
                        ]
 
    when hasGit . it "should read from a git directory with --only" $ do
      -- This temporary directory will be cleaned after use.
      blobs <- liftIO . withSystemTempDirectory "semantic-temp-git-repo" $ \dir -> do
        let pdir = Path.absDir dir
-        shelly $ silently $ do
-          cd (fromString dir)
-          let git = run_ "git"
-          git ["init"]
-          run_ "touch" ["foo.py", "bar.rb"]
-          git ["add", "foo.py", "bar.rb"]
-          git ["config", "user.name", "'Test'"]
-          git ["config", "user.email", "'test@test.test'"]
-          git ["commit", "-am", "'test commit'"]
-
-        readBlobsFromGitRepoPath (pdir </> Path.relDir ".git") (Git.OID "HEAD") [] [Path.relFile "foo.py"]
+        makeGitRepo dir
+        readBlobsFromGitRepoPath (pdir </> Path.relDir ".git") (Git.OID "HEAD") [] [Path.relFile "日本語.rb"]
      let files = sortOn fileLanguage (blobFile <$> blobs)
-      files `shouldBe` [ File "foo.py" Python ]
+      files `shouldBe` [ File "日本語.rb" Ruby ]
 
    when hasGit . it "should read from a git directory with --exclude" $ do
      -- This temporary directory will be cleaned after use.
      blobs <- liftIO . withSystemTempDirectory "semantic-temp-git-repo" $ \dir -> do
-        shelly $ silently $ do
-          cd (fromString dir)
-          let git = run_ "git"
-          git ["init"]
-          run_ "touch" ["foo.py", "bar.rb"]
-          git ["add", "foo.py", "bar.rb"]
-          git ["config", "user.name", "'Test'"]
-          git ["config", "user.email", "'test@test.test'"]
-          git ["commit", "-am", "'test commit'"]
+        makeGitRepo dir
 
-        readBlobsFromGitRepoPath (Path.absDir dir </> Path.relDir ".git") (Git.OID "HEAD") [Path.relFile "foo.py"] []
+        readBlobsFromGitRepoPath (Path.absDir dir </> Path.relDir ".git") (Git.OID "HEAD") [Path.relFile "日本語.rb"] []
      let files = sortOn fileLanguage (blobFile <$> blobs)
-      files `shouldBe` [ File "bar.rb" Ruby ]
+      files `shouldBe` [ File "bar.py" Python ]
 
   describe "readFile" $ do
    it "returns a blob for extant files" $ do
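Note (illustration, not part of the patch): `makeGitRepo` replaces the three inline Shelly blocks and seeds the throwaway repository with a UTF-8-named file, which the new `catFile`/`lsTree` cases and the revised expectations above depend on. A future spec needing a scratch repository would follow the same pattern:

    -- Sketch of reusing the helper above in another test case.
    it "sees both seeded files" $ do
      entries <- liftIO . withSystemTempDirectory "semantic-temp-git-repo" $ \dir -> do
        makeGitRepo dir
        Git.lsTree dir (Git.OID "HEAD")
      length entries `shouldBe` 2
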
diff --git a/test/Semantic/Spec.hs b/test/Semantic/Spec.hs
index a73c7233e..e2e4252e2 100644
--- a/test/Semantic/Spec.hs
+++ b/test/Semantic/Spec.hs
@@ -1,5 +1,6 @@
 module Semantic.Spec (spec) where
 
+import Control.Effect.Reader
 import Control.Exception (fromException)
 import SpecHelpers
 
@@ -15,17 +16,17 @@ spec :: Spec
 spec = do
   describe "parseBlob" $ do
     it "returns error if given an unknown language (json)" $ do
-      output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermJSONTree [ setBlobLanguage Unknown methodsBlob ]
+      output <- fmap runBuilder . runTaskOrDie . runReader (PerLanguageModes ALaCarte) $ parseTermBuilder TermJSONTree [ setBlobLanguage Unknown methodsBlob ]
       output `shouldBe` "{\"trees\":[{\"path\":\"methods.rb\",\"error\":\"NoLanguageForBlob \\\"methods.rb\\\"\",\"language\":\"Unknown\"}]}\n"
 
     it "throws if given an unknown language for sexpression output" $ do
-      res <- runTaskWithOptions defaultOptions (parseTermBuilder TermSExpression [setBlobLanguage Unknown methodsBlob])
+      res <- runTaskWithOptions defaultOptions (runReader (PerLanguageModes ALaCarte) (parseTermBuilder TermSExpression [setBlobLanguage Unknown methodsBlob]))
       case res of
         Left exc -> fromException exc `shouldBe` Just (NoLanguageForBlob "methods.rb")
         Right _bad -> fail "Expected parseTermBuilder to fail for an unknown language"
 
     it "renders with the specified renderer" $ do
-      output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermSExpression [methodsBlob]
+      output <- fmap runBuilder . runTaskOrDie . runReader (PerLanguageModes ALaCarte) $ parseTermBuilder TermSExpression [methodsBlob]
       output `shouldBe` "(Statements\n (Method\n (Empty)\n (Identifier)\n (Statements)))\n"
 
   describe "git ls-tree parsing" $ do
diff --git a/test/SpecHelpers.hs b/test/SpecHelpers.hs
index 00ac5958a..d1310b847 100644
--- a/test/SpecHelpers.hs
+++ b/test/SpecHelpers.hs
@@ -42,7 +42,7 @@ import Data.Project as X
 import Data.Proxy as X
 import Data.Foldable (toList)
 import Data.Functor.Listable as X
-import Data.Language as X
+import Data.Language as X hiding (Precise)
 import Data.List.NonEmpty as X (NonEmpty(..))
 import Data.Semilattice.Lower as X
 import Source.Source as X (Source)
@@ -95,7 +95,7 @@ diffFilePaths session paths
 parseFilePath :: TaskSession -> Path.RelFile -> IO (Either SomeException ByteString)
 parseFilePath session path = do
   blob <- readBlobFromFile (fileForRelPath path)
-  res <- runTask session $ parseTermBuilder TermSExpression (toList blob)
+  res <- runTask session . runReader (PerLanguageModes ALaCarte) $ parseTermBuilder TermSExpression (toList blob)
   pure (runBuilder <$> res)
 
 -- | Read two files to a BlobPair.
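Note (illustration, not part of the patch): parse and diff tasks now run under a `Reader PerLanguageModes`, which is why call sites throughout the tests gain a `runReader (PerLanguageModes ALaCarte)` wrapper. If that wrapper spreads further, a small helper would keep the default in one place (hypothetical; the patch inlines it at each call site):

    import Control.Effect.Reader (ReaderC, runReader)
    import Data.Language (LanguageMode (..), PerLanguageModes (..))

    -- Run a task under the default à-la-carte per-language modes.
    withALaCarteModes :: ReaderC PerLanguageModes m a -> m a
    withALaCarteModes = runReader (PerLanguageModes ALaCarte)
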
diff --git a/test/Tags/Spec.hs b/test/Tags/Spec.hs
index ff3c11029..90c235805 100644
--- a/test/Tags/Spec.hs
+++ b/test/Tags/Spec.hs
@@ -2,7 +2,7 @@ module Tags.Spec (spec) where
 
 import Data.Text (Text)
 import SpecHelpers
-import Tags.Tagging
+import Tags.Tagging as Tags
 import qualified System.Path as Path
 
 spec :: Spec
@@ -11,89 +11,94 @@ spec = do
     it "produces tags for functions with docs" $ do
       (blob, tree) <- parseTestFile goParser (Path.relFile "test/fixtures/go/tags/simple_functions.go")
       runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "TestFromBits" "Function" (Span (Pos 6 1) (Pos 8 2)) ["Statements"] (Just "func TestFromBits(t *testing.T) {") (Just "// TestFromBits ...")
-        , Tag "Hi" "Function" (Span (Pos 10 1) (Pos 11 2)) ["Statements"] (Just "func Hi()") Nothing ]
+        [ Tag "TestFromBits" Function (Span (Pos 6 1) (Pos 8 2)) "func TestFromBits(t *testing.T) {" (Just "// TestFromBits ...")
+        , Tag "Hi" Function (Span (Pos 10 1) (Pos 11 2)) "func Hi()" Nothing ]
 
    it "produces tags for methods" $ do
      (blob, tree) <- parseTestFile goParser (Path.relFile "test/fixtures/go/tags/method.go")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "CheckAuth" "Method" (Span (Pos 3 1) (Pos 3 100)) ["Statements"] (Just "func (c *apiClient) CheckAuth(req *http.Request, user, repo string) (*authenticatedActor, error)") Nothing]
+        [ Tag "CheckAuth" Method (Span (Pos 3 1) (Pos 3 100)) "func (c *apiClient) CheckAuth(req *http.Request, user, repo string) (*authenticatedActor, error)" Nothing]
 
    it "produces tags for calls" $ do
      (blob, tree) <- parseTestFile goParser (Path.relFile "test/fixtures/go/tags/simple_functions.go")
      runTagging blob ["Call"] tree `shouldBe`
-        [ Tag "Hi" "Call" (Span (Pos 7 2) (Pos 7 6)) ["Function", "Context", "Statements"] (Just "Hi()") Nothing]
+        [ Tag "Hi" Call (Span (Pos 7 2) (Pos 7 6)) "Hi()" Nothing]
 
  describe "javascript and typescript" $ do
    it "produces tags for functions with docs" $ do
      (blob, tree) <- parseTestFile typescriptParser (Path.relFile "test/fixtures/javascript/tags/simple_function_with_docs.js")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "myFunction" "Function" (Span (Pos 2 1) (Pos 4 2)) ["Statements"] (Just "function myFunction()") (Just "// This is myFunction") ]
+        [ Tag "myFunction" Function (Span (Pos 2 1) (Pos 4 2)) "function myFunction()" (Just "// This is myFunction") ]
 
    it "produces tags for classes" $ do
      (blob, tree) <- parseTestFile typescriptParser (Path.relFile "test/fixtures/typescript/tags/class.ts")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "FooBar" "Class" (Span (Pos 1 1) (Pos 1 16)) ["Statements"] (Just "class FooBar") Nothing ]
+        [ Tag "FooBar" Class (Span (Pos 1 1) (Pos 1 16)) "class FooBar" Nothing ]
 
    it "produces tags for modules" $ do
      (blob, tree) <- parseTestFile typescriptParser (Path.relFile "test/fixtures/typescript/tags/module.ts")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "APromise" "Module" (Span (Pos 1 1) (Pos 1 20)) ["Statements"] (Just "module APromise { }") Nothing ]
+        [ Tag "APromise" Tags.Module (Span (Pos 1 1) (Pos 1 20)) "module APromise { }" Nothing ]
 
  describe "python" $ do
    it "produces tags for functions" $ do
      (blob, tree) <- parseTestFile pythonParser (Path.relFile "test/fixtures/python/tags/simple_functions.py")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "Foo" "Function" (Span (Pos 1 1) (Pos 5 17)) ["Statements"] (Just "def Foo(x):") Nothing
-        , Tag "Bar" "Function" (Span (Pos 7 1) (Pos 11 13)) ["Statements"] (Just "def Bar():") Nothing
-        , Tag "local" "Function" (Span (Pos 8 5) (Pos 9 17)) ["Statements", "Function", "Statements"] (Just "def local():") Nothing
+        [ Tag "Foo" Function (Span (Pos 1 1) (Pos 5 17)) "def Foo(x):" Nothing
+        , Tag "Bar" Function (Span (Pos 7 1) (Pos 11 13)) "def Bar():" Nothing
+        , Tag "local" Function (Span (Pos 8 5) (Pos 9 17)) "def local():" Nothing
        ]
 
    it "produces tags for functions with docs" $ do
      (blob, tree) <- parseTestFile pythonParser (Path.relFile "test/fixtures/python/tags/simple_function_with_docs.py")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "Foo" "Function" (Span (Pos 1 1) (Pos 3 13)) ["Statements"] (Just "def Foo(x):") (Just "\"\"\"This is the foo function\"\"\"") ]
+        [ Tag "Foo" Function (Span (Pos 1 1) (Pos 3 13)) "def Foo(x):" (Just "\"\"\"This is the foo function\"\"\"") ]
 
    it "produces tags for classes" $ do
      (blob, tree) <- parseTestFile pythonParser (Path.relFile "test/fixtures/python/tags/class.py")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "Foo" "Class" (Span (Pos 1 1) (Pos 5 17)) ["Statements"] (Just "class Foo:") (Just "\"\"\"The Foo class\"\"\"")
-        , Tag "f" "Function" (Span (Pos 3 5) (Pos 5 17)) ["Statements", "Class", "Statements"] (Just "def f(self):") (Just "\"\"\"The f method\"\"\"")
+        [ Tag "Foo" Class (Span (Pos 1 1) (Pos 5 17)) "class Foo:" (Just "\"\"\"The Foo class\"\"\"")
+        , Tag "f" Function (Span (Pos 3 5) (Pos 5 17)) "def f(self):" (Just "\"\"\"The f method\"\"\"")
        ]
 
    it "produces tags for multi-line functions" $ do
      (blob, tree) <- parseTestFile pythonParser (Path.relFile "test/fixtures/python/tags/multiline.py")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "Foo" "Function" (Span (Pos 1 1) (Pos 3 13)) ["Statements"] (Just "def Foo(x,") Nothing ]
+        [ Tag "Foo" Function (Span (Pos 1 1) (Pos 3 13)) "def Foo(x," Nothing ]
 
  describe "ruby" $ do
    it "produces tags for methods" $ do
      (blob, tree) <- parseTestFile rubyParser (Path.relFile "test/fixtures/ruby/tags/simple_method.rb")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "foo" "Method" (Span (Pos 1 1) (Pos 4 4)) ["Statements"] (Just "def foo") Nothing ]
+        [ Tag "foo" Method (Span (Pos 1 1) (Pos 4 4)) "def foo" Nothing ]
 
    it "produces tags for sends" $ do
      (blob, tree) <- parseTestFile rubyParser (Path.relFile "test/fixtures/ruby/tags/simple_method.rb")
      runTagging blob ["Send"] tree `shouldBe`
-        [ Tag "puts" "Send" (Span (Pos 2 3) (Pos 2 12)) ["Statements", "Method", "Statements"] (Just "puts \"hi\"") Nothing
-        , Tag "bar" "Send" (Span (Pos 3 3) (Pos 3 8)) ["Statements", "Method", "Statements"] (Just "a.bar") Nothing
-        , Tag "a" "Send" (Span (Pos 3 3) (Pos 3 4)) ["Send", "Statements", "Method", "Statements"] (Just "a") Nothing
+        [ Tag "puts" Call (Span (Pos 2 3) (Pos 2 12)) "puts \"hi\"" Nothing
+        , Tag "bar" Call (Span (Pos 3 3) (Pos 3 8)) "a.bar" Nothing
+        , Tag "a" Call (Span (Pos 3 3) (Pos 3 4)) "a" Nothing
        ]
 
    it "produces tags for methods with docs" $ do
      (blob, tree) <- parseTestFile rubyParser (Path.relFile "test/fixtures/ruby/tags/simple_method_with_docs.rb")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "foo" "Method" (Span (Pos 2 1) (Pos 3 4)) ["Statements"] (Just "def foo") (Just "# Public: foo") ]
+        [ Tag "foo" Method (Span (Pos 2 1) (Pos 3 4)) "def foo" (Just "# Public: foo") ]
+
+    it "correctly tags files containing multibyte UTF-8 characters" $ do
+      (blob, tree) <- parseTestFile rubyParser (Path.relFile "test/fixtures/ruby/tags/unicode_identifiers.rb")
+      runTagging blob symbolsToSummarize tree `shouldBe`
+        [ Tag "日本語" Method (Span (Pos 2 1) (Pos 4 4)) "def 日本語" (Just "# coding: utf-8")]
 
    it "produces tags for methods and classes with docs" $ do
      (blob, tree) <- parseTestFile rubyParser (Path.relFile "test/fixtures/ruby/tags/class_module.rb")
      runTagging blob symbolsToSummarize tree `shouldBe`
-        [ Tag "Foo" "Module" (Span (Pos 2 1 ) (Pos 12 4)) ["Statements"] (Just "module Foo") (Just "# Public: Foo")
-        , Tag "Bar" "Class" (Span (Pos 5 3 ) (Pos 11 6)) ["Module", "Context", "Statements"] (Just "class Bar") (Just "# Public: Bar")
-        , Tag "baz" "Method" (Span (Pos 8 5 ) (Pos 10 8)) ["Class", "Context", "Module", "Context", "Statements"] (Just "def baz(a)") (Just "# Public: baz")
-        , Tag "C" "Class" (Span (Pos 14 1) (Pos 20 4)) ["Statements"] (Just "class A::B::C") Nothing
-        , Tag "foo" "Method" (Span (Pos 15 3) (Pos 17 6)) ["Statements", "Class", "Statements"] (Just "def foo") Nothing
-        , Tag "foo" "Method" (Span (Pos 18 3) (Pos 19 6)) ["Statements", "Class", "Statements"] (Just "def self.foo") Nothing
+        [ Tag "Foo" Tags.Module (Span (Pos 2 1 ) (Pos 12 4)) "module Foo" (Just "# Public: Foo")
+        , Tag "Bar" Class (Span (Pos 5 3 ) (Pos 11 6)) "class Bar" (Just "# Public: Bar")
+        , Tag "baz" Method (Span (Pos 8 5 ) (Pos 10 8)) "def baz(a)" (Just "# Public: baz")
+        , Tag "C" Class (Span (Pos 14 1) (Pos 20 4)) "class A::B::C" Nothing
+        , Tag "foo" Method (Span (Pos 15 3) (Pos 17 6)) "def foo" Nothing
+        , Tag "foo" Method (Span (Pos 18 3) (Pos 19 6)) "def self.foo" Nothing
        ]
 
 symbolsToSummarize :: [Text]
diff --git a/test/fixtures/ruby/tags/unicode_identifiers.rb b/test/fixtures/ruby/tags/unicode_identifiers.rb
new file mode 100644
index 000000000..50a42b0a0
--- /dev/null
+++ b/test/fixtures/ruby/tags/unicode_identifiers.rb
@@ -0,0 +1,4 @@
+# coding: utf-8
+def 日本語
+  "hello"
+end