mirror of https://github.com/github/semantic.git synced 2024-12-27 17:05:33 +03:00

Merge remote-tracking branch 'origin/master' into import-holes

This commit is contained in:
joshvera 2020-02-06 14:53:32 -05:00
commit ff1d8a73ab
198 changed files with 3470 additions and 1038 deletions

View File

@ -37,7 +37,7 @@ jobs:
name: Cache ~/.cabal/store
with:
path: ~/.cabal/store
key: ${{ runner.os }}-${{ matrix.ghc }}-v5-cabal-store
key: ${{ runner.os }}-${{ matrix.ghc }}-v8-cabal-store
- uses: actions/cache@v1
name: Cache dist-newstyle
@ -52,7 +52,7 @@ jobs:
- name: Install dependencies
run: |
cabal v2-update
script/bootstrap
cabal v2-configure --project-file=cabal.project.ci --disable-optimization --enable-benchmarks --enable-tests --write-ghc-environment-files=always -j2
cabal v2-build --project-file=cabal.project.ci all --only-dependencies

21
.gitmodules vendored
View File

@ -0,0 +1,21 @@
[submodule "semantic-json/vendor/tree-sitter-json"]
path = semantic-json/vendor/tree-sitter-json
url = https://github.com/tree-sitter/tree-sitter-json.git
[submodule "semantic-python/vendor/tree-sitter-python"]
path = semantic-python/vendor/tree-sitter-python
url = https://github.com/tree-sitter/tree-sitter-python.git
[submodule "semantic-java/vendor/tree-sitter-java"]
path = semantic-java/vendor/tree-sitter-java
url = https://github.com/tree-sitter/tree-sitter-java.git
[submodule "semantic-go/vendor/tree-sitter-go"]
path = semantic-go/vendor/tree-sitter-go
url = https://github.com/tree-sitter/tree-sitter-go.git
[submodule "semantic-ruby/vendor/tree-sitter-ruby"]
path = semantic-ruby/vendor/tree-sitter-ruby
url = https://github.com/tree-sitter/tree-sitter-ruby.git
[submodule "semantic-typescript/vendor/tree-sitter-typescript"]
path = semantic-typescript/vendor/tree-sitter-typescript
url = https://github.com/tree-sitter/tree-sitter-typescript.git
[submodule "semantic-tsx/vendor/tree-sitter-typescript"]
path = semantic-tsx/vendor/tree-sitter-typescript
url = https://github.com/tree-sitter/tree-sitter-typescript.git

29
HACKING.md Normal file
View File

@ -0,0 +1,29 @@
# Effective `semantic` Hacking for Fun and Profit
The Semantic repository is a large one, containing dozens of subprojects. This means that GHC has to do a lot of work when compiling. For this reason, it's important to keep in mind the principles that will let you avoid recompiling the whole world as soon as you change a single .cabal file.
## The Landscape
We officially recommend [Visual Studio Code](https://code.visualstudio.com) with the [`ghcide`](https://marketplace.visualstudio.com/items?itemName=DigitalAssetHoldingsLLC.ghcide) extension. Though our tooling scripts may work with other editor integration solutions, we can't guarantee that they'll do so indefinitely.
## Things to Do
1. *Use `script/repl`.* The REPL script is much more powerful than `cabal repl`; it ensures that all packages can be loaded (including tests), so you should be able to `:load` any on-disk package that you want—and you shouldn't have to restart the REPL every time you add a new file, as GHCi will optimistically read from any `import` statements it encounters. Keep in mind that `:load` accepts both file paths and module identifiers.
2. *Use the editor integration.* There is no substitute for a workflow that allows you to fix errors without switching applications. If you're using tooling other than VS Code and `ghcide`, we recommend you configure its GHCi process to be `script/repl`.
3. *Run tests in the REPL.* Unlike `cabal repl`, all the testing packages are loaded into the REPL, so you can `:load` a path to a test file and invoke the relevant test with `main` (see the sketch after this list). This enables the fastest fix/build/test cycle possible; it may take some time to get used to avoiding `cabal test`. If all you want to check is whether the `semantic` CLI tool builds correctly, `:load src/Semantic/CLI.hs`.
4. *If you have to build, be sure to disable optimizations and parallelize aggressively.* `cabal` builds with `-O1` on by default; this entails a significant hit to compile speed. If you find yourself building some product repeatedly, add `optimizations: False`.
5. *Turn on stylish-haskell integration.* Most editors are capable of running Haskell code through `stylish-haskell` on save; enabling this does wonders towards keeping your code in compliance with our style guide, frees you from having to fret over the minor details of how something should be formatted, and saves us time in the review process. The VSCode extension for `stylish-haskell` can be found here.
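As a concrete illustration of items 1 and 3, a typical session looks roughly like this (the test-file path is illustrative; `:load` whichever spec you are actually working on):

```
$ script/repl
ghci> :load test/Spec.hs
ghci> main
```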
## Things to Avoid
1. *Don't `cabal clean`*. `cabal clean` doesn't take any arguments that determine what to clean; as such, running it will clean everything, including the language ASTs, which take some time to recompile.
2. *Don't `cabal configure` if humanly possible*. It nukes all your build caches. Should you need to modify a global build setting, edit `cabal.project.local` manually.
3. *Write small modules with minimal dependencies.* Keep the code that deals with language ASTs well-isolated.
4. *Avoid fancy type tricks if possible.* Techniques like [advanced overlap](https://wiki.haskell.org/GHC/AdvancedOverlap) can save on boilerplate but may not be worth the pain they put the type checker through. If the only downside to avoiding a fancy type trick is some boilerplate, consider that boilerplate is often preferable to slowing down everyone's build for the indefinite future.

View File

@ -1,6 +1,5 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
@ -12,7 +11,7 @@ import Data.Abstract.Evaluatable
import Data.Bifunctor
import Data.Blob.IO (readBlobFromPath)
import qualified Data.Duration as Duration
import "semantic" Data.Graph (topologicalSort)
import Data.Graph.Algebraic (topologicalSort)
import qualified Data.Language as Language
import Data.Project
import Data.Proxy

View File

@ -1,20 +1,21 @@
-- ATTENTION: care must be taken to keep this file in sync with cabal.project.ci. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of that file).
-- ATTENTION: care must be taken to keep this file in sync with cabal.project.ci and script/ghci-flags. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of that file).
-- Local packages
packages: .
semantic-analysis
semantic-ast
semantic-codegen
semantic-core
semantic-go
semantic-java
semantic-json
semantic-parse
semantic-python
semantic-ruby
semantic-scope-graph
semantic-tsx
semantic-typescript
semantic-tags
semantic-scope-graph
-- Packages brought in from other repos instead of hackage
-- ATTENTION: remember to update cabal.project.ci when bumping SHAs here!

View File

@ -1,13 +1,15 @@
-- ATTENTION: care must be taken to keep this file in sync with cabal.project. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of this file).
-- ATTENTION: care must be taken to keep this file in sync with cabal.project and script/ghci-flags. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of this file).
-- Local packages
packages: .
semantic-analysis
semantic-ast
semantic-codegen
semantic-core
semantic-go
semantic-java
semantic-json
semantic-parse
semantic-python
semantic-ruby
semantic-scope-graph
@ -43,6 +45,9 @@ package semantic-analysis
package semantic-ast
ghc-options: -Werror
package semantic-codegen
ghc-options: -Werror
package semantic-core
ghc-options: -Werror
@ -55,6 +60,9 @@ package semantic-java
package semantic-json
ghc-options: -Werror
package semantic-parse
ghc-options: -Werror
package semantic-python
ghc-options: -Werror

View File

@ -9,7 +9,7 @@ Please note that this list of steps reflects the state of Semantic as is, not wh
1. **Find or write a [tree-sitter](https://tree-sitter.github.io) parser for your language.** The tree-sitter [organization page](https://github.com/tree-sitter) has a number of parsers beyond those we currently support in Semantic; look there first to make sure you're not duplicating work. The tree-sitter [documentation on creating parsers](http://tree-sitter.github.io/tree-sitter/creating-parsers) provides an exhaustive look at the process of developing and debugging tree-sitter parsers. Though we do not support grammars written with other toolkits such as [ANTLR](https://www.antlr.org), translating an ANTLR or other BNF-style grammar into a tree-sitter grammar is usually straightforward.
2. **Create a Haskell library providing an interface to that C source.** The [`haskell-tree-sitter`](https://github.com/tree-sitter/haskell-tree-sitter) repository provides a Cabal package for each supported language. You can find an example of a pull request to add such a package here. Each package needs to provide two API surfaces:
* a bridged (via the FFI) reference to the toplevel parser in the generated file ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/internal/TreeSitter/JSON/Internal.hs))
* symbol datatypes for each syntax node in the parser, generated with the `mkSymbolDatatype` Template Haskell splice ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/TreeSitter/JSON.hs))
* symbol datatypes for each syntax node in the parser, generated with the `mkSymbolDatatype` Template Haskell splice ([example](https://github.com/github/semantic/blob/master/semantic-json/src/Language/JSON/Grammar.hs)). For more information, see [CodeGen docs](https://github.com/github/semantic/blob/master/semantic-codegen/README.md).
3. **Identify the new syntax nodes required to represent your language.** While we provide an extensive library of reusable AST nodes for [literals](https://github.com/github/semantic/blob/master/src/Data/Syntax/Literal.hs), [expressions](https://github.com/github/semantic/blob/master/src/Data/Syntax/Expression.hs), [statements](https://github.com/github/semantic/blob/master/src/Data/Syntax/Statement.hs), and [types](https://github.com/github/semantic/blob/master/src/Data/Syntax/Type.hs), most languages will require some syntax nodes not found in other languages. You'll need to create a new module providing those data types, and those data types must be written as an open union (a minimal sketch follows this list): [here](https://github.com/github/semantic/commits/master/src/Language/Ruby/Syntax.hs?author=charliesome) is an example for Ruby's syntactic details.
4. **Write an assignment step that translates tree-sitter trees into Haskell datatypes.** More information about this can be found in the [assignment documentation](assignment.md). This is currently the most time-consuming and error-prone part of the process (see <https://github.com/github/semantic/issues/77>).
5. **Implement `Evaluatable` instances and add new [`Value` effects](https://github.com/github/semantic/blob/master/src/Control/Abstract/Value.hs) as is needed to describe the control flow of your language.** While several features of Semantic (e.g. `semantic parse --symbols` and `semantic diff`) will become fully available given a working assignment step, further features based on concrete or abstract interpretation (such as `semantic graph`) require implementing the `Evaluatable` typeclass and providing value-style effects for each control flow feature provided by the language. This means that language support is a spectrum: Semantic can provide useful information without any knowledge of a language's semantics, but each successive addition to its interpretive capabilities enables more functionality.
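For step 3, here is a minimal sketch of what a new language-specific syntax node might look like. The module, node, and field names are invented for illustration; the real modules (e.g. `Language.Ruby.Syntax`) carry additional deriving clauses and the `Evaluatable` instances described in step 5.

```haskell
{-# LANGUAGE DeriveGeneric, DeriveTraversable #-}
-- Hypothetical module name, for illustration only.
module Language.Example.Syntax where

import GHC.Generics (Generic1)

-- | A hypothetical node for a language-specific @retry@ statement. Real syntax
-- nodes in this repository derive further classes and are combined with the
-- shared Data.Syntax.* nodes into each language's open union of syntax types.
newtype Retry a = Retry { retryTerm :: a }
  deriving (Eq, Ord, Show, Foldable, Functor, Traversable, Generic1)
```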

View File

@ -13,8 +13,6 @@ Our CI systems ensure that all patches pass `hlint`'s muster. We have our own se
We strongly recommend adding Haddock documentation to any function/data type, unless its purpose is immediately apparent from its name.
Comments should describe the "why", type signatures should describe the "what", and the code should describe the "how".
The Haskell Prelude is too minimal for serious work. The `Prologue` module should be imported in most files, as it reexports most of what you need.
# Formatting
2 spaces everywhere. Tabs are forbidden. Haskell indentation can be unpredictable, so generally stick with what your editor suggests.
@ -50,14 +48,6 @@ data Pos = Pos
}
```
### Split up imports into logical groups.
We use the following convention, each section separated by a newline:
1. Prelude/Prologue import
2. Library/stdlib imports
3. Local in-project imports.
### Align typographical symbols.
`->` in `case` statements and signatures, `=` in functions, and `::` in records should be aligned. Your editor can help with this. In certain situations, aligning symbols may decrease readability, e.g. complicated `case` statements. Use your best judgment.
@ -66,7 +56,7 @@ We use the following convention, each section separated by a newline:
Locally bound variables (such as the arguments to functions, or helpers defined in a `where` clause) can have short names, such as `x` or `go`. Globally bound functions and variables should have descriptive names.
You'll often find yourself implementing functions that conflict with Prelude/Prologue definitions. If this is the case, avoid adding a prefix to these functions, and instead import them qualified.
You'll often find yourself implementing functions that conflict with Prelude definitions. If this is the case, avoid adding a prefix to these functions, and instead import them qualified.
``` haskell
-- Broke

View File

@ -13,34 +13,47 @@ output_file="${HIE_BIOS_OUTPUT:-/dev/stdout}"
build_dir="dist-newstyle/build/x86_64-osx/ghc-$ghc_version"
build_products_dir="$build_dir/build-repl"
function add_autogen_includes {
echo "-optP-include"
echo "-optP$1/cabal_macros.h"
# autogenerated files, .h and .hs
echo "-i$1"
echo "-I$1"
}
cores=$(sysctl -n machdep.cpu.core_count || echo 4)
function flags {
# disable optimizations for faster loading
echo "-O0"
# don't load .ghci files (for ghcide)
echo "-ignore-dot-ghci"
# use as many jobs as there are physical cores
echo "-j$cores"
# where to put build products
echo "-outputdir $build_products_dir"
echo "-odir $build_products_dir"
echo "-hidir $build_products_dir"
echo "-stubdir $build_products_dir"
# preprocessor options, for -XCPP
echo "-optP-include"
echo "-optP$build_dir/semantic-0.10.0.0/build/autogen/cabal_macros.h"
# autogenerated sources, both .hs and .h (e.g. Foo_paths.hs)
echo "-i$build_dir/semantic-0.10.0.0/build/autogen"
echo "-I$build_dir/semantic-0.10.0.0/build/autogen"
if [ -d "$build_dir/semantic-0.10.0.0/build/autogen" ]
then add_autogen_includes "$build_dir/semantic-0.10.0.0/build/autogen"
elif [ -d "$build_dir/semantic-0.10.0.0/noopt/build/autogen" ]
then add_autogen_includes "$build_dir/semantic-0.10.0.0/noopt/build/autogen"
fi
# .hs source dirs
# TODO: would be nice to figure this out from cabal.project & the .cabal files
echo "-isemantic-analysis/src"
echo "-isemantic-ast/src"
echo "-isemantic-codegen/src"
echo "-isemantic-core/src"
echo "-isemantic-go/src"
echo "-isemantic-java/src"
echo "-isemantic-json/src"
echo "-isemantic-parse/src"
echo "-isemantic-python/src"
echo "-isemantic-python/test"
echo "-isemantic-ruby/src"

View File

@ -37,40 +37,26 @@ common haskell
library
import: haskell
exposed-modules: Marshal.JSON
exposed-modules:
AST.Element
AST.Traversable1
AST.Traversable1.Class
Marshal.JSON
-- other-modules:
-- other-extensions:
build-depends: base ^>= 4.13
, tree-sitter ^>= 0.8
, semantic-source ^>= 0.0.2
, tree-sitter-python ^>= 0.8.1
, bytestring ^>= 0.10.8.2
, optparse-applicative >= 0.14.3 && < 0.16
, pretty-simple ^>= 3.1.0.0
, aeson ^>= 1.4.2.0
, text ^>= 1.2.3.1
, bytestring ^>= 0.10.8.2
, aeson-pretty ^>= 0.8.8
, bytestring ^>= 0.10.8.2
, tree-sitter ^>= 0.8
, semantic-source ^>= 0.0.2
, template-haskell ^>= 2.15
, bytestring ^>= 0.10.8.2
, optparse-applicative >= 0.14.3 && < 0.16
, pretty-simple ^>= 3.1.0.0
, text ^>= 1.2.3.1
hs-source-dirs: src
default-language: Haskell2010
executable semantic-ast
import: haskell
main-is: Main.hs
-- other-modules:
-- other-extensions:
build-depends: base
, semantic-ast
, tree-sitter
, semantic-source
, tree-sitter-python
, bytestring
, optparse-applicative
, pretty-simple
, aeson
, bytestring
, aeson-pretty
hs-source-dirs: app
default-language: Haskell2010

View File

@ -0,0 +1,75 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module AST.Traversable1
( module AST.Traversable1.Class
, for1
, traverse1_
, for1_
, foldMap1
, Generics(..)
) where
import AST.Traversable1.Class
import Data.Functor (void)
import Data.Functor.Const
import Data.Monoid (Ap (..))
import GHC.Generics
for1
:: forall c t f a b
. (Traversable1 c t, Applicative f)
=> t a
-> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> f (t b)
for1 t f g = traverse1 @c f g t
traverse1_
:: forall c t f a a' a''
. (Traversable1 c t, Applicative f)
=> (a -> f a')
-> (forall t' . c t' => t' a -> f a'')
-> t a
-> f ()
traverse1_ f g = getAp . foldMap1 @c (Ap . void . f) (Ap . void . g)
for1_
:: forall c t f a a' a''
. (Traversable1 c t, Applicative f)
=> t a
-> (a -> f a')
-> (forall t' . c t' => t' a -> f a'')
-> f ()
for1_ t f g = getAp $ foldMap1 @c (Ap . void . f) (Ap . void . g) t
foldMap1 :: forall c t b a . (Traversable1 c t, Monoid b) => (a -> b) -> (forall t' . c t' => t' a -> b) -> t a -> b
foldMap1 f g = getConst . traverse1 @c (Const . f) (Const . g)
-- | @'Generics' t@ has a 'Traversable1' instance when @'Rep1' t@ has a 'GTraversable1' instance, making this convenient for applying 'traverse1' to 'Generic1' types lacking 'Traversable1' instances:
--
-- @
-- 'getGenerics' '<$>' 'traverse1' f g ('Generics' t) = 'to1' '<$>' 'gtraverse1' f g ('from1' t)
-- @
--
-- It further defines its 'Foldable', 'Functor', and 'Traversable' instances using 'Traversable1', making it suitable for deriving with @-XDerivingVia@.
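--
-- For example, with @-XDerivingVia@, @-XDerivingStrategies@, and @-XDeriveGeneric@
-- enabled (the @Pair@ type below is illustrative, not defined in this module):
--
-- @
-- data Pair a = Pair a a
--   deriving stock ('Generic1')
--   deriving ('Foldable', 'Functor', 'Traversable') via 'Generics' Pair
-- @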
newtype Generics t a = Generics { getGenerics :: t a }
instance (Generic1 t, GTraversable1 Foldable (Rep1 t)) => Foldable (Generics t) where
foldMap = foldMapDefault1
instance (Generic1 t, GTraversable1 Functor (Rep1 t)) => Functor (Generics t) where
fmap = fmapDefault1
instance (Generic1 t, GTraversable1 Foldable (Rep1 t), GTraversable1 Functor (Rep1 t), GTraversable1 Traversable (Rep1 t)) => Traversable (Generics t) where
traverse = traverseDefault1
instance (Generic1 t, GTraversable1 c (Rep1 t)) => Traversable1 c (Generics t) where
traverse1 f g = fmap (Generics . to1) . gtraverse1 @c f g . from1 . getGenerics

View File

@ -0,0 +1,100 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
-- | This module defines the 'Traversable1' class and its generic derivation using 'GTraversable1'. Note that any changes to this file will require recompilation of all of the AST modules, which is quite expensive; thus, most additions should be made in "AST.Traversable1" instead, and that module should not be imported by the AST modules.
module AST.Traversable1.Class
( Traversable1(..)
, foldMapDefault1
, fmapDefault1
, traverseDefault1
, GTraversable1(..)
) where
import Data.Functor.Const
import Data.Functor.Identity
import GHC.Generics
-- | Simultaneous traversal of subterms of kind @*@ and @* -> *@ in an 'Applicative' context.
--
-- 'Traversable1' can express any combination of first- and second-order mapping, folding, and traversal.
--
-- Note that the @1@ suffix is used in the manner of 'Data.Functor.Classes.Show1' or 'Generic1', rather than 'foldr1'; it's a higher-order traversal which is simultaneously able to traverse (and alter) annotations.
class Traversable1 c t where
-- | Map annotations of kind @*@ and heterogeneously-typed subterms of kind @* -> *@ under some constraint @c@ into an 'Applicative' context. The constraint is necessary to operate on otherwise universally-quantified subterms, since otherwise there would be insufficient information to inspect them at all.
--
-- No proxy is provided for the constraint @c@; instead, @-XTypeApplications@ should be used. E.g. here we ignore the annotations and print all the @* -> *@ subterms using 'Show1':
--
-- @
-- 'traverse1' \@'Data.Functor.Classes.Show1' 'pure' (\ t -> t '<$' 'putStrLn' ('Data.Functor.Classes.showsPrec1' 0 t ""))
-- @
--
-- Note that this traversal is non-recursive: any recursion through subterms must be performed by the second function argument.
traverse1
:: Applicative f
=> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> t a
-> f (t b)
default traverse1
:: (Applicative f, Generic1 t, GTraversable1 c (Rep1 t))
=> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> t a
-> f (t b)
traverse1 f g = fmap to1 . gtraverse1 @c f g . from1
-- | This function may be used as a value for 'foldMap' in a 'Foldable' instance.
foldMapDefault1 :: (Traversable1 Foldable t, Monoid b) => (a -> b) -> t a -> b
foldMapDefault1 f = getConst . traverse1 @Foldable (Const . f) (Const . foldMap f)
-- | This function may be used as a value for 'fmap' in a 'Functor' instance.
fmapDefault1 :: Traversable1 Functor t => (a -> b) -> t a -> t b
fmapDefault1 f = runIdentity . traverse1 @Functor (Identity . f) (Identity . fmap f)
-- | This function may be used as a value for 'traverse' in a 'Traversable' instance.
traverseDefault1 :: (Traversable1 Traversable t, Applicative f) => (a -> f b) -> t a -> f (t b)
traverseDefault1 f = traverse1 @Traversable f (traverse f)
class GTraversable1 c t where
-- | Generically map annotations and subterms of kind @* -> *@ into an 'Applicative' context.
gtraverse1
:: Applicative f
=> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> t a
-> f (t b)
instance GTraversable1 c f => GTraversable1 c (M1 i c' f) where
gtraverse1 f g = fmap M1 . gtraverse1 @c f g . unM1
instance (GTraversable1 c f, GTraversable1 c g) => GTraversable1 c (f :*: g) where
gtraverse1 f g (l :*: r) = (:*:) <$> gtraverse1 @c f g l <*> gtraverse1 @c f g r
instance (GTraversable1 c f, GTraversable1 c g) => GTraversable1 c (f :+: g) where
gtraverse1 f g (L1 l) = L1 <$> gtraverse1 @c f g l
gtraverse1 f g (R1 r) = R1 <$> gtraverse1 @c f g r
instance GTraversable1 c (K1 R t) where
gtraverse1 _ _ (K1 k) = pure (K1 k)
instance GTraversable1 c Par1 where
gtraverse1 f _ (Par1 a) = Par1 <$> f a
instance c t => GTraversable1 c (Rec1 t) where
gtraverse1 _ g (Rec1 t) = Rec1 <$> g t
instance (Traversable f, GTraversable1 c g) => GTraversable1 c (f :.: g) where
gtraverse1 f g = fmap Comp1 . traverse (gtraverse1 @c f g) . unComp1
instance GTraversable1 c U1 where
gtraverse1 _ _ _ = pure U1

View File

@ -0,0 +1,5 @@
# Revision history for semantic-codegen
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.

4
semantic-codegen/Main.hs Normal file
View File

@ -0,0 +1,4 @@
module Main where
main :: IO ()
main = putStrLn "Hello, Haskell!"

216
semantic-codegen/README.md Normal file
View File

@ -0,0 +1,216 @@
# CodeGen Documentation
CodeGen is the process for auto-generating language-specific, strongly-typed ASTs to be used in [Semantic](https://github.com/github/semantic-code/blob/d9f91a05dc30a61b9ff8c536d75661d417f3c506/design-docs/precise-code-navigation.md).
_Note: This project was recently moved from `tree-sitter` into `Semantic`. These docs are in the process of being updated to reflect changes._
### Prerequisites
To get started, first make sure your language has:
1. An existing [tree-sitter](http://tree-sitter.github.io/tree-sitter/) parser;
2. An existing Cabal package in this repository for said language. This will provide an interface into tree-sitter's C source. [Here](https://github.com/tree-sitter/haskell-tree-sitter/tree/master/tree-sitter-python) is an example of a library for Python, a supported language that the remaining documentation will refer to.
### CodeGen Pipeline
During parser generation, tree-sitter produces a JSON file that captures the structure of a language's grammar. Based on this, we're able to derive datatypes representing surface languages, and then use those datatypes to generically build ASTs. This automates the engineering effort [historically required for adding a new language](https://github.com/github/semantic/blob/master/docs/adding-new-languages.md).
The following steps provide a high-level outline of the process:
1. [**Deserialize.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/Deserialize.hs) First, we deserialize the `node-types.json` file for a given language into the desired shape of datatypes via parsing capabilities afforded by the [Aeson](http://hackage.haskell.org/package/aeson) library. There are four distinct shapes a type in the `node-types.json` file can take on: sums, products, named leaves, and anonymous leaves.
2. [**Generate Syntax.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/GenerateSyntax.hs) We then use Template Haskell to auto-generate language-specific, strongly-typed datatypes that represent various language constructs. This API exports the top-level function `astDeclarationsForLanguage` to auto-generate datatypes at compile time; it is invoked by a given language [AST](https://github.com/github/semantic/blob/master/semantic-python/src/Language/Python/AST.hs) module (see the sketch after this list).
3. [**Unmarshal.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/Unmarshal.hs) Unmarshaling is the process of iterating over tree-sitter's parse trees using its tree cursor API, and producing Haskell ASTs for the relevant nodes. We parse source code from tree-sitter and unmarshal the data we get to build these ASTs generically. This file exports the top-level function `parseByteString`, which takes source code and a language as arguments, and produces an AST.
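As a rough sketch of how the pipeline is wired up in practice, a language AST module invokes the Template Haskell splice against that language's `node-types.json`. The module name and relative path below are illustrative rather than copied from this repository, and the real modules enable several additional language extensions so that the generated declarations compile:

```haskell
{-# LANGUAGE TemplateHaskell #-}
module Language.Python.AST where

import           AST.GenerateSyntax (astDeclarationsForLanguage)
import qualified TreeSitter.Python as Python (tree_sitter_python)

-- Reads node-types.json at compile time and splices in one datatype per
-- grammar rule, along with SymbolMatching, Unmarshal, and traversal instances.
astDeclarationsForLanguage Python.tree_sitter_python "../vendor/tree-sitter-python/src/node-types.json"
```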
Here is an example that describes the relationship between a Python identifier represented in the tree-sitter generated JSON file, and a datatype generated by Template Haskell based on the provided JSON:
| Type | JSON | TH-generated code |
|----------|--------------|------------|
|Named leaf|<code>{<br>"type": "identifier",<br>"named": true<br>}|<code>data TreeSitter.Python.AST.Identifier a<br>= TreeSitter.Python.AST.Identifier {TreeSitter.Python.AST.ann :: a,<br>TreeSitter.Python.AST.bytes :: text-1.2.3.1:Data.Text.Internal.Text} -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Show a => Show (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Ord a => Ord (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Eq a => Eq (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Traversable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Functor TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Foldable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Unmarshal TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance SymbolMatching TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1|
The remaining document provides more details on generating ASTs, inspecting datatypes, tests, and information on decisions pertaining to relevant APIs.
___
### Table of Contents
- [CodeGen Documentation](#codegen-documentation)
- [Prerequisites](#prerequisites)
- [CodeGen Pipeline](#codegen-pipeline)
- [Table of Contents](#table-of-contents)
- [Generating ASTs](#generating-asts)
- [Inspecting auto-generated datatypes](#inspecting-auto-generated-datatypes)
- [Tests](#tests)
- [Additional notes](#additional-notes)
___
### Generating ASTs
To parse source code and produce ASTs locally:
1. Load the REPL for a given language:
```
cabal new-repl lib:tree-sitter-python
```
2. Set language extensions, `OverloadedStrings` and `TypeApplications`, and import relevant modules, `AST.Unmarshal`, `Source.Range` and `Source.Span`:
```
:seti -XOverloadedStrings
:seti -XTypeApplications
import Source.Span
import Source.Range
import AST.Unmarshal
```
3. You can now call `parseByteString`, passing in the desired language you wish to parse (in this case Python exemplified by `tree_sitter_python`), and the source code (in this case an integer `1`). Since the function is constrained by `(Unmarshal t, UnmarshalAnn a)`, you can use type applications to provide a top-level node `t`, an entry point into the tree, in addition to a polymorphic annotation `a` used to represent range and span:
```
parseByteString @TreeSitter.Python.AST.Module @(Source.Span.Span, Source.Range.Range) tree_sitter_python "1"
```
This generates the following AST:
```
Right
( Module
{ ann =
( Range
{ start = 0
, end = 1
}
, Span
{ start = Pos
{ line = 0
, column = 0
}
, end = Pos
{ line = 0
, column = 1
}
}
)
, extraChildren =
[ R1
( SimpleStatement
( L1
( R1
( R1
( L1
( ExpressionStatement
{ ann =
( Range
{ start = 0
, end = 1
}
, Span
{ start = Pos
{ line = 0
, column = 0
}
, end = Pos
{ line = 0
, column = 1
}
}
)
, extraChildren = L1
( L1
( Expression
( L1
( L1
( L1
( PrimaryExpression
( R1
( L1
( L1
( L1
( Integer
{ ann =
( Range
{ start = 0
, end = 1
}
, Span
{ start = Pos
{ line = 0
, column = 0
}
, end = Pos
{ line = 0
, column = 1
}
}
)
, text = "1"
}
)
)
)
)
)
)
)
)
)
)
) :| []
}
)
)
)
)
)
)
]
}
)
```
### Inspecting auto-generated datatypes
Datatypes are derived from a language and its `node-types.json` file using the GenerateSyntax API. Definitions can be viewed in the REPL just as they would be for any other datatype, using `:i`:
```
:i TreeSitter.Python.AST.Module
```
This shows us the auto-generated `Module` datatype:
```Haskell
data TreeSitter.Python.AST.Module a
= TreeSitter.Python.AST.Module {TreeSitter.Python.AST.ann :: a,
TreeSitter.Python.AST.extraChildren :: [(GHC.Generics.:+:)
TreeSitter.Python.AST.CompoundStatement
TreeSitter.Python.AST.SimpleStatement
a]}
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Show a => Show (TreeSitter.Python.AST.Module a)
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Ord a => Ord (TreeSitter.Python.AST.Module a)
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Eq a => Eq (TreeSitter.Python.AST.Module a)
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Traversable TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Functor TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Foldable TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Unmarshal TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance SymbolMatching TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
```
### Tests
As of right now, Hedgehog tests are minimal and only in place for the Python library.
To run tests:
`cabal v2-test tree-sitter-python`
### Additional notes
- [GenerateSyntax](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/GenerateSyntax.hs) provides a way to pre-define certain datatypes for which Template Haskell is not used. Any datatypes among the node types which have already been defined in the module where the splice is run will be skipped, allowing customization of the representation of parts of the tree. While this gives us flexibility, we encourage using this sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into `Integer`s), and may require defining `TS.UnmarshalAnn` or `TS.SymbolMatching` instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual `Foldable`, `Functor`, etc. instances provided for generated datatypes.
- Annotations are captured by a polymorphic parameter `a`
- [Unmarshal](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/Unmarshal.hs) defines both generic and non-generic classes. This is because generic behaviors differ from what we get non-generically, and in the case of `Maybe` and `[]` we actually prefer doing things non-generically. Since `[]` is a sum, the generic behavior for `:+:` would be invoked and would expect repetitions to be represented in the parse tree as right-nested singly-linked lists (e.g., `(a (b (c (d…))))`) rather than as consecutive sibling nodes (e.g., `(a b c ...d)`, which is what our trees have). We want to match the latter.

View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,83 @@
cabal-version: 2.4
-- Initial package description 'semantic-codegen.cabal' generated by 'cabal
-- init'. For further documentation, see
-- http://haskell.org/cabal/users-guide/
name: semantic-codegen
version: 0.1.0.0
-- synopsis:
-- description:
-- bug-reports:
license: MIT
license-file: LICENSE
author: The Semantic Authors
maintainer: opensource+semantic@github.com
copyright: (c) 2019 GitHub, Inc.
category: Language
extra-source-files: CHANGELOG.md
tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies
library
import: haskell
exposed-modules: AST.Deserialize
AST.GenerateSyntax
AST.Grammar.TH
AST.Token
AST.Unmarshal
-- other-modules:
-- other-extensions:
build-depends: base >= 4.13
, aeson ^>= 1.4.2.0
, bytestring ^>= 0.10.8.2
, tree-sitter ^>= 0.8
, fused-effects ^>= 1.0
, semantic-ast
, semantic-source ^>= 0.0.2
, template-haskell ^>= 2.15
, text ^>= 1.2.3.1
, unordered-containers ^>= 0.2.10
, containers >= 0.6.0.1
, text ^>= 1.2.3.1
, filepath ^>= 1.4.1
, directory ^>= 1.3.3.2
hs-source-dirs: src
default-language: Haskell2010
executable semantic-codegen
import: haskell
main-is: Main.hs
-- other-modules:
-- other-extensions:
build-depends: base
, tree-sitter
, semantic-source
, bytestring
, aeson
, bytestring
, text
, unordered-containers
, containers
, filepath
default-language: Haskell2010
-- hs-source-dirs:
default-language: Haskell2010

View File

@ -0,0 +1,133 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveLift #-}
-- Turn off partial field warnings for Datatype.
{-# OPTIONS_GHC -Wno-partial-fields #-}
module AST.Deserialize
( Datatype (..)
, Field (..)
, Children(..)
, Required (..)
, Type (..)
, DatatypeName (..)
, Named (..)
, Multiple (..)
) where
import Data.Aeson as Aeson
import Data.Aeson.Types
import Data.Char
import GHC.Generics hiding (Constructor, Datatype)
import Language.Haskell.TH.Syntax (Lift)
import Data.Text (Text, unpack)
import Data.List.NonEmpty (NonEmpty (..))
import qualified Data.HashMap.Strict as HM
import Data.Maybe (fromMaybe)
-- Types to deserialize into:
data Datatype
= SumType
{ datatypeName :: DatatypeName
, datatypeNameStatus :: Named
, datatypeSubtypes :: NonEmpty Type
}
| ProductType
{ datatypeName :: DatatypeName
, datatypeNameStatus :: Named
, datatypeChildren :: Maybe Children
, datatypeFields :: [(String, Field)]
}
| LeafType
{ datatypeName :: DatatypeName
, datatypeNameStatus :: Named
}
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Datatype where
parseJSON = withObject "Datatype" $ \v -> do
type' <- v .: "type"
named <- v .: "named"
subtypes <- v .:? "subtypes"
case subtypes of
Nothing -> do
fields <- fmap (fromMaybe HM.empty) (v .:? "fields")
children <- v .:? "children"
if null fields && null children then
pure (LeafType type' named)
else
ProductType type' named children <$> parseKVPairs (HM.toList fields)
Just subtypes -> pure (SumType type' named subtypes)
-- | Transforms list of key-value pairs to a Parser
parseKVPairs :: [(Text, Value)] -> Parser [(String, Field)]
parseKVPairs = traverse go
where go :: (Text, Value) -> Parser (String, Field)
go (t,v) = do
v' <- parseJSON v
pure (unpack t, v')
data Field = MkField
{ fieldRequired :: Required
, fieldTypes :: NonEmpty Type
, fieldMultiple :: Multiple
}
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Field where
parseJSON = genericParseJSON customOptions
newtype Children = MkChildren Field
deriving (Eq, Ord, Show, Generic)
deriving newtype (ToJSON, FromJSON)
data Required = Optional | Required
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Required where
parseJSON = withBool "Required" (\p -> pure (if p then Required else Optional))
data Type = MkType
{ fieldType :: DatatypeName
, isNamed :: Named
}
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Type where
parseJSON = genericParseJSON customOptions
newtype DatatypeName = DatatypeName { getDatatypeName :: String }
deriving (Eq, Ord, Show, Generic)
deriving newtype (FromJSON, ToJSON)
data Named = Anonymous | Named
deriving (Eq, Ord, Show, Generic, ToJSON, Lift)
instance FromJSON Named where
parseJSON = withBool "Named" (\p -> pure (if p then Named else Anonymous))
data Multiple = Single | Multiple
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Multiple where
parseJSON = withBool "Multiple" (\p -> pure (if p then Multiple else Single))
customOptions :: Aeson.Options
customOptions = Aeson.defaultOptions
{
fieldLabelModifier = initLower . dropPrefix
, constructorTagModifier = initLower
}
dropPrefix :: String -> String
dropPrefix = Prelude.dropWhile isLower
initLower :: String -> String
initLower (c:cs) = toLower c : cs
initLower "" = ""

View File

@ -0,0 +1,208 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module AST.GenerateSyntax
( syntaxDatatype
, astDeclarationsForLanguage
) where
import AST.Deserialize (Children (..), Datatype (..), DatatypeName (..), Field (..), Multiple (..), Named (..), Required (..), Type (..))
import AST.Token
import AST.Traversable1.Class
import qualified AST.Unmarshal as TS
import Data.Aeson hiding (String)
import Data.Foldable
import Data.List
import Data.List.NonEmpty (NonEmpty (..))
import Data.Text (Text)
import Foreign.C.String
import Foreign.Ptr
import GHC.Generics hiding (Constructor, Datatype)
import GHC.Records
import Language.Haskell.TH as TH
import Language.Haskell.TH.Syntax as TH
import System.Directory
import System.FilePath.Posix
import qualified TreeSitter.Language as TS
import TreeSitter.Node
import TreeSitter.Symbol (TSSymbol, toHaskellCamelCaseIdentifier, toHaskellPascalCaseIdentifier)
-- | Derive Haskell datatypes from a language and its @node-types.json@ file.
--
-- Datatypes will be generated according to the specification in the @node-types.json@ file, with anonymous leaf types defined as synonyms for the 'Token' datatype.
--
-- Any datatypes among the node types which have already been defined in the module where the splice is run will be skipped, allowing customization of the representation of parts of the tree. Note that this should be used sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into 'Integer's), and may require defining 'TS.UnmarshalAnn' or 'TS.SymbolMatching' instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual 'Foldable', 'Functor', etc. instances provided for generated datatypes.
astDeclarationsForLanguage :: Ptr TS.Language -> FilePath -> Q [Dec]
astDeclarationsForLanguage language filePath = do
_ <- TS.addDependentFileRelative filePath
currentFilename <- loc_filename <$> location
pwd <- runIO getCurrentDirectory
let invocationRelativePath = takeDirectory (pwd </> currentFilename) </> filePath
input <- runIO (eitherDecodeFileStrict' invocationRelativePath) >>= either fail pure
allSymbols <- runIO (getAllSymbols language)
debugSymbolNames <- [d|
debugSymbolNames :: [String]
debugSymbolNames = $(listE (map (litE . stringL . debugPrefix) allSymbols))
|]
(debugSymbolNames <>) . concat @[] <$> traverse (syntaxDatatype language allSymbols) input
-- Build a list of all symbols
getAllSymbols :: Ptr TS.Language -> IO [(String, Named)]
getAllSymbols language = do
count <- TS.ts_language_symbol_count language
traverse getSymbol [(0 :: TSSymbol) .. fromIntegral (pred count)]
where
getSymbol i = do
cname <- TS.ts_language_symbol_name language i
n <- peekCString cname
t <- TS.ts_language_symbol_type language i
let named = if t == 0 then Named else Anonymous
pure (n, named)
-- Auto-generate Haskell datatypes for sums, products and leaf types
syntaxDatatype :: Ptr TS.Language -> [(String, Named)] -> Datatype -> Q [Dec]
syntaxDatatype language allSymbols datatype = skipDefined $ do
typeParameterName <- newName "a"
case datatype of
SumType (DatatypeName _) _ subtypes -> do
types' <- fieldTypesToNestedSum subtypes
let fieldName = mkName ("get" <> nameStr)
con <- recC name [TH.varBangType fieldName (TH.bangType strictness (pure types' `appT` varT typeParameterName))]
hasFieldInstance <- makeHasFieldInstance (conT name) (varT typeParameterName) (varE fieldName)
traversalInstances <- makeTraversalInstances (conT name)
pure
( NewtypeD [] name [PlainTV typeParameterName] Nothing con [deriveGN, deriveStockClause, deriveAnyClassClause]
: hasFieldInstance
<> traversalInstances)
ProductType (DatatypeName datatypeName) named children fields -> do
con <- ctorForProductType datatypeName typeParameterName children fields
symbolMatchingInstance <- symbolMatchingInstance allSymbols name named datatypeName
traversalInstances <- makeTraversalInstances (conT name)
pure
( generatedDatatype name [con] typeParameterName
: symbolMatchingInstance
<> traversalInstances)
-- Anonymous leaf types are defined as synonyms for the `Token` datatype
LeafType (DatatypeName datatypeName) Anonymous -> do
tsSymbol <- runIO $ withCStringLen datatypeName (\(s, len) -> TS.ts_language_symbol_for_name language s len False)
pure [ TySynD name [] (ConT ''Token `AppT` LitT (StrTyLit datatypeName) `AppT` LitT (NumTyLit (fromIntegral tsSymbol))) ]
LeafType (DatatypeName datatypeName) Named -> do
con <- ctorForLeafType (DatatypeName datatypeName) typeParameterName
symbolMatchingInstance <- symbolMatchingInstance allSymbols name Named datatypeName
traversalInstances <- makeTraversalInstances (conT name)
pure
( generatedDatatype name [con] typeParameterName
: symbolMatchingInstance
<> traversalInstances)
where
-- Skip generating datatypes that have already been defined (overridden) in the module where the splice is running.
skipDefined m = do
isLocal <- lookupTypeName nameStr >>= maybe (pure False) isLocalName
if isLocal then pure [] else m
name = mkName nameStr
nameStr = toNameString (datatypeNameStatus datatype) (getDatatypeName (AST.Deserialize.datatypeName datatype))
deriveStockClause = DerivClause (Just StockStrategy) [ ConT ''Eq, ConT ''Ord, ConT ''Show, ConT ''Generic, ConT ''Generic1]
deriveAnyClassClause = DerivClause (Just AnyclassStrategy) [ConT ''TS.Unmarshal, ConT ''Traversable1 `AppT` VarT (mkName "someConstraint")]
deriveGN = DerivClause (Just NewtypeStrategy) [ConT ''TS.SymbolMatching]
generatedDatatype name cons typeParameterName = DataD [] name [PlainTV typeParameterName] Nothing cons [deriveStockClause, deriveAnyClassClause]
makeTraversalInstances :: TypeQ -> Q [Dec]
makeTraversalInstances ty =
[d|
instance Foldable $ty where
foldMap = foldMapDefault1
instance Functor $ty where
fmap = fmapDefault1
instance Traversable $ty where
traverse = traverseDefault1
|]
makeHasFieldInstance :: TypeQ -> TypeQ -> ExpQ -> Q [Dec]
makeHasFieldInstance ty param elim =
[d|instance HasField "ann" $(ty `appT` param) $param where
getField = TS.gann . $elim |]
-- | Create TH-generated SymbolMatching instances for sums, products, leaves
symbolMatchingInstance :: [(String, Named)] -> Name -> Named -> String -> Q [Dec]
symbolMatchingInstance allSymbols name named str = do
let tsSymbols = elemIndices (str, named) allSymbols
names = intercalate ", " $ fmap (debugPrefix . (!!) allSymbols) tsSymbols
[d|instance TS.SymbolMatching $(conT name) where
matchedSymbols _ = tsSymbols
showFailure _ node = "expected " <> $(litE (stringL names))
<> " but got " <> if nodeSymbol node == 65535 then "ERROR" else genericIndex debugSymbolNames (nodeSymbol node)
<> " [" <> show r1 <> ", " <> show c1 <> "] -"
<> " [" <> show r2 <> ", " <> show c2 <> "]"
where TSPoint r1 c1 = nodeStartPoint node
TSPoint r2 c2 = nodeEndPoint node|]
-- | Prefix symbol names for debugging to disambiguate between Named and Anonymous nodes.
debugPrefix :: (String, Named) -> String
debugPrefix (name, Named) = name
debugPrefix (name, Anonymous) = "_" <> name
-- | Build Q Constructor for product types (nodes with fields)
ctorForProductType :: String -> Name -> Maybe Children -> [(String, Field)] -> Q Con
ctorForProductType constructorName typeParameterName children fields = ctorForTypes constructorName lists where
lists = annotation : fieldList <> childList
annotation = ("ann", varT typeParameterName)
fieldList = map (fmap toType) fields
childList = toList $ fmap toTypeChild children
toType (MkField required fieldTypes mult) =
let ftypes = fieldTypesToNestedSum fieldTypes `appT` varT typeParameterName
in case (required, mult) of
(Required, Multiple) -> appT (conT ''NonEmpty) ftypes
(Required, Single) -> ftypes
(Optional, Multiple) -> appT (conT ''[]) ftypes
(Optional, Single) -> appT (conT ''Maybe) ftypes
toTypeChild (MkChildren field) = ("extra_children", toType field)
-- | Build Q Constructor for leaf types (nodes with no fields or subtypes)
ctorForLeafType :: DatatypeName -> Name -> Q Con
ctorForLeafType (DatatypeName name) typeParameterName = ctorForTypes name
[ ("ann", varT typeParameterName) -- ann :: a
, ("text", conT ''Text) -- text :: Text
]
-- | Build Q Constructor for records
ctorForTypes :: String -> [(String, Q TH.Type)] -> Q Con
ctorForTypes constructorName types = recC (toName Named constructorName) recordFields where
recordFields = map (uncurry toVarBangType) types
toVarBangType str type' = TH.varBangType (mkName . toHaskellCamelCaseIdentifier $ str) (TH.bangType strictness type')
-- | Convert field types to Q types
fieldTypesToNestedSum :: NonEmpty AST.Deserialize.Type -> Q TH.Type
fieldTypesToNestedSum xs = go (toList xs)
where
combine lhs rhs = (conT ''(:+:) `appT` lhs) `appT` rhs -- (((((a :+: b) :+: c) :+: d)) :+: e) ((a :+: b) :+: (c :+: d))
convertToQType (MkType (DatatypeName n) named) = conT (toName named n)
go [x] = convertToQType x
go xs = let (l,r) = splitAt (length xs `div` 2) xs in combine (go l) (go r)
-- | Create bang required to build records
strictness :: BangQ
strictness = TH.bang noSourceUnpackedness noSourceStrictness
-- | Prepend "Anonymous" to named node when false, otherwise use regular toName
toName :: Named -> String -> Name
toName named str = mkName (toNameString named str)
toNameString :: Named -> String -> String
toNameString named str = prefix named <> toHaskellPascalCaseIdentifier str
where
prefix Anonymous = "Anonymous"
prefix Named = ""
-- | Get the 'Module', if any, for a given 'Name'.
moduleForName :: Name -> Maybe Module
moduleForName n = Module . PkgName <$> namePackage n <*> (ModName <$> nameModule n)
-- | Test whether the name is defined in the module where the splice is executed.
isLocalName :: Name -> Q Bool
isLocalName n = (moduleForName n ==) . Just <$> thisModule

View File

@ -0,0 +1,83 @@
{-# LANGUAGE DataKinds, DeriveAnyClass, DeriveGeneric, DuplicateRecordFields, TypeOperators #-}
module AST.Grammar.Examples () where
import Control.Effect.Reader
import Control.Monad.Fail
import qualified Data.ByteString as B
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import GHC.Generics ((:+:), Generic1)
import Numeric (readDec)
import Prelude hiding (fail)
import Source.Range
import AST.Token
import AST.Unmarshal
-- | An example of a sum-of-products datatype.
newtype Expr a = Expr ((If :+: Block :+: Var :+: Lit :+: Bin) a)
deriving (Generic1, Unmarshal)
instance SymbolMatching Expr where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Product with multiple fields.
data If a = If { ann :: a, condition :: Expr a, consequence :: Expr a, alternative :: Maybe (Expr a) }
deriving (Generic1, Unmarshal)
instance SymbolMatching If where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Single-field product.
data Block a = Block { ann :: a, body :: [Expr a] }
deriving (Generic1, Unmarshal)
instance SymbolMatching Block where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Leaf node.
data Var a = Var { ann :: a, text :: Text.Text }
deriving (Generic1, Unmarshal)
instance SymbolMatching Var where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Custom leaf node.
data Lit a = Lit { ann :: a, lit :: IntegerLit }
deriving (Generic1, Unmarshal)
instance SymbolMatching Lit where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Product with anonymous sum field.
data Bin a = Bin { ann :: a, lhs :: Expr a, op :: (AnonPlus :+: AnonTimes) a, rhs :: Expr a }
deriving (Generic1, Unmarshal)
instance SymbolMatching Bin where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Anonymous leaf node.
type AnonPlus = Token "+" 0
-- | Anonymous leaf node.
type AnonTimes = Token "*" 1
newtype IntegerLit = IntegerLit Integer
instance UnmarshalAnn IntegerLit where
unmarshalAnn node = do
Range start end <- unmarshalAnn node
bytestring <- asks source
let drop = B.drop start
take = B.take (end - start)
slice = take . drop
str = Text.unpack (Text.decodeUtf8 (slice bytestring))
case readDec str of
(i, _):_ -> pure (IntegerLit i)
_ -> fail ("could not parse '" <> str <> "'")

View File

@ -0,0 +1,33 @@
{-# LANGUAGE TemplateHaskell #-}
module AST.Grammar.TH
( mkStaticallyKnownRuleGrammarData
) where
import Data.Ix (Ix)
import Data.List (mapAccumL)
import qualified Data.Set as Set
import Foreign.Ptr
import Language.Haskell.TH
import Language.Haskell.TH.Syntax
import TreeSitter.Symbol
import TreeSitter.Language (Language, languageSymbols)
-- | TemplateHaskell construction of a datatype for the referenced Language.
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData :: Name -> Ptr Language -> Q [Dec]
mkStaticallyKnownRuleGrammarData name language = do
symbols <- renameDups . map ((,) . fst <*> uncurry symbolToName) . (++ [(Regular, "ParseError")]) <$> runIO (languageSymbols language)
Module _ modName <- thisModule
let mkMatch symbolType str = match (conP (Name (OccName str) (NameQ modName)) []) (normalB [e|symbolType|]) []
datatype <- dataD (pure []) name [] Nothing (flip normalC [] . mkName . snd <$> symbols)
[ derivClause Nothing (map conT [ ''Bounded, ''Enum, ''Eq, ''Ix, ''Ord, ''Show ]) ]
symbolInstance <- [d|
instance Symbol $(conT name) where
symbolType = $(lamCaseE (uncurry mkMatch <$> symbols)) |]
pure (datatype : symbolInstance)
renameDups :: [(a, String)] -> [(a, String)]
renameDups = snd . mapAccumL go mempty
where go done (ty, name) = let name' = rename name in (Set.insert name' done, (ty, name'))
where rename name | name `Set.member` done = rename (name ++ "'")
| otherwise = name

View File

@ -0,0 +1,17 @@
{-# LANGUAGE DataKinds, DeriveGeneric, DeriveTraversable, KindSignatures #-}
module AST.Token
( Token(..)
) where
import GHC.Generics (Generic, Generic1)
import GHC.TypeLits (Symbol, Nat)
-- | An AST node representing a token, indexed by its name and numeric value.
--
-- For convenience, token types are typically used via type synonyms, e.g.:
--
-- @
-- type AnonymousPlus = Token "+" 123
-- @
newtype Token (symName :: Symbol) (symVal :: Nat) a = Token { ann :: a }
deriving (Eq, Foldable, Functor, Generic, Generic1, Ord, Show, Traversable)

View File

@ -0,0 +1,405 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module AST.Unmarshal
( parseByteString
, UnmarshalState(..)
, UnmarshalError(..)
, FieldName(..)
, Unmarshal(..)
, UnmarshalAnn(..)
, UnmarshalField(..)
, SymbolMatching(..)
, Match(..)
, hoist
, lookupSymbol
, unmarshalNode
, GHasAnn(..)
) where
import Control.Algebra (send)
import Control.Carrier.Reader hiding (asks)
import Control.Exception
import Control.Monad.IO.Class
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Data.Coerce
import Data.Foldable (toList)
import qualified Data.IntMap as IntMap
import Data.List.NonEmpty (NonEmpty (..))
import Data.Proxy
import qualified Data.Text as Text
import Data.Text.Encoding
import Data.Text.Encoding.Error (lenientDecode)
import Foreign.C.String
import Foreign.Marshal.Array
import Foreign.Marshal.Utils
import Foreign.Ptr
import Foreign.Storable
import GHC.Generics
import GHC.Records
import GHC.TypeLits
import Source.Loc
import Source.Span
import TreeSitter.Cursor as TS
import TreeSitter.Language as TS
import TreeSitter.Node as TS
import TreeSitter.Parser as TS
import AST.Token as TS
import TreeSitter.Tree as TS
asks :: Has (Reader r) sig m => (r -> r') -> m r'
asks f = send (Ask (pure . f))
{-# INLINE asks #-}
-- Parse source code and produce AST
parseByteString :: (Unmarshal t, UnmarshalAnn a) => Ptr TS.Language -> ByteString -> IO (Either String (t a))
parseByteString language bytestring = withParser language $ \ parser -> withParseTree parser bytestring $ \ treePtr ->
if treePtr == nullPtr then
pure (Left "error: didn't get a root node")
else
withRootNode treePtr $ \ rootPtr ->
withCursor (castPtr rootPtr) $ \ cursor ->
(Right <$> runReader (UnmarshalState bytestring cursor) (liftIO (peek rootPtr) >>= unmarshalNode))
`catch` (pure . Left . getUnmarshalError)
newtype UnmarshalError = UnmarshalError { getUnmarshalError :: String }
deriving (Show)
instance Exception UnmarshalError
data UnmarshalState = UnmarshalState
{ source :: {-# UNPACK #-} !ByteString
, cursor :: {-# UNPACK #-} !(Ptr Cursor)
}
type MatchM = ReaderC UnmarshalState IO
newtype Match t = Match
{ runMatch :: forall a . UnmarshalAnn a => Node -> MatchM (t a)
}
-- | A church-encoded binary tree with constant-time 'singleton', 'mempty', '<>', and 'fmap', and linear-time 'foldMap'.
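--
-- For example (illustrative): @foldMap (:[]) (singleton 1 <> singleton 2)@ evaluates to @[1,2]@.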
newtype B a = B (forall r . (r -> r -> r) -> (a -> r) -> r -> r)
instance Functor B where
fmap f (B run) = B (\ fork leaf -> run fork (leaf . f))
{-# INLINE fmap #-}
a <$ B run = B (\ fork leaf -> run fork (leaf . const a))
{-# INLINE (<$) #-}
instance Semigroup (B a) where
B l <> B r = B (\ fork leaf nil -> fork (l fork leaf nil) (r fork leaf nil))
{-# INLINE (<>) #-}
instance Monoid (B a) where
mempty = B (\ _ _ nil -> nil)
{-# INLINE mempty #-}
instance Foldable B where
foldMap f (B run) = run (<>) f mempty
{-# INLINE foldMap #-}
singleton :: a -> B a
singleton a = B (\ _ leaf _ -> leaf a)
{-# INLINE singleton #-}
hoist :: (forall x . t x -> t' x) -> Match t -> Match t'
hoist f (Match run) = Match (fmap f . run)
{-# INLINE hoist #-}
lookupSymbol :: TSSymbol -> IntMap.IntMap a -> Maybe a
lookupSymbol sym map = IntMap.lookup (fromIntegral sym) map
{-# INLINE lookupSymbol #-}
-- | Unmarshal a node
unmarshalNode :: forall t a .
( UnmarshalAnn a
, Unmarshal t
)
=> Node
-> MatchM (t a)
unmarshalNode node = case lookupSymbol (nodeSymbol node) matchers' of
Just t -> runMatch t node
Nothing -> liftIO . throwIO . UnmarshalError $ showFailure (Proxy @t) node
{-# INLINE unmarshalNode #-}
-- | Unmarshalling is the process of iterating over tree-sitter's parse trees using its tree cursor API and producing Haskell ASTs for the relevant nodes.
--
-- Datatypes which can be constructed from tree-sitter parse trees may use the default definition of 'matchers' providing that they have a suitable 'Generic1' instance.
class SymbolMatching t => Unmarshal t where
matchers' :: IntMap.IntMap (Match t)
matchers' = IntMap.fromList (toList matchers)
matchers :: B (Int, Match t)
default matchers :: (Generic1 t, GUnmarshal (Rep1 t)) => B (Int, Match t)
matchers = foldMap (singleton . (, match)) (matchedSymbols (Proxy @t))
where match = Match $ \ node -> do
cursor <- asks cursor
goto cursor (nodeTSNode node)
fmap to1 (gunmarshalNode node)
instance (Unmarshal f, Unmarshal g) => Unmarshal (f :+: g) where
matchers = fmap (fmap (hoist L1)) matchers <> fmap (fmap (hoist R1)) matchers
instance Unmarshal t => Unmarshal (Rec1 t) where
matchers = coerce (matchers @t)
instance (KnownNat n, KnownSymbol sym) => Unmarshal (Token sym n) where
matchers = singleton (fromIntegral (natVal (Proxy @n)), Match (fmap Token . unmarshalAnn))
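-- As a sketch of how instances are usually obtained: a generated syntax type
-- along the lines of the hypothetical
--
-- > data Identifier a = Identifier { ann :: a, text :: Text.Text }
-- >   deriving (Generic1)
-- > instance Unmarshal Identifier
--
-- picks up the default 'matchers', which walks 'Rep1 Identifier' via
-- 'GUnmarshal'; only the 'SymbolMatching' instance needs generated knowledge
-- of the grammar's symbol numbers.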
-- | Unmarshal an annotation field.
--
-- Leaf nodes have 'Text.Text' fields, and leaves, anonymous leaves, and products all have parametric annotation fields. All of these fields are unmarshalled using the metadata of the node, e.g. its start/end bytes, without reference to any child nodes it may contain.
class UnmarshalAnn a where
unmarshalAnn
:: Node
-> MatchM a
instance UnmarshalAnn () where
unmarshalAnn _ = pure ()
instance UnmarshalAnn Text.Text where
unmarshalAnn node = do
range <- unmarshalAnn node
asks (decodeUtf8With lenientDecode . slice range . source)
-- | Instance for pairs of annotations
instance (UnmarshalAnn a, UnmarshalAnn b) => UnmarshalAnn (a,b) where
unmarshalAnn node = (,)
<$> unmarshalAnn @a node
<*> unmarshalAnn @b node
instance UnmarshalAnn Loc where
unmarshalAnn node = Loc
<$> unmarshalAnn @Range node
<*> unmarshalAnn @Span node
instance UnmarshalAnn Range where
unmarshalAnn node = do
let start = fromIntegral (nodeStartByte node)
end = fromIntegral (nodeEndByte node)
pure (Range start end)
instance UnmarshalAnn Span where
unmarshalAnn node = do
let spanStart = pointToPos (nodeStartPoint node)
spanEnd = pointToPos (nodeEndPoint node)
pure (Span spanStart spanEnd)
pointToPos :: TSPoint -> Pos
pointToPos (TSPoint line column) = Pos (fromIntegral line) (fromIntegral column)
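-- Taken together, these instances mean the same parse can carry different
-- annotations just by choosing the result type: '()' for none, 'Range',
-- 'Span', or 'Loc' for positions, and 'Text.Text' for the node's source text.
-- A hedged example, assuming a generated 'Module' syntax type:
--
-- > parseByteString language bytes :: IO (Either String (Module Loc))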
-- | Optional/repeated fields occurring in product datatypes are wrapped in type constructors, e.g. 'Maybe', '[]', or 'NonEmpty', and thus can unmarshal zero or more nodes for the same field name.
class UnmarshalField t where
unmarshalField
:: ( Unmarshal f
, UnmarshalAnn a
)
=> String -- ^ datatype name
-> String -- ^ field name
-> [Node] -- ^ nodes
-> MatchM (t (f a))
instance UnmarshalField Maybe where
unmarshalField _ _ [] = pure Nothing
unmarshalField _ _ [x] = Just <$> unmarshalNode x
unmarshalField d f _ = liftIO . throwIO . UnmarshalError $ "type '" <> d <> "' expected zero or one nodes in field '" <> f <> "' but got multiple"
instance UnmarshalField [] where
unmarshalField d f (x:xs) = do
head' <- unmarshalNode x
tail' <- unmarshalField d f xs
pure $ head' : tail'
unmarshalField _ _ [] = pure []
instance UnmarshalField NonEmpty where
unmarshalField d f (x:xs) = do
head' <- unmarshalNode x
tail' <- unmarshalField d f xs
pure $ head' :| tail'
unmarshalField d f [] = liftIO . throwIO . UnmarshalError $ "type '" <> d <> "' expected one or more nodes in field '" <> f <> "' but got zero"
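-- As a sketch, a generated record with an optional field might
-- (hypothetically) look like
--
-- > data If a = If { ann :: a, condition :: Expression a, alternative :: Maybe (ElseClause a) }
--
-- and 'unmarshalField' is what turns the zero, one, or many child nodes found
-- under a field name into the corresponding 'Maybe', '[]', or 'NonEmpty'
-- wrapper.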
class SymbolMatching (a :: * -> *) where
matchedSymbols :: Proxy a -> [Int]
  -- | Provide an error message describing the node's symbol vs. the symbols this can match
showFailure :: Proxy a -> Node -> String
instance SymbolMatching f => SymbolMatching (M1 i c f) where
matchedSymbols _ = matchedSymbols (Proxy @f)
showFailure _ = showFailure (Proxy @f)
instance SymbolMatching f => SymbolMatching (Rec1 f) where
matchedSymbols _ = matchedSymbols (Proxy @f)
showFailure _ = showFailure (Proxy @f)
instance (KnownNat n, KnownSymbol sym) => SymbolMatching (Token sym n) where
matchedSymbols _ = [fromIntegral (natVal (Proxy @n))]
showFailure _ _ = "expected " ++ symbolVal (Proxy @sym)
instance (SymbolMatching f, SymbolMatching g) => SymbolMatching (f :+: g) where
matchedSymbols _ = matchedSymbols (Proxy @f) <> matchedSymbols (Proxy @g)
showFailure _ = sep <$> showFailure (Proxy @f) <*> showFailure (Proxy @g)
sep :: String -> String -> String
sep a b = a ++ ". " ++ b
-- | Move the cursor to point at the passed 'TSNode'.
goto :: Ptr Cursor -> TSNode -> MatchM ()
goto cursor node = liftIO (with node (ts_tree_cursor_reset_p cursor))
type Fields = [(FieldName, Node)]
-- | Return the fields remaining in the current branch, represented as a list of 'FieldName's paired with their corresponding 'Node's.
getFields :: Ptr Cursor -> Node -> MatchM Fields
getFields cursor node
| maxCount == 0 = pure []
| otherwise = do
nodes <- liftIO . allocaArray maxCount $ \ ptr -> do
actualCount <- ts_tree_cursor_copy_child_nodes cursor ptr
peekArray (fromIntegral actualCount) ptr
traverse (\ node -> (, node) <$> getFieldName node) nodes
where
maxCount = fromIntegral (nodeChildCount node)
getFieldName node
| nodeFieldName node == nullPtr = pure (FieldName "extraChildren")
| otherwise = FieldName . toHaskellCamelCaseIdentifier <$> liftIO (peekCString (nodeFieldName node))
lookupField :: FieldName -> Fields -> [Node]
lookupField k = map snd . filter ((== k) . fst)
-- | Return a 'ByteString' that contains a slice of the given 'ByteString'.
slice :: Range -> ByteString -> ByteString
slice (Range start end) = take . drop
where drop = B.drop start
take = B.take (end - start)
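-- For example (with OverloadedStrings): @slice (Range 2 5) "abcdefg" == "cde"@.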
newtype FieldName = FieldName { getFieldName :: String }
deriving (Eq, Ord, Show)
-- | Generic construction of ASTs from a list of named fields.
--
-- Product types (specifically, record types) are constructed by looking up the node for each corresponding field name in the list, moving the cursor to it, and then invoking 'unmarshalNode' to construct the value for that field. Leaf types are constructed as a special case of product types.
--
-- Sum types are constructed by using the current node's symbol to select the corresponding constructor deterministically.
class GUnmarshal f where
gunmarshalNode
:: UnmarshalAnn a
=> Node
-> MatchM (f a)
instance (Datatype d, GUnmarshalData f) => GUnmarshal (M1 D d f) where
gunmarshalNode = go (gunmarshalNode' (datatypeName @d undefined)) where
go :: (Node -> MatchM (f a)) -> Node -> MatchM (M1 i c f a)
go = coerce
class GUnmarshalData f where
gunmarshalNode'
:: UnmarshalAnn a
=> String
-> Node
-> MatchM (f a)
instance GUnmarshalData f => GUnmarshalData (M1 i c f) where
gunmarshalNode' = go gunmarshalNode' where
go :: (String -> Node -> MatchM (f a)) -> String -> Node -> MatchM (M1 i c f a)
go = coerce
-- For anonymous leaf nodes:
instance GUnmarshalData U1 where
gunmarshalNode' _ _ = pure U1
-- For unary products:
instance UnmarshalAnn k => GUnmarshalData (K1 c k) where
gunmarshalNode' _ = go unmarshalAnn where
go :: (Node -> MatchM k) -> Node -> MatchM (K1 c k a)
go = coerce
-- For anonymous leaf nodes
instance GUnmarshalData Par1 where
gunmarshalNode' _ = go unmarshalAnn where
go :: (Node -> MatchM a) -> Node -> MatchM (Par1 a)
go = coerce
instance Unmarshal t => GUnmarshalData (Rec1 t) where
gunmarshalNode' _ = go unmarshalNode where
go :: (Node -> MatchM (t a)) -> Node -> MatchM (Rec1 t a)
go = coerce
-- For product datatypes:
instance (GUnmarshalProduct f, GUnmarshalProduct g) => GUnmarshalData (f :*: g) where
gunmarshalNode' datatypeName node = asks cursor >>= flip getFields node >>= gunmarshalProductNode @(f :*: g) datatypeName node
-- | Generically unmarshal products
class GUnmarshalProduct f where
gunmarshalProductNode
:: UnmarshalAnn a
=> String
-> Node
-> Fields
-> MatchM (f a)
-- Product structure
instance (GUnmarshalProduct f, GUnmarshalProduct g) => GUnmarshalProduct (f :*: g) where
gunmarshalProductNode datatypeName node fields = (:*:)
<$> gunmarshalProductNode @f datatypeName node fields
<*> gunmarshalProductNode @g datatypeName node fields
-- Contents of product types (ie., the leaves of the product tree)
instance UnmarshalAnn k => GUnmarshalProduct (M1 S c (K1 i k)) where
gunmarshalProductNode _ node _ = go unmarshalAnn node where
go :: (Node -> MatchM k) -> Node -> MatchM (M1 S c (K1 i k) a)
go = coerce
instance GUnmarshalProduct (M1 S c Par1) where
gunmarshalProductNode _ node _ = go unmarshalAnn node where
go :: (Node -> MatchM a) -> Node -> MatchM (M1 S c Par1 a)
go = coerce
instance (UnmarshalField f, Unmarshal g, Selector c) => GUnmarshalProduct (M1 S c (f :.: g)) where
gunmarshalProductNode datatypeName _ = go (unmarshalField datatypeName fieldName . lookupField (FieldName fieldName)) where
go :: (Fields -> MatchM (f (g a))) -> Fields -> MatchM (M1 S c (f :.: g) a)
go = coerce
fieldName = selName @c undefined
instance (Unmarshal t, Selector c) => GUnmarshalProduct (M1 S c (Rec1 t)) where
gunmarshalProductNode datatypeName _ fields =
case lookupField (FieldName fieldName) fields of
[] -> liftIO . throwIO . UnmarshalError $ "type '" <> datatypeName <> "' expected a node '" <> fieldName <> "' but didn't get one"
[x] -> go unmarshalNode x where
go :: (Node -> MatchM (t a)) -> Node -> MatchM (M1 S c (Rec1 t) a)
go = coerce
_ -> liftIO . throwIO . UnmarshalError $ "type '" <> datatypeName <> "' expected a node but got multiple"
where
fieldName = selName @c undefined
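-- Putting the generic pieces together, a hypothetical record such as
--
-- > data Call a = Call { ann :: a, function :: Identifier a, arguments :: [Expression a] }
--
-- is unmarshalled by dispatching @ann@ through 'unmarshalAnn' (the 'Par1'
-- case), @function@ through 'unmarshalNode' (the 'Rec1' case), and
-- @arguments@ through 'unmarshalField' (the ':.:' case), using the field
-- names recovered by 'getFields'.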
class GHasAnn a t where
gann :: t a -> a
instance GHasAnn a f => GHasAnn a (M1 i c f) where
gann = gann . unM1
instance (GHasAnn a l, GHasAnn a r) => GHasAnn a (l :+: r) where
gann (L1 l) = gann l
gann (R1 r) = gann r
instance {-# OVERLAPPABLE #-} HasField "ann" (t a) a => GHasAnn a t where
gann = getField @"ann"


@ -24,9 +24,12 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-ast
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-go ^>= 0.4.1
@ -50,5 +53,7 @@ library
import: haskell
exposed-modules:
Language.Go
Language.Go.AST
Language.Go.Grammar
Language.Go.Tags
hs-source-dirs: src


@ -6,11 +6,11 @@ module Language.Go
import Data.Proxy
import qualified Language.Go.AST as Go
import qualified Language.Go.Tags as GoTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Go (tree_sitter_go)
import qualified TreeSitter.Go.AST as Go
import qualified TreeSitter.Unmarshal as TS
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: Go.SourceFile a }


@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Go.AST
( module Language.Go.AST
) where
import Prelude hiding (False, Float, Integer, Rational, String, True)
import AST.GenerateSyntax
import qualified Language.Go.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_go "../../../vendor/tree-sitter-go/src/node-types.json"


@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Go.Grammar
( tree_sitter_go
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Go (tree_sitter_go)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-go/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_go


@ -9,16 +9,16 @@ module Language.Go.Tags
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Text as Text
import GHC.Generics
import qualified Language.Go.AST as Go
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Go.AST as Go
import TreeSitter.Token
class ToTags t where
tags
@ -30,8 +30,7 @@ class ToTags t where
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -72,12 +71,11 @@ instance ToTags (Token sym n) where tags _ = pure ()
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
yieldTag :: (Has (Reader Source) sig m, Has (Writer Tags.Tags) sig m) => Text -> Kind -> Loc -> Range -> m ()
yieldTag name kind loc range = do

semantic-go/vendor/tree-sitter-go vendored Submodule

@ -0,0 +1 @@
Subproject commit 689cc8fbdc0613d267434f221af85aff91a31f8c


@ -21,12 +21,17 @@ tested-with: GHC == 8.6.5
library
exposed-modules:
Language.Java
Language.Java.AST
Language.Java.Grammar
Language.Java.Tags
build-depends:
base >= 4.13 && < 5
, fused-effects ^>= 1.0
, semantic-ast
, semantic-codegen
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, tree-sitter ^>= 0.8
, tree-sitter-java ^>= 0.6.1
hs-source-dirs: src
@ -43,3 +48,4 @@ library
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
-Wno-missing-deriving-strategies


@ -1,15 +1,15 @@
-- | Semantic functionality for Java programs.
module Language.Java
( Term(..)
, TreeSitter.Java.tree_sitter_java
, Language.Java.Grammar.tree_sitter_java
) where
import Data.Proxy
import qualified Language.Java.AST as Java
import qualified Language.Java.Tags as JavaTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Java (tree_sitter_java)
import qualified TreeSitter.Java.AST as Java
import qualified TreeSitter.Unmarshal as TS
import qualified Language.Java.Grammar (tree_sitter_java)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: Java.Program a }


@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Java.AST
( module Language.Java.AST
) where
import AST.GenerateSyntax
import qualified Language.Java.Grammar as Grammar
import AST.Token
astDeclarationsForLanguage Grammar.tree_sitter_java "../../../vendor/tree-sitter-java/src/node-types.json"


@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Java.Grammar
( tree_sitter_java
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Java (tree_sitter_java)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-java/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_java


@ -8,16 +8,17 @@ module Language.Java.Tags
( ToTags(..)
) where
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import GHC.Generics
import GHC.Generics ((:+:)(..))
import qualified Language.Java.AST as Java
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Java.AST as Java
import TreeSitter.Token
class ToTags t where
tags
@ -29,8 +30,7 @@ class ToTags t where
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -80,12 +80,11 @@ instance ToTags Java.MethodInvocation where
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
instance ToTags Java.AnnotatedType
instance ToTags Java.Annotation
@ -140,7 +139,7 @@ instance ToTags Java.FieldAccess
instance ToTags Java.FieldDeclaration
instance ToTags Java.FinallyClause
instance ToTags Java.FloatingPointType
instance ToTags Java.ForInit
-- instance ToTags Java.ForInit
instance ToTags Java.ForStatement
instance ToTags Java.FormalParameter
instance ToTags Java.FormalParameters
@ -160,7 +159,7 @@ instance ToTags Java.LabeledStatement
instance ToTags Java.LambdaExpression
instance ToTags Java.Literal
instance ToTags Java.LocalVariableDeclaration
instance ToTags Java.LocalVariableDeclarationStatement
-- instance ToTags Java.LocalVariableDeclarationStatement
instance ToTags Java.MarkerAnnotation
-- instance ToTags Java.MethodDeclaration
-- instance ToTags Java.MethodInvocation

@ -0,0 +1 @@
Subproject commit afc4cec799f6594390aeb0ca5e16ec89e73d488e


@ -21,9 +21,13 @@ tested-with: GHC == 8.6.5
library
exposed-modules:
Language.JSON
Language.JSON.AST
Language.JSON.Grammar
build-depends:
base >= 4.13 && < 5
, semantic-codegen
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, tree-sitter ^>= 0.8
, tree-sitter-json ^>= 0.6
hs-source-dirs: src
@ -40,3 +44,5 @@ library
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies


@ -1,14 +1,14 @@
-- | Semantic functionality for JSON programs.
module Language.JSON
( Term(..)
, TreeSitter.JSON.tree_sitter_json
, Language.JSON.Grammar.tree_sitter_json
) where
import Data.Proxy
import qualified Language.JSON.AST as JSON
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.JSON (tree_sitter_json)
import qualified TreeSitter.JSON.AST as JSON
import qualified TreeSitter.Unmarshal as TS
import qualified Language.JSON.Grammar (tree_sitter_json)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: JSON.Document a }


@ -0,0 +1,20 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.JSON.AST
( module Language.JSON.AST
) where
import Prelude hiding (String)
import AST.GenerateSyntax
import qualified Language.JSON.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_json "../../../vendor/tree-sitter-json/src/node-types.json"


@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.JSON.Grammar
( tree_sitter_json
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.JSON (tree_sitter_json)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-json/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_json

@ -0,0 +1 @@
Subproject commit 7b6a33f300e3e88c3017e0a9d88c77b50ea6d149


@ -0,0 +1,5 @@
# Revision history for semantic-parse
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.

semantic-parse/LICENSE Normal file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 GitHub
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


@ -1,28 +1,25 @@
# semantic-ast
# semantic-parse
This package has two goals:
1. Develop a library that will produce ASTs;
2. Provide a command line tool that will output ASTs in supported formats.
This package provides a command line tool that will output ASTs in supported formats.
#### CLI
To output ASTs, run the `semantic-ast` command, specifying two mandatory options: 1) the format you'd like to return (ex., `Show`, `JSON`, etc.) and 2) the option specifying whether the source code will be passed in directly via command line (using `--sourceString`) or via providing the file path `--sourceFile`.
To output ASTs, run the `semantic-parse` command, specifying two mandatory options: 1) the format you'd like to return (ex., `Show`, `JSON`, etc.) and 2) the option specifying whether the source code will be passed in directly via command line (using `--sourceString`) or via providing the file path `--sourceFile`.
Filepath:
```
semantic-ast --format [FORMAT] --sourceFile [FILEPATH]
semantic-parse --format [FORMAT] --sourceFile [FILEPATH]
```
Source string:
```
semantic-ast --format [FORMAT] --sourceString [SOURCE]
semantic-parse --format [FORMAT] --sourceString [SOURCE]
```
An example command is:
```
semantic-ast -- --format Show --sourceString "a"
semantic-parse -- --format Show --sourceString "a"
```
This will generate an AST.

semantic-parse/Setup.hs Normal file

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain


@ -2,9 +2,9 @@
module Main (main) where
import TreeSitter.Unmarshal
import qualified TreeSitter.Python.AST as AST
import qualified TreeSitter.Python as Python
import AST.Unmarshal
import qualified Language.Python.AST as AST
import qualified Language.Python.Grammar as Python
import Source.Range
import Source.Span
import Data.Aeson (toJSON)


@ -0,0 +1,57 @@
cabal-version: 2.4
-- Initial package description 'semantic-ast.cabal' generated by 'cabal
-- init'. For further documentation, see
-- http://haskell.org/cabal/users-guide/
name: semantic-parse
version: 0.1.0.0
-- synopsis:
-- description:
-- bug-reports:
license: MIT
license-file: LICENSE
author: The Semantic Authors
maintainer: opensource+semantic@github.com
copyright: (c) 2019 GitHub, Inc.
category: Language
extra-source-files: CHANGELOG.md
tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies
executable semantic-parse
import: haskell
main-is: Main.hs
-- other-modules:
-- other-extensions:
build-depends: base
, semantic-ast
, tree-sitter
, semantic-source
, tree-sitter-python ^>= 0.8.1
, bytestring
, optparse-applicative
, pretty-simple
, aeson
, bytestring
, aeson-pretty
, semantic-python
, text
hs-source-dirs: app
default-language: Haskell2010


@ -21,15 +21,18 @@ tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
build-depends: base ^>= 4.13
, fused-effects ^>= 1.0
, fused-effects ^>= 1.0.0.1
, fused-syntax
, parsers ^>= 0.12.10
, semantic-analysis ^>= 0
, semantic-ast
, semantic-core ^>= 0.0
, semantic-codegen
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, semantic-scope-graph ^>= 0.0
, semilattices ^>= 0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-python ^>= 0.8.1
@ -54,12 +57,15 @@ library
import: haskell
exposed-modules:
Language.Python
Language.Python.AST
Language.Python.Core
Language.Python.Grammar
Language.Python.Failure
Language.Python.Patterns
Language.Python.ScopeGraph
Language.Python.Tags
hs-source-dirs: src
build-depends: lens ^>= 4.18
test-suite compiling
import: haskell
@ -80,6 +86,7 @@ test-suite compiling
, process ^>= 1.6.5
, resourcet ^>= 1.2.2
, semantic-analysis ^>= 0
, semantic-codegen
, streaming ^>= 0.2.2
, streaming-process ^>= 0.1
, streaming-bytestring ^>= 0.1.6
@ -100,6 +107,7 @@ test-suite graphing
build-depends: base
, semantic-python
, semantic-codegen
, semantic-scope-graph
, bytestring
, pathtype


@ -1,17 +1,18 @@
-- | Semantic functionality for Python programs.
module Language.Python
( Term(..)
, TreeSitter.Python.tree_sitter_python
, Language.Python.Grammar.tree_sitter_python
) where
import Data.Proxy
import qualified Language.Python.AST as Py
import Language.Python.ScopeGraph
import qualified Language.Python.Tags as PyTags
import ScopeGraph.Convert
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Python (tree_sitter_python)
import qualified TreeSitter.Python.AST as Py
import qualified TreeSitter.Unmarshal as TS
import qualified Language.Python.Grammar (tree_sitter_python)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: Py.Module a }


@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Python.AST
( module Language.Python.AST
) where
import Prelude hiding (False, Float, Integer, String, True)
import AST.GenerateSyntax
import qualified Language.Python.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_python "../../../vendor/tree-sitter-python/src/node-types.json"


@ -33,12 +33,12 @@ import Data.Function
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe
import GHC.Records
import qualified Language.Python.AST as Py
import Language.Python.Failure
import Language.Python.Patterns
import Source.Span (Span)
import Syntax.Stack (Stack (..))
import qualified Syntax.Stack as Stack
import qualified TreeSitter.Python.AST as Py
-- | Keeps track of the current scope's bindings (so that we can, when
-- compiling a class or module, return the list of bound variables as


@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Python.Grammar
( tree_sitter_python
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Python (tree_sitter_python)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-python/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_python


@ -10,7 +10,7 @@ module Language.Python.Patterns
import AST.Element
import qualified Analysis.Name
import qualified TreeSitter.Python.AST as Py
import qualified Language.Python.AST as Py
-- | Useful pattern synonym for extracting a single identifier from
-- a Python ExpressionList. Easier than pattern-matching every time.


@ -16,32 +16,32 @@
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Python.ScopeGraph
( scopeGraphModule
) where
import AST.Element
import qualified Analysis.Name as Name
import Control.Algebra (Algebra (..), handleCoercible)
import AST.Element
import Control.Effect.Fresh
import Control.Effect.ScopeGraph
import Control.Lens (set, (^.))
import Data.Foldable
import Data.Maybe
import Data.Monoid
import qualified Data.ScopeGraph as ScopeGraph
import Data.Semilattice.Lower
import Data.Traversable
import GHC.Records
import GHC.TypeLits
import qualified Language.Python.AST as Py
import Language.Python.Patterns
import ScopeGraph.Convert (Result (..), complete, todo)
import qualified ScopeGraph.Properties.Declaration as Props
import qualified ScopeGraph.Properties.Function as Props
import qualified ScopeGraph.Properties.Reference as Props
import Source.Loc
import qualified TreeSitter.Python.AST as Py
-- This orphan instance will perish once it lands in fused-effects.
instance Algebra sig m => Algebra sig (Ap m) where
alg = Ap . alg . handleCoercible
import Source.Span (span_)
-- This typeclass is internal-only, though it shares the same interface
-- as the one defined in semantic-scope-graph. The somewhat-unconventional
@ -92,10 +92,15 @@ scopeGraphModule = getAp . scopeGraph
instance ToScopeGraph Py.AssertStatement where scopeGraph = onChildren
instance ToScopeGraph Py.Assignment where
scopeGraph (Py.Assignment _ (SingleIdentifier t) _val _typ) = do
let declProps = (DeclProperties ScopeGraph.Assignment ScopeGraph.Default Nothing)
complete <* declare t declProps
scopeGraph x = todo x
scopeGraph (Py.Assignment ann (SingleIdentifier t) val _typ) = do
declare t Props.Declaration
{ Props.kind = ScopeGraph.Assignment
, Props.relation = ScopeGraph.Default
, Props.associatedScope = Nothing
, Props.span = ann^.span_
}
maybe complete scopeGraph val
scopeGraph x = todo x
instance ToScopeGraph Py.Await where
scopeGraph (Py.Await _ a) = scopeGraph a
@ -177,23 +182,31 @@ instance ToScopeGraph Py.ForStatement where scopeGraph = todo
instance ToScopeGraph Py.FunctionDefinition where
scopeGraph Py.FunctionDefinition
{ name = Py.Identifier _ann1 name
{ ann
, name = Py.Identifier _ann1 name
, parameters = Py.Parameters _ann2 parameters
, body
} = do
let funProps = FunProperties ScopeGraph.Function
(_, associatedScope) <- declareFunction (Just $ Name.name name) funProps
(_, associatedScope) <- declareFunction (Just $ Name.name name) Props.Function
{ Props.kind = ScopeGraph.Function
, Props.span = ann^.span_
}
withScope associatedScope $ do
let declProps = DeclProperties ScopeGraph.Parameter ScopeGraph.Default Nothing
let param (Py.Parameter (Prj (Py.Identifier _pann pname))) = Just (Name.name pname)
param _ = Nothing
let declProps = Props.Declaration
{ Props.kind = ScopeGraph.Parameter
, Props.relation = ScopeGraph.Default
, Props.associatedScope = Nothing
, Props.span = lowerBound
}
let param (Py.Parameter (Prj (Py.Identifier pann pname))) = Just (pann, Name.name pname)
param _ = Nothing
let parameterMs = fmap param parameters
if any isNothing parameterMs
then todo parameterMs
else do
let parameters' = catMaybes parameterMs
paramDeclarations <- for parameters' $ \parameter ->
complete <* declare parameter declProps
paramDeclarations <- for parameters' $ \(pos, parameter) ->
complete <* declare parameter (set span_ (pos^.span_) declProps)
bodyResult <- scopeGraph body
pure (mconcat paramDeclarations <> bodyResult)
@ -203,7 +216,7 @@ instance ToScopeGraph Py.GeneratorExpression where scopeGraph = todo
instance ToScopeGraph Py.Identifier where
scopeGraph (Py.Identifier _ name) = do
reference name name RefProperties
reference name name Props.Reference
complete
instance ToScopeGraph Py.IfStatement where


@ -10,19 +10,19 @@ module Language.Python.Tags
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (listToMaybe)
import Data.Text as Text
import GHC.Generics
import qualified Language.Python.AST as Py
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Python.AST as Py
import TreeSitter.Token
class ToTags t where
tags
@ -34,8 +34,7 @@ class ToTags t where
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -50,8 +49,7 @@ instance ToTags (Token sym n) where tags _ = pure ()
keywordFunctionCall
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc -> Loc -> Range -> Text -> m ()
keywordFunctionCall t loc range name = yieldTag name Function loc range Nothing >> gtags t
@ -127,12 +125,11 @@ docComment _ _ = Nothing
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
instance ToTags Py.AliasedImport
instance ToTags Py.ArgumentList


@ -7,27 +7,33 @@ module Main (main) where
import Analysis.Name (Name)
import qualified Analysis.Name as Name
import qualified AST.Unmarshal as TS
import Control.Algebra
import Control.Carrier.Lift
import Control.Carrier.Sketch.ScopeGraph
import Control.Effect.ScopeGraph
import qualified Data.ScopeGraph as ScopeGraph
import Control.Monad
import qualified Data.ByteString as ByteString
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.ScopeGraph as ScopeGraph
import qualified Data.ScopeGraph as ScopeGraph
import Data.Semilattice.Lower
import qualified Language.Python ()
import qualified Language.Python as Py (Term)
import qualified Language.Python.Grammar as TSP
import ScopeGraph.Convert
import qualified ScopeGraph.Properties.Declaration as Props
import qualified ScopeGraph.Properties.Function as Props
import qualified ScopeGraph.Properties.Reference as Props
import Source.Loc
import qualified Source.Source as Source
import Source.Span
import System.Exit (die)
import System.Path ((</>))
import qualified System.Path as Path
import qualified System.Path.Directory as Path
import qualified Test.Tasty as Tasty
import qualified Test.Tasty.HUnit as HUnit
import qualified TreeSitter.Python as TSP
import qualified TreeSitter.Unmarshal as TS
import qualified Data.List.NonEmpty as NonEmpty
{-
@ -55,8 +61,8 @@ runScopeGraph p _src item = run . runSketch (Just p) $ scopeGraph item
sampleGraphThing :: (Has ScopeGraph sig m) => m Result
sampleGraphThing = do
declare "hello" (DeclProperties ScopeGraph.Assignment ScopeGraph.Default Nothing)
declare "goodbye" (DeclProperties ScopeGraph.Assignment ScopeGraph.Default Nothing)
declare "hello" (Props.Declaration ScopeGraph.Assignment ScopeGraph.Default Nothing (Span (Pos 2 0) (Pos 2 10)))
declare "goodbye" (Props.Declaration ScopeGraph.Assignment ScopeGraph.Default Nothing (Span (Pos 3 0) (Pos 3 12)))
pure Complete
graphFile :: FilePath -> IO (ScopeGraph.ScopeGraph Name, Result)
@ -76,8 +82,8 @@ assertSimpleAssignment = do
expectedReference :: (Has ScopeGraph sig m) => m Result
expectedReference = do
declare "x" (DeclProperties ScopeGraph.Assignment ScopeGraph.Default Nothing)
reference "x" "x" RefProperties
declare "x" (Props.Declaration ScopeGraph.Assignment ScopeGraph.Default Nothing (Span (Pos 0 0) (Pos 0 5)))
reference "x" "x" Props.Reference
pure Complete
assertSimpleReference :: HUnit.Assertion
@ -90,18 +96,18 @@ assertSimpleReference = do
expectedLexicalScope :: (Has ScopeGraph sig m) => m Result
expectedLexicalScope = do
_ <- declareFunction (Just $ Name.name "foo") (FunProperties ScopeGraph.Function)
reference "foo" "foo" RefProperties {}
_ <- declareFunction (Just $ Name.name "foo") (Props.Function ScopeGraph.Function (Span (Pos 0 0) (Pos 1 24)))
reference "foo" "foo" Props.Reference {}
pure Complete
expectedFunctionArg :: (Has ScopeGraph sig m) => m Result
expectedFunctionArg = do
(_, associatedScope) <- declareFunction (Just $ Name.name "foo") (FunProperties ScopeGraph.Function)
(_, associatedScope) <- declareFunction (Just $ Name.name "foo") (Props.Function ScopeGraph.Function (Span (Pos 0 0) (Pos 1 12)))
withScope associatedScope $ do
declare "x" (DeclProperties ScopeGraph.Identifier ScopeGraph.Default Nothing)
reference "x" "x" RefProperties
declare "x" (Props.Declaration ScopeGraph.Identifier ScopeGraph.Default Nothing lowerBound)
reference "x" "x" Props.Reference
pure ()
reference "foo" "foo" RefProperties
reference "foo" "foo" Props.Reference
pure Complete
expectedImportHole :: (Has ScopeGraph sig m) => m Result
@ -115,7 +121,7 @@ assertLexicalScope = do
(graph, _) <- graphFile path
case run (runSketch Nothing expectedLexicalScope) of
(expecto, Complete) -> HUnit.assertEqual "Should work for simple case" expecto graph
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
assertFunctionArg :: HUnit.Assertion
assertFunctionArg = do
@ -123,7 +129,7 @@ assertFunctionArg = do
(graph, _) <- graphFile path
case run (runSketch Nothing expectedFunctionArg) of
(expecto, Complete) -> HUnit.assertEqual "Should work for simple case" expecto graph
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
assertImportHole :: HUnit.Assertion
assertImportHole = do
@ -131,7 +137,7 @@ assertImportHole = do
(graph, _) <- graphFile path
case run (runSketch Nothing expectedImportHole) of
(expecto, Complete) -> HUnit.assertEqual "Should work for simple case" expecto graph
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
main :: IO ()
main = do


@ -39,8 +39,8 @@ import System.Path ((</>))
import qualified System.Path as Path
import qualified System.Path.Directory as Path
import qualified Text.Trifecta as Trifecta
import qualified TreeSitter.Python as TSP
import qualified TreeSitter.Unmarshal as TS
import qualified Language.Python.Grammar as TSP
import qualified AST.Unmarshal as TS
import qualified Test.Tasty as Tasty
import qualified Test.Tasty.HUnit as HUnit

@ -0,0 +1 @@
Subproject commit 899ac8d5d6c883b2f05362c9953e14e78aac474c


@ -24,9 +24,12 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-ast
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-ruby ^>= 0.4.1
@ -50,5 +53,7 @@ library
import: haskell
exposed-modules:
Language.Ruby
Language.Ruby.AST
Language.Ruby.Grammar
Language.Ruby.Tags
hs-source-dirs: src


@ -3,17 +3,17 @@
-- | Semantic functionality for Ruby programs.
module Language.Ruby
( Term(..)
, TreeSitter.Ruby.tree_sitter_ruby
, Language.Ruby.Grammar.tree_sitter_ruby
) where
import qualified AST.Unmarshal as TS
import Control.Carrier.State.Strict
import Data.Proxy
import Data.Text (Text)
import qualified Language.Ruby.AST as Rb
import qualified Language.Ruby.Grammar (tree_sitter_ruby)
import qualified Language.Ruby.Tags as RbTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Ruby (tree_sitter_ruby)
import qualified TreeSitter.Ruby.AST as Rb
import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: Rb.Program a }


@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Ruby.AST
( module Language.Ruby.AST
) where
import Prelude hiding (False, Float, Integer, Rational, String, True)
import AST.GenerateSyntax
import qualified Language.Ruby.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_ruby "../../../vendor/tree-sitter-ruby/src/node-types.json"


@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Ruby.Grammar
( tree_sitter_ruby
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Ruby (tree_sitter_ruby)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-ruby/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_ruby


@ -12,21 +12,21 @@ module Language.Ruby.Tags
) where
import AST.Element
import AST.Token
import AST.Traversable1
import qualified AST.Unmarshal as TS
import Control.Effect.Reader
import Control.Effect.State
import Control.Effect.Writer
import Control.Monad
import Data.Foldable
import Data.Text as Text
import GHC.Generics
import qualified Language.Ruby.AST as Rb
import Source.Loc
import Source.Range as Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Ruby.AST as Rb
import TreeSitter.Token
import qualified TreeSitter.Unmarshal as TS
class ToTags t where
tags
@ -40,8 +40,7 @@ class ToTags t where
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Has (State [Text]) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -89,7 +88,7 @@ instance ToTags Rb.Class where
where
range' = case extraChildren of
Prj Rb.Superclass { ann = Loc { byteRange = Range { end }}} : _ -> Range start end
_ -> Range start (getEnd expr)
_ -> Range start (getEnd expr)
getEnd = Range.end . byteRange . TS.gann
yield name = yieldTag name Class loc range' >> gtags t
@ -106,7 +105,7 @@ instance ToTags Rb.SingletonClass where
where
range' = case extraChildren of
x : _ -> Range start (getStart x)
_ -> range
_ -> range
getStart = Range.start . byteRange . TS.gann
yield name = yieldTag name Class loc range' >> gtags t
@ -123,7 +122,7 @@ instance ToTags Rb.Module where
where
range' = case extraChildren of
x : _ -> Range start (getStart x)
_ -> Range start (getEnd expr)
_ -> Range start (getEnd expr)
getEnd = Range.end . byteRange . TS.gann
getStart = Range.start . byteRange . TS.gann
yield name = yieldTag name Module loc range' >> gtags t
@ -132,8 +131,7 @@ yieldMethodNameTag
:: ( Has (State [Text]) sig m
, Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
) => t Loc -> Loc -> Range -> Rb.MethodName Loc -> m ()
yieldMethodNameTag t loc range (Rb.MethodName expr) = enterScope True $ case expr of
Prj Rb.Identifier { text = name } -> yield name
@ -165,7 +163,7 @@ instance ToTags Rb.Method where
where
range' = case parameters of
Just Rb.MethodParameters { ann = Loc { byteRange = Range { end } }} -> Range start end
_ -> Range start (getEnd name)
_ -> Range start (getEnd name)
getEnd = Range.end . byteRange . TS.gann
instance ToTags Rb.SingletonMethod where
@ -177,7 +175,7 @@ instance ToTags Rb.SingletonMethod where
where
range' = case parameters of
Just Rb.MethodParameters { ann = Loc { byteRange = Range { end } }} -> Range start end
_ -> Range start (getEnd name)
_ -> Range start (getEnd name)
getEnd = Range.end . byteRange . TS.gann
instance ToTags Rb.Block where
@ -336,12 +334,11 @@ gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Has (State [Text]) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
-- instance ToTags Rb.Alias
instance ToTags Rb.Arg

@ -0,0 +1 @@
Subproject commit eb2b6225bfb80010f2e4cbd27db8c6f3775230b5


@ -23,6 +23,9 @@ library
Control.Carrier.Sketch.ScopeGraph
Control.Effect.ScopeGraph
ScopeGraph.Convert
ScopeGraph.Properties.Declaration
ScopeGraph.Properties.Function
ScopeGraph.Properties.Reference
Data.Hole
Data.Module
Data.ScopeGraph
@ -33,6 +36,7 @@ library
, containers
, fused-effects ^>= 1.0
, generic-monoid
, generic-lens
, hashable
, lens
, pathtype


@ -23,19 +23,19 @@ import Analysis.Name (Name)
import qualified Analysis.Name as Name
import Control.Algebra
import Control.Carrier.Fresh.Strict
import Control.Carrier.State.Strict
import Control.Carrier.Reader
import Control.Effect.ScopeGraph (ScopeGraphEff(..), DeclProperties(..))
import Control.Carrier.State.Strict
import Control.Effect.ScopeGraph (ScopeGraphEff (..))
import Control.Monad.IO.Class
import Data.Bifunctor
import qualified Data.List.NonEmpty as NonEmpty
import Data.Module
import Data.ScopeGraph (ScopeGraph)
import qualified Data.ScopeGraph as ScopeGraph
import Data.Semilattice.Lower
import GHC.Records
import qualified ScopeGraph.Properties.Declaration as Props
import Source.Span
import qualified System.Path as Path
import qualified Data.List.NonEmpty as NonEmpty
-- | The state type used to keep track of the in-progress graph and
-- positional/contextual information. The name "sketchbook" is meant
@ -60,15 +60,16 @@ newtype SketchC address m a = SketchC (StateC Sketchbook (FreshC m) a)
instance (Effect sig, Algebra sig m) => Algebra (ScopeGraphEff :+: Reader Name :+: Fresh :+: sig) (SketchC Name m) where
alg (L (Declare n props k)) = do
Sketchbook old current <- SketchC (get @Sketchbook)
let Props.Declaration kind relation associatedScope span = props
let (new, _pos) =
ScopeGraph.declare
(ScopeGraph.Declaration n)
(lowerBound @ModuleInfo)
(relation props)
relation
ScopeGraph.Public
(lowerBound @Span)
(getField @"kind" @DeclProperties props)
(associatedScope props)
span
kind
associatedScope
current
old
SketchC (put (Sketchbook new current))


@ -10,13 +10,10 @@
-- | The ScopeGraph effect is used to build up a scope graph over
-- the lifetime of a monadic computation. The name is meant to evoke
-- physically ScopeGraphing the hierarchical outline of a graph.
-- physically sketching the hierarchical outline of a graph.
module Control.Effect.ScopeGraph
( ScopeGraph
, ScopeGraphEff (..)
, DeclProperties (..)
, RefProperties (..)
, FunProperties (..)
, declare
-- Scope Manipulation
, currentScope
@ -42,16 +39,9 @@ import GHC.Generics (Generic, Generic1)
import GHC.Records
import Data.List.NonEmpty
data DeclProperties = DeclProperties {
kind :: ScopeGraph.Kind
, relation :: ScopeGraph.Relation
, associatedScope :: Maybe Name
}
data RefProperties = RefProperties
data FunProperties = FunProperties {
kind :: ScopeGraph.Kind
}
import qualified ScopeGraph.Properties.Declaration as Props
import qualified ScopeGraph.Properties.Function as Props
import qualified ScopeGraph.Properties.Reference as Props
type ScopeGraph
= ScopeGraphEff
@ -59,8 +49,8 @@ type ScopeGraph
:+: Reader Name
data ScopeGraphEff m k =
Declare Name DeclProperties (() -> m k)
| Reference Text Text RefProperties (() -> m k)
Declare Name Props.Declaration (() -> m k)
| Reference Text Text Props.Reference (() -> m k)
| NewScope (Map ScopeGraph.EdgeLabel [Name]) (Name -> m k)
| InsertEdge ScopeGraph.EdgeLabel (NonEmpty Name) (() -> m k)
deriving (Generic, Generic1, HFunctor, Effect)
@ -68,11 +58,11 @@ data ScopeGraphEff m k =
currentScope :: Has (Reader Name) sig m => m Name
currentScope = ask
declare :: forall sig m . Has ScopeGraph sig m => Name -> DeclProperties -> m ()
declare :: forall sig m . (Has ScopeGraph sig m) => Name -> Props.Declaration -> m ()
declare n props = send (Declare n props pure)
-- | Establish a reference to a prior declaration.
reference :: forall sig m . Has ScopeGraph sig m => Text -> Text -> RefProperties -> m ()
reference :: forall sig m . (Has ScopeGraph sig m) => Text -> Text -> Props.Reference -> m ()
reference n decl props = send (Reference n decl props pure)
newScope :: forall sig m . Has ScopeGraph sig m => Map ScopeGraph.EdgeLabel [Name] -> m Name
@ -82,22 +72,29 @@ newScope edges = send (NewScope edges pure)
insertEdge :: Has ScopeGraph sig m => ScopeGraph.EdgeLabel -> NonEmpty Name -> m ()
insertEdge label targets = send (InsertEdge label targets pure)
declareFunction :: forall sig m . Has ScopeGraph sig m => Maybe Name -> FunProperties -> m (Name, Name)
declareFunction name props = do
declareFunction :: forall sig m . (Has ScopeGraph sig m) => Maybe Name -> Props.Function -> m (Name, Name)
declareFunction name (Props.Function kind span) = do
currentScope' <- currentScope
let lexicalEdges = Map.singleton ScopeGraph.Lexical [ currentScope' ]
associatedScope <- newScope lexicalEdges
name' <- declareMaybeName name (DeclProperties { relation = ScopeGraph.Default, kind = (getField @"kind" @FunProperties props), associatedScope = Just associatedScope })
name' <- declareMaybeName name Props.Declaration
{ Props.relation = ScopeGraph.Default
, Props.kind = kind
, Props.associatedScope = Just associatedScope
, Props.span = span
}
pure (name', associatedScope)
declareMaybeName :: Has ScopeGraph sig m
=> Maybe Name
-> DeclProperties
-> Props.Declaration
-> m Name
declareMaybeName maybeName props = do
case maybeName of
Just name -> declare name props >> pure name
_ -> Name.gensym >>= \name -> declare name (props { relation = ScopeGraph.Gensym }) >> pure name -- TODO: Modify props and change Kind to Gensym
Just name -> name <$ declare name props
_ -> do
name <- Name.gensym
name <$ declare name (props { Props.relation = ScopeGraph.Gensym })
withScope :: Has ScopeGraph sig m
=> Name


@ -0,0 +1,25 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeApplications #-}
-- | The 'Declaration' record type is used by the 'Control.Effect.Sketch' module to keep
-- track of the parameters that need to be passed when establishing a new declaration.
-- That is to say, it is a record type primarily used for its selector names.
module ScopeGraph.Properties.Declaration
( Declaration (..)
) where
import Analysis.Name (Name)
import Data.Generics.Product (field)
import Data.ScopeGraph as ScopeGraph (Kind, Relation)
import GHC.Generics (Generic)
import Source.Span
data Declaration = Declaration
{ kind :: ScopeGraph.Kind
, relation :: ScopeGraph.Relation
, associatedScope :: Maybe Name
, span :: Span
} deriving Generic
instance HasSpan Declaration where span_ = field @"span"


@ -0,0 +1,22 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeApplications #-}
-- | The 'Function' record type is used by the 'Control.Effect.Sketch' module to keep
-- track of the parameters that need to be passed when establishing a new declaration.
-- That is to say, it is a record type primarily used for its selector names.
module ScopeGraph.Properties.Function
( Function (..)
) where
import Data.Generics.Product (field)
import qualified Data.ScopeGraph as ScopeGraph (Kind)
import GHC.Generics (Generic)
import Source.Span
data Function = Function
{ kind :: ScopeGraph.Kind
, span :: Span
} deriving Generic
instance HasSpan Function where span_ = field @"span"


@ -0,0 +1,9 @@
-- | The 'Declaration' record type is used by the 'Control.Effect.Sketch' module to keep
-- track of the parameters that need to be passed when establishing a new reference.
-- It is currently unused, but will possess more fields in the future as scope graph
-- functionality is enhanced.
module ScopeGraph.Properties.Reference
( Reference (..)
) where
data Reference = Reference


@ -1,7 +1,7 @@
cabal-version: 2.4
name: semantic-source
version: 0.0.2.0
version: 0.1.0.0
synopsis: Types and functionality for working with source code
description: Types and functionality for working with source code (program text).
homepage: https://github.com/github/semantic/tree/master/semantic-source#readme

View File

@ -1,4 +1,5 @@
{-# LANGUAGE DeriveGeneric, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-|
'Source' models source code, represented as a thin wrapper around a 'B.ByteString' with conveniences for splitting by line, slicing, etc.
@ -37,7 +38,7 @@ import Data.Aeson (FromJSON (..), withText)
import qualified Data.ByteString as B
import Data.Char (ord)
import Data.Maybe (fromMaybe)
import Data.Monoid (Last(..))
import Data.Monoid (Last (..))
import Data.Semilattice.Lower
import Data.String (IsString (..))
import qualified Data.Text as T
@ -45,7 +46,7 @@ import qualified Data.Text.Encoding as T
import Data.Text.Encoding.Error (lenientDecode)
import GHC.Generics (Generic)
import Source.Range
import Source.Span (Span(Span), Pos(..))
import Source.Span (Pos (..), Span (Span))
-- | The contents of a source file. This is represented as a UTF-8
@ -75,7 +76,7 @@ totalRange = Range 0 . B.length . bytes
-- | Return a 'Span' that covers the entire text.
totalSpan :: Source -> Span
totalSpan source = Span lowerBound (Pos (Prelude.length ranges) (succ (end lastRange - start lastRange))) where
totalSpan source = Span (Pos 1 1) (Pos (Prelude.length ranges) (succ (end lastRange - start lastRange))) where
ranges = lineRanges source
lastRange = fromMaybe lowerBound (getLast (foldMap (Last . Just) ranges))


@ -1,4 +1,6 @@
{-# LANGUAGE DeriveGeneric, OverloadedStrings, RankNTypes #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
-- | Source position and span information
--
-- Mostly taken from purescript's SourcePos definition.
@ -16,9 +18,8 @@ import Control.DeepSeq (NFData)
import Data.Aeson ((.:), (.=))
import qualified Data.Aeson as A
import Data.Hashable (Hashable)
import Data.Semilattice.Lower (Lower(..))
import GHC.Generics (Generic)
import GHC.Stack (SrcLoc(..))
import GHC.Stack (SrcLoc (..))
-- | A Span of position information
data Span = Span
@ -44,10 +45,6 @@ instance A.FromJSON Span where
<$> o .: "start"
<*> o .: "end"
instance Lower Span where
lowerBound = Span lowerBound lowerBound
-- | Construct a Span with a given value for both its start and end positions.
point :: Pos -> Span
point p = Span p p
@ -56,7 +53,11 @@ spanFromSrcLoc :: SrcLoc -> Span
spanFromSrcLoc s = Span (Pos (srcLocStartLine s) (srcLocStartCol s)) (Pos (srcLocEndLine s) (srcLocEndCol s))
-- | Source position information (1-indexed)
-- | Source position information.
-- The 'Pos' values associated with ASTs returned from tree-sitter
-- 'Unmarshal' instances are zero-indexed. Unless you are displaying
-- span information to a user, you should write your code assuming
-- zero-indexing.
data Pos = Pos
{ line :: {-# UNPACK #-} !Int
, column :: {-# UNPACK #-} !Int
@ -77,10 +78,6 @@ instance A.FromJSON Pos where
[ line, col ] <- A.parseJSON arr
pure $ Pos line col
instance Lower Pos where
lowerBound = Pos 1 1
line_, column_ :: Lens' Pos Int
line_ = lens line (\p l -> p { line = l })
column_ = lens column (\p l -> p { column = l })


@ -20,7 +20,6 @@ tested-with: GHC == 8.6.5
library
exposed-modules:
AST.Element
Tags.Tag
Tags.Tagging.Precise
build-depends:


@ -1,40 +1,16 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module Tags.Tagging.Precise
( Tags
, ToTags(..)
, yield
, runTagging
, firstLine
, Traversable1(..)
, for1
, traverse1_
, for1_
, foldMap1
, foldMapDefault1
, fmapDefault1
, traverseDefault1
, GTraversable1(..)
, Generics(..)
) where
import Control.Carrier.Reader
import Control.Carrier.Writer.Strict
import Data.Functor (void)
import Data.Functor.Const
import Data.Functor.Identity
import Data.Monoid (Ap (..), Endo (..))
import Data.Monoid (Endo (..))
import Data.Text as Text (Text, take, takeWhile, stripEnd)
import GHC.Generics
import Prelude hiding (span)
import Source.Loc (Loc (..))
import Source.Source as Source
@ -64,137 +40,3 @@ runTagging source
-- | Slices a range out of 'Source' and gives back the first line of source up to 180 characters.
firstLine :: Source -> Range -> Text
firstLine src = Text.stripEnd . Text.take 180 . Text.takeWhile (/= '\n') . Source.toText . slice src
-- FIXME: move Traversable1 into semantic-ast.
-- FIXME: derive Traversable1 instances for TH-generated syntax types.
-- | Simultaneous traversal of subterms of kind @*@ and @* -> *@ in an 'Applicative' context.
--
-- 'Traversable1' can express any combination of first- and second-order mapping, folding, and traversal.
--
-- Note that the @1@ suffix is used in the manner of 'Data.Functor.Classes.Show1' or 'Generic1', rather than 'foldr1'; it’s a higher-order traversal which is simultaneously able to traverse (and alter) annotations.
class Traversable1 c t where
-- | Map annotations of kind @*@ and heterogeneously-typed subterms of kind @* -> *@ under some constraint @c@ into an 'Applicative' context. The constraint is necessary to operate on otherwise universally-quantified subterms, since otherwise there would be insufficient information to inspect them at all.
--
-- No proxy is provided for the constraint @c@; instead, @-XTypeApplications@ should be used. E.g. here we ignore the annotations and print all the @* -> *@ subterms using 'Show1':
--
-- @
-- 'traverse1' \@'Data.Functor.Classes.Show1' 'pure' (\ t -> t '<$' 'putStrLn' ('Data.Functor.Classes.showsPrec1' 0 t ""))
-- @
--
-- Note that this traversal is non-recursive: any recursion through subterms must be performed by the second function argument.
traverse1
:: Applicative f
=> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> t a
-> f (t b)
default traverse1
:: (Applicative f, Generic1 t, GTraversable1 c (Rep1 t))
=> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> t a
-> f (t b)
traverse1 f g = fmap to1 . gtraverse1 @c f g . from1
for1
:: forall c t f a b
. (Traversable1 c t, Applicative f)
=> t a
-> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> f (t b)
for1 t f g = traverse1 @c f g t
traverse1_
:: forall c t f a a' a''
. (Traversable1 c t, Applicative f)
=> (a -> f a')
-> (forall t' . c t' => t' a -> f a'')
-> t a
-> f ()
traverse1_ f g = getAp . foldMap1 @c (Ap . void . f) (Ap . void . g)
for1_
:: forall c t f a a' a''
. (Traversable1 c t, Applicative f)
=> t a
-> (a -> f a')
-> (forall t' . c t' => t' a -> f a'')
-> f ()
for1_ t f g = getAp $ foldMap1 @c (Ap . void . f) (Ap . void . g) t
foldMap1 :: forall c t b a . (Traversable1 c t, Monoid b) => (a -> b) -> (forall t' . c t' => t' a -> b) -> t a -> b
foldMap1 f g = getConst . traverse1 @c (Const . f) (Const . g)
-- | This function may be used as a value for 'foldMap' in a 'Foldable' instance.
foldMapDefault1 :: (Traversable1 Foldable t, Monoid b) => (a -> b) -> t a -> b
foldMapDefault1 f = foldMap1 @Foldable f (foldMap f)
-- | This function may be used as a value for 'fmap' in a 'Functor' instance.
fmapDefault1 :: Traversable1 Functor t => (a -> b) -> t a -> t b
fmapDefault1 f = runIdentity . traverse1 @Functor (Identity . f) (Identity . fmap f)
-- | This function may be used as a value for 'traverse' in a 'Traversable' instance.
traverseDefault1 :: (Traversable1 Traversable t, Applicative f) => (a -> f b) -> t a -> f (t b)
traverseDefault1 f = traverse1 @Traversable f (traverse f)
-- FIXME: move GTraversable1 into semantic-ast.
class GTraversable1 c t where
-- | Generically map annotations and subterms of kind @* -> *@ into an 'Applicative' context.
gtraverse1
:: Applicative f
=> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> t a
-> f (t b)
instance GTraversable1 c f => GTraversable1 c (M1 i c' f) where
gtraverse1 f g = fmap M1 . gtraverse1 @c f g . unM1
instance (GTraversable1 c f, GTraversable1 c g) => GTraversable1 c (f :*: g) where
gtraverse1 f g (l :*: r) = (:*:) <$> gtraverse1 @c f g l <*> gtraverse1 @c f g r
instance (GTraversable1 c f, GTraversable1 c g) => GTraversable1 c (f :+: g) where
gtraverse1 f g (L1 l) = L1 <$> gtraverse1 @c f g l
gtraverse1 f g (R1 r) = R1 <$> gtraverse1 @c f g r
instance GTraversable1 c (K1 R t) where
gtraverse1 _ _ (K1 k) = pure (K1 k)
instance GTraversable1 c Par1 where
gtraverse1 f _ (Par1 a) = Par1 <$> f a
instance c t => GTraversable1 c (Rec1 t) where
gtraverse1 _ g (Rec1 t) = Rec1 <$> g t
instance (Traversable f, GTraversable1 c g) => GTraversable1 c (f :.: g) where
gtraverse1 f g = fmap Comp1 . traverse (gtraverse1 @c f g) . unComp1
instance GTraversable1 c U1 where
gtraverse1 _ _ _ = pure U1
-- | @'Generics' t@ has a 'Traversable1' instance when @'Rep1' t@ has a 'GTraversable1' instance, making this convenient for applying 'traverse1' to 'Generic1' types lacking 'Traversable1' instances:
--
-- @
-- 'getGenerics' '<$>' 'traverse1' f g ('Generics' t) = 'to1' '<$>' 'gtraverse1' f g ('from1' t)
-- @
--
-- It further defines its 'Foldable', 'Functor', and 'Traversable' instances using 'Traversable1', making it suitable for deriving with @-XDerivingVia@.
newtype Generics t a = Generics { getGenerics :: t a }
instance (Generic1 t, GTraversable1 Foldable (Rep1 t)) => Foldable (Generics t) where
foldMap = foldMapDefault1
instance (Generic1 t, GTraversable1 Functor (Rep1 t)) => Functor (Generics t) where
fmap = fmapDefault1
instance (Generic1 t, GTraversable1 Foldable (Rep1 t), GTraversable1 Functor (Rep1 t), GTraversable1 Traversable (Rep1 t)) => Traversable (Generics t) where
traverse = traverseDefault1
instance (Generic1 t, GTraversable1 c (Rep1 t)) => Traversable1 c (Generics t) where
traverse1 f g = fmap (Generics . to1) . gtraverse1 @c f g . from1 . getGenerics
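The Generics wrapper removed here (the FIXMEs above and the AST.Traversable1 imports later in this diff suggest this machinery now lives in semantic-ast) exists so that Generic1 syntax types can pick up Foldable, Functor, and Traversable through DerivingVia. A sketch of the intended use, with an invented two-field node type:

    {-# LANGUAGE DeriveGeneric, DerivingVia #-}
    -- Illustrative only: derive the ordinary container classes through the
    -- GTraversable1-powered Generics wrapper shown above.
    -- (Assumes: import GHC.Generics (Generic1))
    data Pair a = Pair { ann :: a, rest :: Maybe a }
      deriving (Generic1)
      deriving (Foldable, Functor, Traversable) via Generics Pair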

View File

@ -24,9 +24,12 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-ast
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-tsx ^>= 0.4.2
@ -50,5 +53,6 @@ library
import: haskell
exposed-modules:
Language.TSX
Language.TSX.AST
Language.TSX.Tags
hs-source-dirs: src

View File

@ -6,11 +6,11 @@ module Language.TSX
) where
import Data.Proxy
import qualified Language.TSX.AST as TSX
import qualified Language.TSX.Tags as TsxTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.TSX (tree_sitter_tsx)
import qualified TreeSitter.TSX.AST as TSX
import qualified TreeSitter.Unmarshal as TS
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: TSX.Program a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.TSX.AST
( module Language.TSX.AST
) where
import Prelude hiding (False, Float, Integer, String, True)
import AST.GenerateSyntax
import qualified TreeSitter.TSX as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_tsx "../../../vendor/tree-sitter-typescript/tsx/src/node-types.json"
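astDeclarationsForLanguage is a Template Haskell splice: it reads the grammar's node-types.json and generates one datatype per node type, parameterised by the annotation type. The generated code isn't shown in this diff; as a rough sketch, a leaf node such as an identifier comes out shaped something like this (the field and deriving lists here are assumptions, not the splice's exact output):

    -- Illustrative shape only; the real declarations come from the splice above.
    data Identifier a = Identifier { ann :: a, text :: Text }
      deriving (Eq, Ord, Show, Generic, Generic1)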

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.TSX.Grammar
( tree_sitter_tsx
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.TSX (tree_sitter_tsx)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-typescript/tsx/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_tsx
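mkStaticallyKnownRuleGrammarData splices in a Grammar sum type with one constructor per symbol in the tree-sitter grammar. Assuming the generated type derives Enum and Bounded (an assumption; the assignment machinery elsewhere in this repository does constrain its grammar parameter with Enum and Ix), the symbols can be enumerated directly:

    -- A sketch; the deriving list of the generated Grammar is assumed here.
    allTsxSymbols :: [Grammar]
    allTsxSymbols = [minBound .. maxBound]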

View File

@ -10,17 +10,17 @@ module Language.TSX.Tags
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable
import Data.Text as Text
import GHC.Generics
import qualified Language.TSX.AST as Tsx
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import TreeSitter.Token
import qualified TreeSitter.TSX.AST as Tsx
class ToTags t where
tags
@ -32,8 +32,7 @@ class ToTags t where
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -110,7 +109,7 @@ instance ToTags Tsx.Module where
Prj Tsx.Identifier { text } -> yield text
-- TODO: Handle NestedIdentifiers and Strings
-- Prj Tsx.NestedIdentifier { extraChildren } -> match
_ -> gtags t
_ -> gtags t
yield text = yieldTag text Module loc byteRange >> gtags t
instance (ToTags l, ToTags r) => ToTags (l :+: r) where
@ -122,12 +121,11 @@ instance ToTags (Token sym n) where tags _ = pure ()
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
-- These are all valid, but point to built-in functions (e.g. require) that a la
-- carte doesn't display and since we have nothing to link to yet (can't

@ -0,0 +1 @@
Subproject commit aa950f58ea8aa112bc72f3481b98fc2d3c07b3e0

View File

@ -24,9 +24,12 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-ast
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-typescript ^>= 0.4.2
@ -50,5 +53,7 @@ library
import: haskell
exposed-modules:
Language.TypeScript
Language.TypeScript.AST
Language.TypeScript.Grammar
Language.TypeScript.Tags
hs-source-dirs: src

View File

@ -2,15 +2,15 @@
-- | Semantic functionality for TypeScript programs.
module Language.TypeScript
( Term(..)
, TreeSitter.TypeScript.tree_sitter_typescript
, Language.TypeScript.Grammar.tree_sitter_typescript
) where
import Data.Proxy
import qualified Language.TypeScript.AST as TypeScript
import qualified Language.TypeScript.Tags as TsTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.TypeScript (tree_sitter_typescript)
import qualified TreeSitter.TypeScript.AST as TypeScript
import qualified TreeSitter.Unmarshal as TS
import qualified Language.TypeScript.Grammar (tree_sitter_typescript)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: TypeScript.Program a }
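AST.Unmarshal is the new home of what this module previously imported as TreeSitter.Unmarshal. A hedged usage sketch, assuming parseByteString kept the signature it had under the old module name and that the generated Program type carries an Unmarshal instance:

    -- Sketch: parse TypeScript source into the generated AST, annotated with Loc.
    -- (Assumes Data.ByteString.ByteString and Source.Loc.Loc are in scope.)
    parseTypeScript :: ByteString -> IO (Either String (TypeScript.Program Loc))
    parseTypeScript = TS.parseByteString Language.TypeScript.Grammar.tree_sitter_typescript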

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.TypeScript.AST
( module Language.TypeScript.AST
) where
import Prelude hiding (False, Float, Integer, String, True)
import AST.GenerateSyntax
import qualified Language.TypeScript.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_typescript "../../../vendor/tree-sitter-typescript/typescript/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.TypeScript.Grammar
( tree_sitter_typescript
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.TypeScript (tree_sitter_typescript)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-typescript/typescript/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_typescript

View File

@ -10,17 +10,17 @@ module Language.TypeScript.Tags
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable
import Data.Text as Text
import GHC.Generics
import qualified Language.TypeScript.AST as Ts
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import TreeSitter.Token
import qualified TreeSitter.TypeScript.AST as Ts
class ToTags t where
tags
@ -32,8 +32,7 @@ class ToTags t where
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -103,7 +102,7 @@ instance ToTags Ts.Module where
Prj Ts.Identifier { text } -> yield text
-- TODO: Handle NestedIdentifiers and Strings
-- Prj Tsx.NestedIdentifier { extraChildren } -> match
_ -> gtags t
_ -> gtags t
yield text = yieldTag text Module loc byteRange >> gtags t
instance (ToTags l, ToTags r) => ToTags (l :+: r) where
@ -115,12 +114,11 @@ instance ToTags (Token sym n) where tags _ = pure ()
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
-- These are all valid, but point to built-in functions (e.g. require) that a la
-- carte doesn't display and since we have nothing to link to yet (can't

@ -0,0 +1 @@
Subproject commit 40320d8e0953db5e173e6dfbb596d375a085ca65

View File

@ -67,6 +67,7 @@ common dependencies
, recursion-schemes ^>= 5.1
, scientific ^>= 0.3.6.2
, safe-exceptions ^>= 0.1.7.0
, semantic-codegen
, semantic-analysis ^>= 0
, semantic-source ^>= 0.0.2
, semilattices ^>= 0.0.0.3
@ -150,7 +151,7 @@ library
, Data.Error
, Data.Flag
, Data.Functor.Classes.Generic
, Data.Graph
, Data.Graph.Algebraic
, Data.Graph.ControlFlowVertex
, Data.Handle
, Data.History
@ -158,6 +159,7 @@ library
, Data.JSON.Fields
, Data.Language
, Data.Map.Monoidal
, Data.Maybe.Exts
, Data.Project
, Data.Quieterm
, Data.Semigroup.App
@ -254,7 +256,6 @@ library
, Tags.Taggable
, Tags.Tagging
-- Custom Prelude
, Prologue
autogen-modules: Paths_semantic
other-modules: Paths_semantic
build-depends: base >= 4.13 && < 5
@ -304,6 +305,8 @@ library
, unordered-containers ^>= 0.2.9.0
, vector ^>= 0.12.0.2
, tree-sitter-go ^>= 0.4.1.1
, tree-sitter-java ^>= 0.6.1
, tree-sitter-json ^>= 0.6
, tree-sitter-php ^>= 0.2
, tree-sitter-python ^>= 0.8.1
, tree-sitter-ruby ^>= 0.4.1
@ -358,6 +361,7 @@ test-suite test
, Generators
, Properties
build-depends: semantic
, semantic-json
, tree-sitter-json ^>= 0.6
, Glob ^>= 0.10.0
, hedgehog ^>= 1

View File

@ -1,17 +1,24 @@
{-# LANGUAGE DeriveFunctor, FlexibleContexts, GeneralizedNewtypeDeriving, TypeApplications, TypeOperators #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Analysis.Abstract.Caching.FlowInsensitive
( cachingTerms
, convergingModules
, caching
) where
import Prologue
import Control.Algebra (Effect)
import Control.Carrier.Fresh.Strict
import Control.Carrier.NonDet.Church
import Control.Carrier.Reader
import Control.Carrier.State.Strict
import Data.Bifunctor
import Data.Foldable
import Data.Functor.Classes
import Data.Maybe.Exts
import Data.Semilattice.Lower
import Data.Set (Set)
import Control.Abstract
import Data.Abstract.Module
@ -194,8 +201,8 @@ newtype Cache term address value = Cache { unCache :: Monoidal.Map (Configuratio
-- | A single point in a program’s execution.
data Configuration term address = Configuration
{ configurationTerm :: term -- ^ The “instruction,” i.e. the current term to evaluate.
, configurationRoots :: Live address -- ^ The set of rooted addresses.
{ configurationTerm :: term -- ^ The “instruction,” i.e. the current term to evaluate.
, configurationRoots :: Live address -- ^ The set of rooted addresses.
}
deriving (Eq, Ord, Show)

View File

@ -1,4 +1,9 @@
{-# LANGUAGE ConstraintKinds, FlexibleContexts, GADTs, GeneralizedNewtypeDeriving, TypeApplications, TypeOperators #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Analysis.Abstract.Caching.FlowSensitive
( Cache
, cachingTerms
@ -6,13 +11,17 @@ module Analysis.Abstract.Caching.FlowSensitive
, caching
) where
import Prologue
import Control.Algebra (Effect)
import Control.Carrier.Fresh.Strict
import Control.Carrier.NonDet.Church
import Control.Carrier.Reader
import Control.Carrier.Fresh.Strict
import Control.Carrier.State.Strict
import Data.Bifunctor
import Data.Foldable
import Data.Functor
import Data.Functor.Classes
import Data.Maybe.Exts
import Data.Semilattice.Lower
import Data.Set (Set)
import Control.Abstract
import Data.Abstract.Module

View File

@ -4,7 +4,7 @@ module Analysis.Abstract.Collecting
import Control.Abstract
import Control.Carrier.Reader
import Prologue
import Data.Semilattice.Lower
providingLiveSet :: Evaluator term address value (ReaderC (Live address) m) a -> Evaluator term address value m a
providingLiveSet = raiseHandler (runReader lowerBound)

View File

@ -1,4 +1,8 @@
{-# LANGUAGE FlexibleContexts, FlexibleInstances, GeneralizedNewtypeDeriving, MultiParamTypeClasses, StandaloneDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE StandaloneDeriving #-}
module Analysis.Abstract.Dead
( Dead(..)
, revivingTerms
@ -9,9 +13,10 @@ module Analysis.Abstract.Dead
import Control.Abstract
import Control.Carrier.State.Strict
import Data.Abstract.Module
import Data.Functor.Foldable
import Data.Semigroup.Reducer as Reducer
import Data.Set (delete)
import Prologue
import Data.Semilattice.Lower
import Data.Set (Set, delete)
-- | A set of “dead” (unreachable) terms.
newtype Dead term = Dead { unDead :: Set term }

View File

@ -1,4 +1,13 @@
{-# LANGUAGE DerivingVia, FlexibleContexts, FlexibleInstances, LambdaCase, MultiParamTypeClasses, OverloadedStrings, RankNTypes, ScopedTypeVariables, TypeFamilies, UndecidableInstances #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
module Analysis.Abstract.Graph
( Graph(..)
, ControlFlowVertex(..)
@ -17,7 +26,7 @@ module Analysis.Abstract.Graph
) where
import Algebra.Graph.Export.Dot hiding (vertexName)
import Control.Abstract hiding (Function(..))
import Control.Abstract hiding (Function (..))
import Control.Algebra
import Control.Carrier.Reader
import Control.Carrier.State.Strict
@ -25,11 +34,11 @@ import Control.Effect.Sum.Project
import Data.Abstract.BaseError
import Data.Abstract.Module (Module (moduleInfo), ModuleInfo (..))
import Data.ByteString.Builder
import Data.Graph
import Data.Graph.Algebraic
import Data.Graph.ControlFlowVertex
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.Text.Encoding as T
import Prologue
import Source.Loc
style :: Style ControlFlowVertex Builder
@ -123,7 +132,7 @@ graphingModules recur m = do
where
-- NB: path is null for Languages like Ruby that have module imports that require concrete value semantics.
includeModule path
= let path' = if Prologue.null path then "unknown, concrete semantics required" else path
= let path' = if Prelude.null path then "unknown, concrete semantics required" else path
info = moduleInfo m
in moduleInclusion (moduleVertex (ModuleInfo path' (moduleLanguage info) (moduleOid info)))

View File

@ -1,12 +1,19 @@
{-# LANGUAGE DataKinds, FlexibleInstances, MultiParamTypeClasses, ScopedTypeVariables, TypeApplications, TypeFamilies, TypeOperators, UndecidableInstances #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module Analysis.ConstructorName
( ConstructorName(..)
) where
import Data.Proxy
import Data.Sum
import Data.Term
import GHC.Generics
import Prologue
-- | A typeclass to retrieve the name of the data constructor for a value.
--

View File

@ -1,16 +1,24 @@
{-# LANGUAGE DataKinds, DefaultSignatures, FlexibleInstances, GeneralizedNewtypeDeriving, MultiParamTypeClasses, ScopedTypeVariables, TypeApplications, TypeFamilies, UndecidableInstances #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
module Analysis.CyclomaticComplexity
( CyclomaticComplexity(..)
, HasCyclomaticComplexity
, cyclomaticComplexityAlgebra
) where
import Data.Aeson
import Data.Sum
import Data.Aeson
import Data.Proxy
import Data.Sum
import qualified Data.Syntax.Declaration as Declaration
import qualified Data.Syntax.Statement as Statement
import Data.Term
import Prologue
import Data.Term
-- | The cyclomatic complexity of a (sub)term.
newtype CyclomaticComplexity = CyclomaticComplexity Int

View File

@ -1,10 +1,13 @@
{-# LANGUAGE FlexibleContexts, TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
module Analysis.Decorator
( decoratorWithAlgebra
) where
import Data.Algebra
import Data.Bifunctor
import Data.Functor.Foldable
import Data.Term
import Prologue
-- | Lift an algebra into a decorator for terms annotated with records.
decoratorWithAlgebra :: (Functor (Syntax term), IsTerm term, Recursive (term a), Base (term a) ~ TermF (Syntax term) a)

View File

@ -4,7 +4,7 @@ module Analysis.HasTextElement
) where
import Data.Sum
import Prologue
import Data.Proxy
import qualified Data.Syntax.Literal as Literal
class HasTextElement syntax where

View File

@ -1,18 +1,26 @@
{-# LANGUAGE DataKinds, FlexibleInstances, MultiParamTypeClasses, RecordWildCards, ScopedTypeVariables, TypeApplications, TypeFamilies, UndecidableInstances #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
module Analysis.PackageDef
( PackageDef(..)
, HasPackageDef
, packageDefAlgebra
) where
import Data.Blob
import Source.Source as Source
import Data.Sum
import Data.Term
import Data.Algebra
import Data.Blob
import Data.Proxy
import Data.Sum
import Data.Term
import qualified Data.Text as T
import qualified Language.Go.Syntax
import Prologue
import Source.Loc
import Source.Loc
import Source.Source as Source
newtype PackageDef = PackageDef { moduleDefIdentifier :: T.Text }
deriving (Eq, Show)

View File

@ -1,4 +1,18 @@
{-# LANGUAGE AllowAmbiguousTypes, DataKinds, FlexibleContexts, FlexibleInstances, LambdaCase, MultiParamTypeClasses, OverloadedStrings, RankNTypes, RecordWildCards, ScopedTypeVariables, TypeApplications, TypeFamilies, TypeOperators, UndecidableInstances, ViewPatterns #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ViewPatterns #-}
module Analysis.TOCSummary
( Declaration(..)
, formatIdentifier
@ -8,15 +22,19 @@ module Analysis.TOCSummary
, declarationAlgebra
) where
import Prologue hiding (project)
import Data.Algebra
import Data.Blob
import qualified Data.Error as Error
import Data.Flag
import Data.Foldable (toList)
import Data.Language as Language
import Data.List.NonEmpty (nonEmpty)
import Data.Semigroup (sconcat)
import Data.Sum
import qualified Data.Syntax as Syntax
import qualified Data.Syntax.Declaration as Declaration
import Data.Term
import Data.Text (Text)
import qualified Data.Text as T
import qualified Language.Markdown.Syntax as Markdown
import Source.Loc as Loc

View File

@ -1,4 +1,15 @@
{-# LANGUAGE DataKinds, FlexibleContexts, FlexibleInstances, GADTs, InstanceSigs, MultiParamTypeClasses, RankNTypes, RecordWildCards, ScopedTypeVariables, StandaloneDeriving, TypeFamilies, TypeOperators #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
-- | Assignment of AST onto some other structure (typically terms).
--
-- Parsing yields an AST represented as a Rose tree labelled with symbols in the language’s grammar and source locations (byte Range and Span). An Assignment represents a (partial) map from AST nodes onto some other structure; in essence, it’s a parser that operates over trees. (For our purposes, this structure is typically Terms annotated with source locations.) Assignments are able to match based on symbol, sequence, and hierarchy; thus, in @x = y@, both @x@ and @y@ might have the same symbol, @Identifier@, the left can be assigned to a variable declaration, while the right can be assigned to a variable reference.
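Concretely, a rule in this style pairs a symbol match with whatever it extracts from the node. A minimal sketch using only the symbol and source combinators exported by this module; Grammar and its Identifier constructor stand in for a concrete, generated grammar type:

    -- Sketch: match an Identifier node and hand back its source bytes.
    identifierName :: Assignment ast Grammar ByteString
    identifierName = symbol Identifier *> source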
@ -90,20 +101,32 @@ module Assigning.Assignment
, module Parsers
) where
import Prologue
import Prelude hiding (fail)
import qualified Assigning.Assignment.Table as Table
import Control.Monad.Except (MonadError (..))
import Data.AST
import Data.Error
import qualified Source.Source as Source
import Data.Term
import Data.Text.Encoding (decodeUtf8')
import Control.Applicative
import Control.Monad
import Control.Monad.Except (MonadError (..))
import Data.AST
import Data.Bifunctor
import Data.ByteString (ByteString)
import Data.Error
import Data.Foldable
import Data.Function
import Data.Functor.Classes
import Data.Ix
import Data.List.NonEmpty (NonEmpty (..), nonEmpty)
import Data.Maybe
import Data.Semigroup
import Data.Term
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8')
import GHC.Stack
import Prelude hiding (fail)
import qualified Source.Loc as L
import Source.Range as Range
import Source.Span as Span
import Text.Parser.Combinators as Parsers hiding (choice)
import TreeSitter.Language
import Source.Range as Range
import qualified Source.Source as Source
import Source.Span as Span
import Text.Parser.Combinators as Parsers hiding (choice)
import TreeSitter.Language
-- | Assignment from an AST with some set of 'symbol's onto some other value.
--
@ -129,12 +152,12 @@ data Tracing f a where
assignmentCallSite :: Assignment ast grammar a -> Maybe (String, SrcLoc)
assignmentCallSite (Tracing site _ `Then` _) = site
assignmentCallSite _ = Nothing
assignmentCallSite _ = Nothing
tracing :: HasCallStack => f a -> Tracing f a
tracing f = case getCallStack callStack of
(_ : site : _) -> Tracing (Just site) f
_ -> Tracing Nothing f
_ -> Tracing Nothing f
-- | Zero-width production of the current location.
--
@ -209,8 +232,8 @@ nodeError cs expected n@Node{..} = Error (nodeSpan n) expected (Just (Right node
firstSet :: (Enum grammar, Ix grammar) => Assignment ast grammar a -> [grammar]
firstSet = iterFreer (\ _ (Tracing _ assignment) -> case assignment of
Choose table _ _ -> Table.tableAddresses table
Label child _ -> firstSet child
_ -> []) . ([] <$)
Label child _ -> firstSet child
_ -> []) . ([] <$)
-- | Run an assignment over an AST exhaustively.
@ -275,7 +298,7 @@ requireExhaustive callSite (a, state) =
let state' = skipTokens state
stack = fromCallSiteList (maybe id (:) callSite (stateCallSites state))
in case stateNodes state' of
[] -> Right (a, state')
[] -> Right (a, state')
Term (In node _) : _ -> Left (nodeError stack [] node)
skipTokens :: Symbol grammar => State ast grammar -> State ast grammar
@ -289,11 +312,11 @@ advanceState state@State{..}
-- | State kept while running 'Assignment's.
data State ast grammar = State
{ stateOffset :: {-# UNPACK #-} !Int -- ^ The offset into the Source thus far reached, measured in bytes.
, statePos :: {-# UNPACK #-} !Pos -- ^ The (1-indexed) line/column position in the Source thus far reached.
{ stateOffset :: {-# UNPACK #-} !Int -- ^ The offset into the Source thus far reached, measured in bytes.
, statePos :: {-# UNPACK #-} !Pos -- ^ The (1-indexed) line/column position in the Source thus far reached.
, stateCallSites :: ![(String, SrcLoc)] -- ^ The symbols & source locations of the calls thus far.
, stateNodes :: ![AST ast grammar] -- ^ The remaining nodes to assign. Note that 'children' rules recur into subterms, and thus this does not necessarily reflect all of the terms remaining to be assigned in the overall algorithm, only those “in scope.”
, stateLocals :: ![Text] -- Special state necessary for the Ruby assignment. When we refactor Assignment to use effects we should pull this out into Language.Ruby.Assignment.
, stateNodes :: ![AST ast grammar] -- ^ The remaining nodes to assign. Note that 'children' rules recur into subterms, and thus this does not necessarily reflect all of the terms remaining to be assigned in the overall algorithm, only those “in scope.”
, stateLocals :: ![Text] -- Special state necessary for the Ruby assignment. When we refactor Assignment to use effects we should pull this out into Language.Ruby.Assignment.
}
deriving instance (Eq grammar, Eq1 ast) => Eq (State ast grammar)
@ -315,13 +338,13 @@ instance (Enum grammar, Eq1 ast, Ix grammar) => Alternative (Assignment ast gram
l@(Tracing callSiteL la `Then` continueL) <|> r@(Tracing callSiteR ra `Then` continueR) = go callSiteL la continueL callSiteR ra continueR
where go :: forall l r . Maybe (String, SrcLoc) -> AssignmentF ast grammar l -> (l -> Assignment ast grammar a) -> Maybe (String, SrcLoc) -> AssignmentF ast grammar r -> (r -> Assignment ast grammar a) -> Assignment ast grammar a
go callSiteL la continueL callSiteR ra continueR = case (la, ra) of
(Fail _, _) -> r
(Alt [], _) -> r
(_, Alt []) -> l
(Fail _, _) -> r
(Alt [], _) -> r
(_, Alt []) -> l
(Alt ls, Alt rs) -> alternate (Alt ((Left <$> ls) <> (Right <$> rs)))
(Alt ls, _) -> rebuild (Alt ((continueL <$> ls) <> pure r)) id
(_, Alt rs) -> rebuild (Alt (pure l <> (continueR <$> rs))) id
_ -> rebuild (Alt [l, r]) id
(Alt ls, _) -> rebuild (Alt ((continueL <$> ls) <> pure r)) id
(_, Alt rs) -> rebuild (Alt (pure l <> (continueR <$> rs))) id
_ -> rebuild (Alt [l, r]) id
where alternate :: AssignmentF ast grammar (Either l r) -> Assignment ast grammar a
alternate a = rebuild a (either continueL continueR)
rebuild :: AssignmentF ast grammar x -> (x -> Assignment ast grammar a) -> Assignment ast grammar a
@ -368,7 +391,7 @@ infixl 1 `Then`
instance Functor (Freer f) where
fmap f = go
where go (Return result) = Return (f result)
where go (Return result) = Return (f result)
go (Then step yield) = Then step (go . yield)
{-# INLINE go #-}
{-# INLINE fmap #-}
@ -405,7 +428,7 @@ instance Monad (Freer f) where
-- This is analogous to 'iter' with a continuation for the interior values, and is therefore suitable for defining interpreters for GADTs/types lacking a 'Functor' instance.
iterFreer :: (forall x. (x -> a) -> f x -> a) -> Freer f a -> a
iterFreer algebra = go
where go (Return result) = result
where go (Return result) = result
go (Then action continue) = algebra (go . continue) action
{-# INLINE go #-}
{-# INLINE iterFreer #-}
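As a small example of the kind of interpreter iterFreer supports, a Freer computation over Maybe can be collapsed back into Maybe by threading each continuation through (>>=) (a sketch in terms of the Freer and iterFreer definitions in this file):

    -- Interpret each Maybe step, feeding its result to the continuation.
    runMaybe :: Freer Maybe a -> Maybe a
    runMaybe = iterFreer (\ yield step -> step >>= yield) . fmap Just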

Some files were not shown because too many files have changed in this diff