1
1
mirror of https://github.com/github/semantic.git synced 2024-12-27 17:05:33 +03:00

Merge pull request #448 from github/my-grate

Migrating ASTs to Semantic 🧀
This commit is contained in:
Ayman Nadeem 2020-02-03 16:31:40 -05:00 committed by GitHub
commit 114316c6c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
84 changed files with 1586 additions and 90 deletions

View File

@ -37,7 +37,7 @@ jobs:
name: Cache ~/.cabal/store
with:
path: ~/.cabal/store
key: ${{ runner.os }}-${{ matrix.ghc }}-v5-cabal-store
key: ${{ runner.os }}-${{ matrix.ghc }}-v6-cabal-store
- uses: actions/cache@v1
name: Cache dist-newstyle
@ -52,7 +52,7 @@ jobs:
- name: Install dependencies
run: |
cabal v2-update
script/bootstrap
cabal v2-configure --project-file=cabal.project.ci --disable-optimization --enable-benchmarks --enable-tests --write-ghc-environment-files=always -j2
cabal v2-build --project-file=cabal.project.ci all --only-dependencies

21
.gitmodules vendored
View File

@ -0,0 +1,21 @@
[submodule "semantic-json/vendor/tree-sitter-json"]
path = semantic-json/vendor/tree-sitter-json
url = https://github.com/tree-sitter/tree-sitter-json.git
[submodule "semantic-python/vendor/tree-sitter-python"]
path = semantic-python/vendor/tree-sitter-python
url = https://github.com/tree-sitter/tree-sitter-python.git
[submodule "semantic-java/vendor/tree-sitter-java"]
path = semantic-java/vendor/tree-sitter-java
url = https://github.com/tree-sitter/tree-sitter-java.git
[submodule "semantic-go/vendor/tree-sitter-go"]
path = semantic-go/vendor/tree-sitter-go
url = https://github.com/tree-sitter/tree-sitter-go.git
[submodule "semantic-ruby/vendor/tree-sitter-ruby"]
path = semantic-ruby/vendor/tree-sitter-ruby
url = https://github.com/tree-sitter/tree-sitter-ruby.git
[submodule "semantic-typescript/vendor/tree-sitter-typescript"]
path = semantic-typescript/vendor/tree-sitter-typescript
url = https://github.com/tree-sitter/tree-sitter-typescript.git
[submodule "semantic-tsx/vendor/tree-sitter-typescript"]
path = semantic-tsx/vendor/tree-sitter-typescript
url = https://github.com/tree-sitter/tree-sitter-typescript.git

View File

@ -1,6 +1,5 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
@ -12,7 +11,7 @@ import Data.Abstract.Evaluatable
import Data.Bifunctor
import Data.Blob.IO (readBlobFromPath)
import qualified Data.Duration as Duration
import "semantic" Data.Graph (topologicalSort)
import Data.Graph.Algebraic (topologicalSort)
import qualified Data.Language as Language
import Data.Project
import Data.Proxy

View File

@ -1,9 +1,10 @@
-- ATTENTION: care must be taken to keep this file in sync with cabal.project.ci. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of that file).
-- ATTENTION: care must be taken to keep this file in sync with cabal.project.ci and script/ghci-flags. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of that file).
-- Local packages
packages: .
semantic-analysis
semantic-ast
semantic-codegen
semantic-core
semantic-go
semantic-java

View File

@ -1,9 +1,10 @@
-- ATTENTION: care must be taken to keep this file in sync with cabal.project. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of this file).
-- ATTENTION: care must be taken to keep this file in sync with cabal.project and script/ghci-flags. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of this file).
-- Local packages
packages: .
semantic-analysis
semantic-ast
semantic-codegen
semantic-core
semantic-go
semantic-java
@ -43,6 +44,9 @@ package semantic-analysis
package semantic-ast
ghc-options: -Werror
package semantic-codegen
ghc-options: -Werror
package semantic-core
ghc-options: -Werror

View File

@ -9,7 +9,7 @@ Please note that this list of steps reflects the state of Semantic as is, not wh
1. **Find or write a [tree-sitter](https://tree-sitter.github.io) parser for your language.** The tree-sitter [organization page](https://github.com/tree-sitter) has a number of parsers beyond those we currently support in Semantic; look there first to make sure you're not duplicating work. The tree-sitter [documentation on creating parsers](http://tree-sitter.github.io/tree-sitter/creating-parsers) provides an exhaustive look at the process of developing and debugging tree-sitter parsers. Though we do not support grammars written with other toolkits such as [ANTLR](https://www.antlr.org), translating an ANTLR or other BNF-style grammar into a tree-sitter grammar is usually straightforward.
2. **Create a Haskell library providing an interface to that C source.** The [`haskell-tree-sitter`](https://github.com/tree-sitter/haskell-tree-sitter) repository provides a Cabal package for each supported language. You can find an example of a pull request to add such a package here. Each package needs to provide two API surfaces:
* a bridged (via the FFI) reference to the toplevel parser in the generated file ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/internal/TreeSitter/JSON/Internal.hs))
* symbol datatypes for each syntax node in the parser, generated with the `mkSymbolDatatype` Template Haskell splice ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/TreeSitter/JSON.hs))
* symbol datatypes for each syntax node in the parser, generated with the `mkSymbolDatatype` Template Haskell splice ([example](https://github.com/github/semantic/blob/master/semantic-json/src/Language/JSON/Grammar.hs)). For more information, see [CodeGen docs](https://github.com/github/semantic/blob/master/semantic-codegen/README.md).
3. **Identify the new syntax nodes required to represent your language.** While we provide an extensive library of reusable AST nodes for [literals](https://github.com/github/semantic/blob/master/src/Data/Syntax/Literal.hs), [expressions](https://github.com/github/semantic/blob/master/src/Data/Syntax/Expression.hs), [statements](https://github.com/github/semantic/blob/master/src/Data/Syntax/Statement.hs), and [types](https://github.com/github/semantic/blob/master/src/Data/Syntax/Type.hs), most languages will require some syntax nodes not found in other languages. You'll need to create a new module providing those data types, and those data types must be written as an open union: [here](https://github.com/github/semantic/commits/master/src/Language/Ruby/Syntax.hs?author=charliesome) is an example for Ruby's syntactic details.
4. **Write an assignment step that translates tree-sitter trees into Haskell datatypes.** More information about this can be found in the [assignment documentation](assignment.md). This is currently the most time-consuming and error-prone part of the process (see [https://github.com/github/semantic/issues/77]).
5. **Implement `Evaluatable` instances and add new [`Value` effects](https://github.com/github/semantic/blob/master/src/Control/Abstract/Value.hs) as is needed to describe the control flow of your language.** While several features of Semantic (e.g. `semantic parse --symbols` and `semantic diff`) will become fully available given a working assignment step, further features based on concrete or abstract interpretation (such as `semantic graph`) require implementing the `Evaluatable` typeclass and providing value-style effects for each control flow feature provided by the language. This means that language support is a spectrum: Semantic can provide useful information without any knowledge of a language's semantics, but each successive addition to its interpretive capabilities enables more functionality.

View File

@ -48,6 +48,7 @@ function flags {
# TODO: would be nice to figure this out from cabal.project & the .cabal files
echo "-isemantic-analysis/src"
echo "-isemantic-ast/src"
echo "-isemantic-codegen/src"
echo "-isemantic-core/src"
echo "-isemantic-go/src"
echo "-isemantic-java/src"

View File

@ -2,9 +2,9 @@
module Main (main) where
import TreeSitter.Unmarshal
import qualified TreeSitter.Python.AST as AST
import qualified TreeSitter.Python as Python
import AST.Unmarshal
import qualified Language.Python.AST as AST
import qualified Language.Python.Grammar as Python
import Source.Range
import Source.Span
import Data.Aeson (toJSON)

View File

@ -39,19 +39,22 @@ library
import: haskell
exposed-modules: Marshal.JSON
-- other-modules:
-- other-extensions:
build-depends: base ^>= 4.13
, tree-sitter ^>= 0.8
, semantic-source ^>= 0.0.2
, tree-sitter-python ^>= 0.8.1
, bytestring ^>= 0.10.8.2
, optparse-applicative >= 0.14.3 && < 0.16
, pretty-simple ^>= 3.1.0.0
, aeson ^>= 1.4.2.0
, text ^>= 1.2.3.1
, bytestring ^>= 0.10.8.2
, aeson-pretty ^>= 0.8.8
, bytestring ^>= 0.10.8.2
, tree-sitter ^>= 0.8
, semantic-source ^>= 0.0.2
, template-haskell ^>= 2.15
, tree-sitter-python ^>= 0.8.1
, bytestring ^>= 0.10.8.2
, optparse-applicative >= 0.14.3 && < 0.16
, pretty-simple ^>= 3.1.0.0
, text ^>= 1.2.3.1
hs-source-dirs: src
default-language: Haskell2010
@ -72,5 +75,7 @@ executable semantic-ast
, aeson
, bytestring
, aeson-pretty
, semantic-python
, text
hs-source-dirs: app
default-language: Haskell2010

View File

@ -0,0 +1,5 @@
# Revision history for semantic-codegen
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.

4
semantic-codegen/Main.hs Normal file
View File

@ -0,0 +1,4 @@
module Main where
main :: IO ()
main = putStrLn "Hello, Haskell!"

216
semantic-codegen/README.md Normal file
View File

@ -0,0 +1,216 @@
# CodeGen Documentation
CodeGen is the process for auto-generating language-specific, strongly-typed ASTs to be used in [Semantic](https://github.com/github/semantic-code/blob/d9f91a05dc30a61b9ff8c536d75661d417f3c506/design-docs/precise-code-navigation.md).
_Note: This project was recently moved from `tree-sitter` into `Semantic`. These docs are in the process of being updated to reflect changes._
### Prerequisites
To get started, first make sure your language has:
1. An existing [tree-sitter](http://tree-sitter.github.io/tree-sitter/) parser;
2. An existing Cabal package in this repository for said language. This will provide an interface into tree-sitter's C source. [Here](https://github.com/tree-sitter/haskell-tree-sitter/tree/master/tree-sitter-python) is an example of a library for Python, a supported language that the remaining documentation will refer to.
### CodeGen Pipeline
During parser generation, tree-sitter produces a JSON file that captures the structure of a language's grammar. Based on this, we're able to derive datatypes representing surface languages, and then use those datatypes to generically build ASTs. This automates the engineering effort [historically required for adding a new language](https://github.com/github/semantic/blob/master/docs/adding-new-languages.md).
The following steps provide a high-level outline of the process:
1. [**Deserialize.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/Deserialize.hs) First, we deserialize the `node-types.json` file for a given language into the desired shape of datatypes via parsing capabilities afforded by the [Aeson](http://hackage.haskell.org/package/aeson) library. There are four distinct types represented in the node-types.json file takes on: sums, products, named leaves and anonymous leaves.
2. [**Generate Syntax.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/GenerateSyntax.hs) We then use Template Haskell to auto-generate language-specific, strongly-typed datatypes that represent various language constructs. This API exports the top-level function `astDeclarationsForLanguage` to auto-generate datatypes at compile-time, which is is invoked by a given language [AST](https://github.com/github/semantic/blob/master/semantic-python/src/Language/Python/AST.hs) module.
3. [**Unmarshal.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/Unmarshal.hs) Unmarshaling is the process of iterating over tree-sitters parse trees using its tree cursor API, and producing Haskell ASTs for the relevant nodes. We parse source code from tree-sitter and unmarshal the data we get to build these ASTs generically. This file exports the top-level function `parseByteString`, which takes source code and a language as arguments, and produces an AST.
Here is an example that describes the relationship between a Python identifier represented in the tree-sitter generated JSON file, and a datatype generated by Template Haskell based on the provided JSON:
| Type | JSON | TH-generated code |
|----------|--------------|------------|
|Named leaf|<code>{<br>"type": "identifier",<br>"named": true<br>}|<code>data TreeSitter.Python.AST.Identifier a<br>= TreeSitter.Python.AST.Identifier {TreeSitter.Python.AST.ann :: a,<br>TreeSitter.Python.AST.bytes :: text-1.2.3.1:Data.Text.Internal.Text} -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Show a => Show (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Ord a => Ord (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Eq a => Eq (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Traversable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Functor TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Foldable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Unmarshal TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance SymbolMatching TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1|
The remaining document provides more details on generating ASTs, inspecting datatypes, tests, and information on decisions pertaining to relevant APIs.
___
### Table of Contents
- [CodeGen Documentation](#codegen-documentation)
- [Prerequisites](#prerequisites)
- [CodeGen Pipeline](#codegen-pipeline)
- [Table of Contents](#table-of-contents)
- [Generating ASTs](#generating-asts)
- [Inspecting auto-generated datatypes](#inspecting-auto-generated-datatypes)
- [Tests](#tests)
- [Additional notes](#additional-notes)
___
### Generating ASTs
To parse source code and produce ASTs locally:
1. Load the REPL for a given language:
```
cabal new-repl lib:tree-sitter-python
```
2. Set language extensions, `OverloadedStrings` and `TypeApplications`, and import relevant modules, `AST.Unmarshal`, `Source.Range` and `Source.Span`:
```
:seti -XOverloadedStrings
:seti -XTypeApplications
import Source.Span
import Source.Range
import AST.Unmarshal
```
3. You can now call `parseByteString`, passing in the desired language you wish to parse (in this case Python exemplified by `tree_sitter_python`), and the source code (in this case an integer `1`). Since the function is constrained by `(Unmarshal t, UnmarshalAnn a)`, you can use type applications to provide a top-level node `t`, an entry point into the tree, in addition to a polymorphic annotation `a` used to represent range and span:
```
parseByteString @TreeSitter.Python.AST.Module @(Source.Span.Span, Source.Range.Range) tree_sitter_python "1"
```
This generates the following AST:
```
Right
( Module
{ ann =
( Range
{ start = 0
, end = 1
}
, Span
{ start = Pos
{ line = 0
, column = 0
}
, end = Pos
{ line = 0
, column = 1
}
}
)
, extraChildren =
[ R1
( SimpleStatement
( L1
( R1
( R1
( L1
( ExpressionStatement
{ ann =
( Range
{ start = 0
, end = 1
}
, Span
{ start = Pos
{ line = 0
, column = 0
}
, end = Pos
{ line = 0
, column = 1
}
}
)
, extraChildren = L1
( L1
( Expression
( L1
( L1
( L1
( PrimaryExpression
( R1
( L1
( L1
( L1
( Integer
{ ann =
( Range
{ start = 0
, end = 1
}
, Span
{ start = Pos
{ line = 0
, column = 0
}
, end = Pos
{ line = 0
, column = 1
}
}
)
, text = "1"
}
)
)
)
)
)
)
)
)
)
)
) :| []
}
)
)
)
)
)
)
]
}
)
```
### Inspecting auto-generated datatypes
Datatypes are derived from a language and its `node-types.json` file using the GenerateSyntax API. Definition can be viewed in the REPL just as they would for any other datatype, using `:i`:
```
:i TreeSitter.Python.AST.Module
```
This shows us the auto-generated `Module` datatype:
```Haskell
data TreeSitter.Python.AST.Module a
= TreeSitter.Python.AST.Module {TreeSitter.Python.AST.ann :: a,
TreeSitter.Python.AST.extraChildren :: [(GHC.Generics.:+:)
TreeSitter.Python.AST.CompoundStatement
TreeSitter.Python.AST.SimpleStatement
a]}
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Show a => Show (TreeSitter.Python.AST.Module a)
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Ord a => Ord (TreeSitter.Python.AST.Module a)
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Eq a => Eq (TreeSitter.Python.AST.Module a)
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Traversable TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Functor TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Foldable TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Unmarshal TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance SymbolMatching TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
```
### Tests
As of right now, Hedgehog tests are minimal and only in place for the Python library.
To run tests:
`cabal v2-test tree-sitter-python`
### Additional notes
- [GenerateSyntax](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/GenerateSyntax.hs) provides a way to pre-define certain datatypes for which Template Haskell is not used. Any datatypes among the node types which have already been defined in the module where the splice is run will be skipped, allowing customization of the representation of parts of the tree. While this gives us flexibility, we encourage that this is used sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into `Integer`s), and may require defining `TS.UnmarshalAnn` or `TS.SymbolMatching` instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual `Foldable`, `Functor`, etc. instances provided for generated datatypes.
- Annotations are captured by a polymorphic parameter `a`
- [Unmarshal](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/Unmarshal.hs) defines both generic and non-generic classes. This is because generic behaviors are different than what we get non-generically, and in the case of ` Maybe`, `[]`—we actually preference doing things non-generically. Since `[]` is a sum, the generic behavior for `:+:` would be invoked and expect that wed have repetitions represented in the parse tree as right-nested singly-linked lists (ex., `(a (b (c (d…))))`) rather than as just consecutive sibling nodes (ex., `(a b c ...d)`, which is what our trees have). We want to match the latter.

View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,82 @@
cabal-version: 2.4
-- Initial package description 'semantic-codegen.cabal' generated by 'cabal
-- init'. For further documentation, see
-- http://haskell.org/cabal/users-guide/
name: semantic-codegen
version: 0.1.0.0
-- synopsis:
-- description:
-- bug-reports:
license: MIT
license-file: LICENSE
author: The Semantic Authors
maintainer: opensource+semantic@github.com
copyright: (c) 2019 GitHub, Inc.
category: Language
extra-source-files: CHANGELOG.md
tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies
library
import: haskell
exposed-modules: AST.Deserialize
AST.GenerateSyntax
AST.Grammar.TH
AST.Token
AST.Unmarshal
-- other-modules:
-- other-extensions:
build-depends: base >= 4.13
, aeson ^>= 1.4.2.0
, bytestring ^>= 0.10.8.2
, tree-sitter ^>= 0.8
, fused-effects ^>= 1.0
, semantic-source ^>= 0.0.2
, template-haskell ^>= 2.15
, text ^>= 1.2.3.1
, unordered-containers ^>= 0.2.10
, containers >= 0.6.0.1
, text ^>= 1.2.3.1
, filepath ^>= 1.4.1
, directory ^>= 1.3.3.2
hs-source-dirs: src
default-language: Haskell2010
executable semantic-codegen
import: haskell
main-is: Main.hs
-- other-modules:
-- other-extensions:
build-depends: base
, tree-sitter
, semantic-source
, bytestring
, aeson
, bytestring
, text
, unordered-containers
, containers
, filepath
default-language: Haskell2010
-- hs-source-dirs:
default-language: Haskell2010

View File

@ -0,0 +1,133 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveLift #-}
-- Turn off partial field warnings for Datatype.
{-# OPTIONS_GHC -Wno-partial-fields #-}
module AST.Deserialize
( Datatype (..)
, Field (..)
, Children(..)
, Required (..)
, Type (..)
, DatatypeName (..)
, Named (..)
, Multiple (..)
) where
import Data.Aeson as Aeson
import Data.Aeson.Types
import Data.Char
import GHC.Generics hiding (Constructor, Datatype)
import Language.Haskell.TH.Syntax (Lift)
import Data.Text (Text, unpack)
import Data.List.NonEmpty (NonEmpty (..))
import qualified Data.HashMap.Strict as HM
import Data.Maybe (fromMaybe)
-- Types to deserialize into:
data Datatype
= SumType
{ datatypeName :: DatatypeName
, datatypeNameStatus :: Named
, datatypeSubtypes :: NonEmpty Type
}
| ProductType
{ datatypeName :: DatatypeName
, datatypeNameStatus :: Named
, datatypeChildren :: Maybe Children
, datatypeFields :: [(String, Field)]
}
| LeafType
{ datatypeName :: DatatypeName
, datatypeNameStatus :: Named
}
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Datatype where
parseJSON = withObject "Datatype" $ \v -> do
type' <- v .: "type"
named <- v .: "named"
subtypes <- v .:? "subtypes"
case subtypes of
Nothing -> do
fields <- fmap (fromMaybe HM.empty) (v .:? "fields")
children <- v .:? "children"
if null fields && null children then
pure (LeafType type' named)
else
ProductType type' named children <$> parseKVPairs (HM.toList fields)
Just subtypes -> pure (SumType type' named subtypes)
-- | Transforms list of key-value pairs to a Parser
parseKVPairs :: [(Text, Value)] -> Parser [(String, Field)]
parseKVPairs = traverse go
where go :: (Text, Value) -> Parser (String, Field)
go (t,v) = do
v' <- parseJSON v
pure (unpack t, v')
data Field = MkField
{ fieldRequired :: Required
, fieldTypes :: NonEmpty Type
, fieldMultiple :: Multiple
}
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Field where
parseJSON = genericParseJSON customOptions
newtype Children = MkChildren Field
deriving (Eq, Ord, Show, Generic)
deriving newtype (ToJSON, FromJSON)
data Required = Optional | Required
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Required where
parseJSON = withBool "Required" (\p -> pure (if p then Required else Optional))
data Type = MkType
{ fieldType :: DatatypeName
, isNamed :: Named
}
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Type where
parseJSON = genericParseJSON customOptions
newtype DatatypeName = DatatypeName { getDatatypeName :: String }
deriving (Eq, Ord, Show, Generic)
deriving newtype (FromJSON, ToJSON)
data Named = Anonymous | Named
deriving (Eq, Ord, Show, Generic, ToJSON, Lift)
instance FromJSON Named where
parseJSON = withBool "Named" (\p -> pure (if p then Named else Anonymous))
data Multiple = Single | Multiple
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Multiple where
parseJSON = withBool "Multiple" (\p -> pure (if p then Multiple else Single))
customOptions :: Aeson.Options
customOptions = Aeson.defaultOptions
{
fieldLabelModifier = initLower . dropPrefix
, constructorTagModifier = initLower
}
dropPrefix :: String -> String
dropPrefix = Prelude.dropWhile isLower
initLower :: String -> String
initLower (c:cs) = toLower c : cs
initLower "" = ""

View File

@ -0,0 +1,186 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module AST.GenerateSyntax
( syntaxDatatype
, astDeclarationsForLanguage
) where
import Data.Aeson hiding (String)
import Data.Foldable
import Data.List
import Data.List.NonEmpty (NonEmpty (..))
import Data.Text (Text)
import Foreign.C.String
import Foreign.Ptr
import GHC.Generics hiding (Constructor, Datatype)
import GHC.Records
import Language.Haskell.TH as TH
import Language.Haskell.TH.Syntax as TH
import System.Directory
import System.FilePath.Posix
import AST.Deserialize (Children (..), Datatype (..), DatatypeName (..), Field (..), Multiple (..), Named (..), Required (..), Type (..))
import qualified TreeSitter.Language as TS
import TreeSitter.Node
import TreeSitter.Symbol (TSSymbol, toHaskellCamelCaseIdentifier, toHaskellPascalCaseIdentifier)
import AST.Token
import qualified AST.Unmarshal as TS
-- | Derive Haskell datatypes from a language and its @node-types.json@ file.
--
-- Datatypes will be generated according to the specification in the @node-types.json@ file, with anonymous leaf types defined as synonyms for the 'Token' datatype.
--
-- Any datatypes among the node types which have already been defined in the module where the splice is run will be skipped, allowing customization of the representation of parts of the tree. Note that this should be used sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into 'Integer's), and may require defining 'TS.UnmarshalAnn' or 'TS.SymbolMatching' instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual 'Foldable', 'Functor', etc. instances provided for generated datatypes.
astDeclarationsForLanguage :: Ptr TS.Language -> FilePath -> Q [Dec]
astDeclarationsForLanguage language filePath = do
_ <- TS.addDependentFileRelative filePath
currentFilename <- loc_filename <$> location
pwd <- runIO getCurrentDirectory
let invocationRelativePath = takeDirectory (pwd </> currentFilename) </> filePath
input <- runIO (eitherDecodeFileStrict' invocationRelativePath) >>= either fail pure
allSymbols <- runIO (getAllSymbols language)
debugSymbolNames <- [d|
debugSymbolNames :: [String]
debugSymbolNames = $(listE (map (litE . stringL . debugPrefix) allSymbols))
|]
(debugSymbolNames <>) . concat @[] <$> traverse (syntaxDatatype language allSymbols) input
-- Build a list of all symbols
getAllSymbols :: Ptr TS.Language -> IO [(String, Named)]
getAllSymbols language = do
count <- TS.ts_language_symbol_count language
mapM getSymbol [(0 :: TSSymbol) .. fromIntegral (pred count)]
where
getSymbol i = do
cname <- TS.ts_language_symbol_name language i
n <- peekCString cname
t <- TS.ts_language_symbol_type language i
let named = if t == 0 then Named else Anonymous
pure (n, named)
-- Auto-generate Haskell datatypes for sums, products and leaf types
syntaxDatatype :: Ptr TS.Language -> [(String, Named)] -> Datatype -> Q [Dec]
syntaxDatatype language allSymbols datatype = skipDefined $ do
typeParameterName <- newName "a"
case datatype of
SumType (DatatypeName _) _ subtypes -> do
types' <- fieldTypesToNestedSum subtypes
let fieldName = mkName ("get" <> nameStr)
con <- recC name [TH.varBangType fieldName (TH.bangType strictness (pure types' `appT` varT typeParameterName))]
hasFieldInstance <- makeHasFieldInstance (conT name) (varT typeParameterName) (varE fieldName)
pure
( NewtypeD [] name [PlainTV typeParameterName] Nothing con [deriveGN, deriveStockClause, deriveAnyClassClause]
: hasFieldInstance)
ProductType (DatatypeName datatypeName) named children fields -> do
con <- ctorForProductType datatypeName typeParameterName children fields
result <- symbolMatchingInstance allSymbols name named datatypeName
pure $ generatedDatatype name [con] typeParameterName:result
-- Anonymous leaf types are defined as synonyms for the `Token` datatype
LeafType (DatatypeName datatypeName) Anonymous -> do
tsSymbol <- runIO $ withCStringLen datatypeName (\(s, len) -> TS.ts_language_symbol_for_name language s len False)
pure [ TySynD name [] (ConT ''Token `AppT` LitT (StrTyLit datatypeName) `AppT` LitT (NumTyLit (fromIntegral tsSymbol))) ]
LeafType (DatatypeName datatypeName) Named -> do
con <- ctorForLeafType (DatatypeName datatypeName) typeParameterName
result <- symbolMatchingInstance allSymbols name Named datatypeName
pure $ generatedDatatype name [con] typeParameterName:result
where
-- Skip generating datatypes that have already been defined (overridden) in the module where the splice is running.
skipDefined m = do
isLocal <- lookupTypeName nameStr >>= maybe (pure False) isLocalName
if isLocal then pure [] else m
name = mkName nameStr
nameStr = toNameString (datatypeNameStatus datatype) (getDatatypeName (AST.Deserialize.datatypeName datatype))
deriveStockClause = DerivClause (Just StockStrategy) [ ConT ''Eq, ConT ''Ord, ConT ''Show, ConT ''Generic, ConT ''Foldable, ConT ''Functor, ConT ''Traversable, ConT ''Generic1]
deriveAnyClassClause = DerivClause (Just AnyclassStrategy) [ConT ''TS.Unmarshal]
deriveGN = DerivClause (Just NewtypeStrategy) [ConT ''TS.SymbolMatching]
generatedDatatype name cons typeParameterName = DataD [] name [PlainTV typeParameterName] Nothing cons [deriveStockClause, deriveAnyClassClause]
makeHasFieldInstance :: TypeQ -> TypeQ -> ExpQ -> Q [Dec]
makeHasFieldInstance ty param elim =
[d|instance HasField "ann" $(ty `appT` param) $param where
getField = TS.gann . $elim |]
-- | Create TH-generated SymbolMatching instances for sums, products, leaves
symbolMatchingInstance :: [(String, Named)] -> Name -> Named -> String -> Q [Dec]
symbolMatchingInstance allSymbols name named str = do
let tsSymbols = elemIndices (str, named) allSymbols
names = intercalate ", " $ fmap (debugPrefix . (!!) allSymbols) tsSymbols
[d|instance TS.SymbolMatching $(conT name) where
matchedSymbols _ = tsSymbols
showFailure _ node = "expected " <> $(litE (stringL names))
<> " but got " <> if nodeSymbol node == 65535 then "ERROR" else genericIndex debugSymbolNames (nodeSymbol node)
<> " [" <> show r1 <> ", " <> show c1 <> "] -"
<> " [" <> show r2 <> ", " <> show c2 <> "]"
where TSPoint r1 c1 = nodeStartPoint node
TSPoint r2 c2 = nodeEndPoint node|]
-- | Prefix symbol names for debugging to disambiguate between Named and Anonymous nodes.
debugPrefix :: (String, Named) -> String
debugPrefix (name, Named) = name
debugPrefix (name, Anonymous) = "_" <> name
-- | Build Q Constructor for product types (nodes with fields)
ctorForProductType :: String -> Name -> Maybe Children -> [(String, Field)] -> Q Con
ctorForProductType constructorName typeParameterName children fields = ctorForTypes constructorName lists where
lists = annotation : fieldList ++ childList
annotation = ("ann", varT typeParameterName)
fieldList = map (fmap toType) fields
childList = toList $ fmap toTypeChild children
toType (MkField required fieldTypes mult) =
let ftypes = fieldTypesToNestedSum fieldTypes `appT` varT typeParameterName
in case (required, mult) of
(Required, Multiple) -> appT (conT ''NonEmpty) ftypes
(Required, Single) -> ftypes
(Optional, Multiple) -> appT (conT ''[]) ftypes
(Optional, Single) -> appT (conT ''Maybe) ftypes
toTypeChild (MkChildren field) = ("extra_children", toType field)
-- | Build Q Constructor for leaf types (nodes with no fields or subtypes)
ctorForLeafType :: DatatypeName -> Name -> Q Con
ctorForLeafType (DatatypeName name) typeParameterName = ctorForTypes name
[ ("ann", varT typeParameterName) -- ann :: a
, ("text", conT ''Text) -- text :: Text
]
-- | Build Q Constructor for records
ctorForTypes :: String -> [(String, Q TH.Type)] -> Q Con
ctorForTypes constructorName types = recC (toName Named constructorName) recordFields where
recordFields = map (uncurry toVarBangType) types
toVarBangType str type' = TH.varBangType (mkName . toHaskellCamelCaseIdentifier $ str) (TH.bangType strictness type')
-- | Convert field types to Q types
fieldTypesToNestedSum :: NonEmpty AST.Deserialize.Type -> Q TH.Type
fieldTypesToNestedSum xs = go (toList xs)
where
combine lhs rhs = (conT ''(:+:) `appT` lhs) `appT` rhs -- (((((a :+: b) :+: c) :+: d)) :+: e) ((a :+: b) :+: (c :+: d))
convertToQType (MkType (DatatypeName n) named) = conT (toName named n)
go [x] = convertToQType x
go xs = let (l,r) = splitAt (length xs `div` 2) xs in combine (go l) (go r)
-- | Create bang required to build records
strictness :: BangQ
strictness = TH.bang noSourceUnpackedness noSourceStrictness
-- | Prepend "Anonymous" to named node when false, otherwise use regular toName
toName :: Named -> String -> Name
toName named str = mkName (toNameString named str)
toNameString :: Named -> String -> String
toNameString named str = prefix named <> toHaskellPascalCaseIdentifier str
where
prefix Anonymous = "Anonymous"
prefix Named = ""
-- | Get the 'Module', if any, for a given 'Name'.
moduleForName :: Name -> Maybe Module
moduleForName n = Module . PkgName <$> namePackage n <*> (ModName <$> nameModule n)
-- | Test whether the name is defined in the module where the splice is executed.
isLocalName :: Name -> Q Bool
isLocalName n = (moduleForName n ==) . Just <$> thisModule

View File

@ -0,0 +1,83 @@
{-# LANGUAGE DataKinds, DeriveAnyClass, DeriveGeneric, DuplicateRecordFields, TypeOperators #-}
module AST.Grammar.Examples () where
import Control.Effect.Reader
import Control.Monad.Fail
import qualified Data.ByteString as B
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import GHC.Generics ((:+:), Generic1)
import Numeric (readDec)
import Prelude hiding (fail)
import Source.Range
import AST.Token
import AST.Unmarshal
-- | An example of a sum-of-products datatype.
newtype Expr a = Expr ((If :+: Block :+: Var :+: Lit :+: Bin) a)
deriving (Generic1, Unmarshal)
instance SymbolMatching Expr where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Product with multiple fields.
data If a = If { ann :: a, condition :: Expr a, consequence :: Expr a, alternative :: Maybe (Expr a) }
deriving (Generic1, Unmarshal)
instance SymbolMatching If where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Single-field product.
data Block a = Block { ann :: a, body :: [Expr a] }
deriving (Generic1, Unmarshal)
instance SymbolMatching Block where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Leaf node.
data Var a = Var { ann :: a, text :: Text.Text }
deriving (Generic1, Unmarshal)
instance SymbolMatching Var where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Custom leaf node.
data Lit a = Lit { ann :: a, lit :: IntegerLit }
deriving (Generic1, Unmarshal)
instance SymbolMatching Lit where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Product with anonymous sum field.
data Bin a = Bin { ann :: a, lhs :: Expr a, op :: (AnonPlus :+: AnonTimes) a, rhs :: Expr a }
deriving (Generic1, Unmarshal)
instance SymbolMatching Bin where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Anonymous leaf node.
type AnonPlus = Token "+" 0
-- | Anonymous leaf node.
type AnonTimes = Token "*" 1
newtype IntegerLit = IntegerLit Integer
instance UnmarshalAnn IntegerLit where
unmarshalAnn node = do
Range start end <- unmarshalAnn node
bytestring <- asks source
let drop = B.drop start
take = B.take (end - start)
slice = take . drop
str = Text.unpack (Text.decodeUtf8 (slice bytestring))
case readDec str of
(i, _):_ -> pure (IntegerLit i)
_ -> fail ("could not parse '" <> str <> "'")

View File

@ -0,0 +1,33 @@
{-# LANGUAGE TemplateHaskell #-}
module AST.Grammar.TH
( mkStaticallyKnownRuleGrammarData
) where
import Data.Ix (Ix)
import Data.List (mapAccumL)
import qualified Data.Set as Set
import Foreign.Ptr
import Language.Haskell.TH
import Language.Haskell.TH.Syntax
import TreeSitter.Symbol
import TreeSitter.Language (Language, languageSymbols)
-- | TemplateHaskell construction of a datatype for the referenced Language.
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData :: Name -> Ptr Language -> Q [Dec]
mkStaticallyKnownRuleGrammarData name language = do
symbols <- renameDups . map ((,) . fst <*> uncurry symbolToName) . (++ [(Regular, "ParseError")]) <$> runIO (languageSymbols language)
Module _ modName <- thisModule
let mkMatch symbolType str = match (conP (Name (OccName str) (NameQ modName)) []) (normalB [e|symbolType|]) []
datatype <- dataD (pure []) name [] Nothing (flip normalC [] . mkName . snd <$> symbols)
[ derivClause Nothing (map conT [ ''Bounded, ''Enum, ''Eq, ''Ix, ''Ord, ''Show ]) ]
symbolInstance <- [d|
instance Symbol $(conT name) where
symbolType = $(lamCaseE (uncurry mkMatch <$> symbols)) |]
pure (datatype : symbolInstance)
renameDups :: [(a, String)] -> [(a, String)]
renameDups = snd . mapAccumL go mempty
where go done (ty, name) = let name' = rename name in (Set.insert name' done, (ty, name'))
where rename name | name `Set.member` done = rename (name ++ "'")
| otherwise = name

View File

@ -0,0 +1,17 @@
{-# LANGUAGE DataKinds, DeriveGeneric, DeriveTraversable, KindSignatures #-}
module AST.Token
( Token(..)
) where
import GHC.Generics (Generic, Generic1)
import GHC.TypeLits (Symbol, Nat)
-- | An AST node representing a token, indexed by its name and numeric value.
--
-- For convenience, token types are typically used via type synonyms, e.g.:
--
-- @
-- type AnonymousPlus = Token "+" 123
-- @
newtype Token (symName :: Symbol) (symVal :: Nat) a = Token { ann :: a }
deriving (Eq, Foldable, Functor, Generic, Generic1, Ord, Show, Traversable)

View File

@ -0,0 +1,405 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module AST.Unmarshal
( parseByteString
, UnmarshalState(..)
, UnmarshalError(..)
, FieldName(..)
, Unmarshal(..)
, UnmarshalAnn(..)
, UnmarshalField(..)
, SymbolMatching(..)
, Match(..)
, hoist
, lookupSymbol
, unmarshalNode
, GHasAnn(..)
) where
import Control.Algebra (send)
import Control.Carrier.Reader hiding (asks)
import Control.Exception
import Control.Monad.IO.Class
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Data.Coerce
import Data.Foldable (toList)
import qualified Data.IntMap as IntMap
import Data.List.NonEmpty (NonEmpty (..))
import Data.Proxy
import qualified Data.Text as Text
import Data.Text.Encoding
import Data.Text.Encoding.Error (lenientDecode)
import Foreign.C.String
import Foreign.Marshal.Array
import Foreign.Marshal.Utils
import Foreign.Ptr
import Foreign.Storable
import GHC.Generics
import GHC.Records
import GHC.TypeLits
import Source.Loc
import Source.Span
import TreeSitter.Cursor as TS
import TreeSitter.Language as TS
import TreeSitter.Node as TS
import TreeSitter.Parser as TS
import AST.Token as TS
import TreeSitter.Tree as TS
asks :: Has (Reader r) sig m => (r -> r') -> m r'
asks f = send (Ask (pure . f))
{-# INLINE asks #-}
-- Parse source code and produce AST
parseByteString :: (Unmarshal t, UnmarshalAnn a) => Ptr TS.Language -> ByteString -> IO (Either String (t a))
parseByteString language bytestring = withParser language $ \ parser -> withParseTree parser bytestring $ \ treePtr ->
if treePtr == nullPtr then
pure (Left "error: didn't get a root node")
else
withRootNode treePtr $ \ rootPtr ->
withCursor (castPtr rootPtr) $ \ cursor ->
(Right <$> runReader (UnmarshalState bytestring cursor) (liftIO (peek rootPtr) >>= unmarshalNode))
`catch` (pure . Left . getUnmarshalError)
newtype UnmarshalError = UnmarshalError { getUnmarshalError :: String }
deriving (Show)
instance Exception UnmarshalError
data UnmarshalState = UnmarshalState
{ source :: {-# UNPACK #-} !ByteString
, cursor :: {-# UNPACK #-} !(Ptr Cursor)
}
type MatchM = ReaderC UnmarshalState IO
newtype Match t = Match
{ runMatch :: forall a . UnmarshalAnn a => Node -> MatchM (t a)
}
-- | A church-encoded binary tree with constant-time 'singleton', 'mempty', '<>', and 'fmap', and linear-time 'foldMap'.
newtype B a = B (forall r . (r -> r -> r) -> (a -> r) -> r -> r)
instance Functor B where
fmap f (B run) = B (\ fork leaf -> run fork (leaf . f))
{-# INLINE fmap #-}
a <$ B run = B (\ fork leaf -> run fork (leaf . const a))
{-# INLINE (<$) #-}
instance Semigroup (B a) where
B l <> B r = B (\ fork leaf nil -> fork (l fork leaf nil) (r fork leaf nil))
{-# INLINE (<>) #-}
instance Monoid (B a) where
mempty = B (\ _ _ nil -> nil)
{-# INLINE mempty #-}
instance Foldable B where
foldMap f (B run) = run (<>) f mempty
{-# INLINE foldMap #-}
singleton :: a -> B a
singleton a = B (\ _ leaf _ -> leaf a)
{-# INLINE singleton #-}
hoist :: (forall x . t x -> t' x) -> Match t -> Match t'
hoist f (Match run) = Match (fmap f . run)
{-# INLINE hoist #-}
lookupSymbol :: TSSymbol -> IntMap.IntMap a -> Maybe a
lookupSymbol sym map = IntMap.lookup (fromIntegral sym) map
{-# INLINE lookupSymbol #-}
-- | Unmarshal a node
unmarshalNode :: forall t a .
( UnmarshalAnn a
, Unmarshal t
)
=> Node
-> MatchM (t a)
unmarshalNode node = case lookupSymbol (nodeSymbol node) matchers' of
Just t -> runMatch t node
Nothing -> liftIO . throwIO . UnmarshalError $ showFailure (Proxy @t) node
{-# INLINE unmarshalNode #-}
-- | Unmarshalling is the process of iterating over tree-sitters parse trees using its tree cursor API and producing Haskell ASTs for the relevant nodes.
--
-- Datatypes which can be constructed from tree-sitter parse trees may use the default definition of 'matchers' providing that they have a suitable 'Generic1' instance.
class SymbolMatching t => Unmarshal t where
matchers' :: IntMap.IntMap (Match t)
matchers' = IntMap.fromList (toList matchers)
matchers :: B (Int, Match t)
default matchers :: (Generic1 t, GUnmarshal (Rep1 t)) => B (Int, Match t)
matchers = foldMap (singleton . (, match)) (matchedSymbols (Proxy @t))
where match = Match $ \ node -> do
cursor <- asks cursor
goto cursor (nodeTSNode node)
fmap to1 (gunmarshalNode node)
instance (Unmarshal f, Unmarshal g) => Unmarshal (f :+: g) where
matchers = fmap (fmap (hoist L1)) matchers <> fmap (fmap (hoist R1)) matchers
instance Unmarshal t => Unmarshal (Rec1 t) where
matchers = coerce (matchers @t)
instance (KnownNat n, KnownSymbol sym) => Unmarshal (Token sym n) where
matchers = singleton (fromIntegral (natVal (Proxy @n)), Match (fmap Token . unmarshalAnn))
-- | Unmarshal an annotation field.
--
-- Leaf nodes have 'Text.Text' fields, and leaves, anonymous leaves, and products all have parametric annotation fields. All of these fields are unmarshalled using the metadata of the node, e.g. its start/end bytes, without reference to any child nodes it may contain.
class UnmarshalAnn a where
unmarshalAnn
:: Node
-> MatchM a
instance UnmarshalAnn () where
unmarshalAnn _ = pure ()
instance UnmarshalAnn Text.Text where
unmarshalAnn node = do
range <- unmarshalAnn node
asks (decodeUtf8With lenientDecode . slice range . source)
-- | Instance for pairs of annotations
instance (UnmarshalAnn a, UnmarshalAnn b) => UnmarshalAnn (a,b) where
unmarshalAnn node = (,)
<$> unmarshalAnn @a node
<*> unmarshalAnn @b node
instance UnmarshalAnn Loc where
unmarshalAnn node = Loc
<$> unmarshalAnn @Range node
<*> unmarshalAnn @Span node
instance UnmarshalAnn Range where
unmarshalAnn node = do
let start = fromIntegral (nodeStartByte node)
end = fromIntegral (nodeEndByte node)
pure (Range start end)
instance UnmarshalAnn Span where
unmarshalAnn node = do
let spanStart = pointToPos (nodeStartPoint node)
spanEnd = pointToPos (nodeEndPoint node)
pure (Span spanStart spanEnd)
pointToPos :: TSPoint -> Pos
pointToPos (TSPoint line column) = Pos (fromIntegral line) (fromIntegral column)
-- | Optional/repeated fields occurring in product datatypes are wrapped in type constructors, e.g. 'Maybe', '[]', or 'NonEmpty', and thus can unmarshal zero or more nodes for the same field name.
class UnmarshalField t where
unmarshalField
:: ( Unmarshal f
, UnmarshalAnn a
)
=> String -- ^ datatype name
-> String -- ^ field name
-> [Node] -- ^ nodes
-> MatchM (t (f a))
instance UnmarshalField Maybe where
unmarshalField _ _ [] = pure Nothing
unmarshalField _ _ [x] = Just <$> unmarshalNode x
unmarshalField d f _ = liftIO . throwIO . UnmarshalError $ "type '" <> d <> "' expected zero or one nodes in field '" <> f <> "' but got multiple"
instance UnmarshalField [] where
unmarshalField d f (x:xs) = do
head' <- unmarshalNode x
tail' <- unmarshalField d f xs
pure $ head' : tail'
unmarshalField _ _ [] = pure []
instance UnmarshalField NonEmpty where
unmarshalField d f (x:xs) = do
head' <- unmarshalNode x
tail' <- unmarshalField d f xs
pure $ head' :| tail'
unmarshalField d f [] = liftIO . throwIO . UnmarshalError $ "type '" <> d <> "' expected one or more nodes in field '" <> f <> "' but got zero"
class SymbolMatching (a :: * -> *) where
matchedSymbols :: Proxy a -> [Int]
-- | Provide error message describing the node symbol vs. the symbols this can match
showFailure :: Proxy a -> Node -> String
instance SymbolMatching f => SymbolMatching (M1 i c f) where
matchedSymbols _ = matchedSymbols (Proxy @f)
showFailure _ = showFailure (Proxy @f)
instance SymbolMatching f => SymbolMatching (Rec1 f) where
matchedSymbols _ = matchedSymbols (Proxy @f)
showFailure _ = showFailure (Proxy @f)
instance (KnownNat n, KnownSymbol sym) => SymbolMatching (Token sym n) where
matchedSymbols _ = [fromIntegral (natVal (Proxy @n))]
showFailure _ _ = "expected " ++ symbolVal (Proxy @sym)
instance (SymbolMatching f, SymbolMatching g) => SymbolMatching (f :+: g) where
matchedSymbols _ = matchedSymbols (Proxy @f) <> matchedSymbols (Proxy @g)
showFailure _ = sep <$> showFailure (Proxy @f) <*> showFailure (Proxy @g)
sep :: String -> String -> String
sep a b = a ++ ". " ++ b
-- | Move the cursor to point at the passed 'TSNode'.
goto :: Ptr Cursor -> TSNode -> MatchM ()
goto cursor node = liftIO (with node (ts_tree_cursor_reset_p cursor))
type Fields = [(FieldName, Node)]
-- | Return the fields remaining in the current branch, represented as 'Map.Map' of 'FieldName's to their corresponding 'Node's.
getFields :: Ptr Cursor -> Node -> MatchM Fields
getFields cursor node
| maxCount == 0 = pure []
| otherwise = do
nodes <- liftIO . allocaArray maxCount $ \ ptr -> do
actualCount <- ts_tree_cursor_copy_child_nodes cursor ptr
peekArray (fromIntegral actualCount) ptr
traverse (\ node -> (, node) <$> getFieldName node) nodes
where
maxCount = fromIntegral (nodeChildCount node)
getFieldName node
| nodeFieldName node == nullPtr = pure (FieldName "extraChildren")
| otherwise = FieldName . toHaskellCamelCaseIdentifier <$> liftIO (peekCString (nodeFieldName node))
lookupField :: FieldName -> Fields -> [Node]
lookupField k = map snd . filter ((== k) . fst)
-- | Return a 'ByteString' that contains a slice of the given 'ByteString'.
slice :: Range -> ByteString -> ByteString
slice (Range start end) = take . drop
where drop = B.drop start
take = B.take (end - start)
newtype FieldName = FieldName { getFieldName :: String }
deriving (Eq, Ord, Show)
-- | Generic construction of ASTs from a 'Map.Map' of named fields.
--
-- Product types (specifically, record types) are constructed by looking up the node for each corresponding field name in the map, moving the cursor to it, and then invoking 'unmarshalNode' to construct the value for that field. Leaf types are constructed as a special case of product types.
--
-- Sum types are constructed by using the current nodes symbol to select the corresponding constructor deterministically.
class GUnmarshal f where
gunmarshalNode
:: UnmarshalAnn a
=> Node
-> MatchM (f a)
instance (Datatype d, GUnmarshalData f) => GUnmarshal (M1 D d f) where
gunmarshalNode = go (gunmarshalNode' (datatypeName @d undefined)) where
go :: (Node -> MatchM (f a)) -> Node -> MatchM (M1 i c f a)
go = coerce
class GUnmarshalData f where
gunmarshalNode'
:: UnmarshalAnn a
=> String
-> Node
-> MatchM (f a)
instance GUnmarshalData f => GUnmarshalData (M1 i c f) where
gunmarshalNode' = go gunmarshalNode' where
go :: (String -> Node -> MatchM (f a)) -> String -> Node -> MatchM (M1 i c f a)
go = coerce
-- For anonymous leaf nodes:
instance GUnmarshalData U1 where
gunmarshalNode' _ _ = pure U1
-- For unary products:
instance UnmarshalAnn k => GUnmarshalData (K1 c k) where
gunmarshalNode' _ = go unmarshalAnn where
go :: (Node -> MatchM k) -> Node -> MatchM (K1 c k a)
go = coerce
-- For anonymous leaf nodes
instance GUnmarshalData Par1 where
gunmarshalNode' _ = go unmarshalAnn where
go :: (Node -> MatchM a) -> Node -> MatchM (Par1 a)
go = coerce
instance Unmarshal t => GUnmarshalData (Rec1 t) where
gunmarshalNode' _ = go unmarshalNode where
go :: (Node -> MatchM (t a)) -> Node -> MatchM (Rec1 t a)
go = coerce
-- For product datatypes:
instance (GUnmarshalProduct f, GUnmarshalProduct g) => GUnmarshalData (f :*: g) where
gunmarshalNode' datatypeName node = asks cursor >>= flip getFields node >>= gunmarshalProductNode @(f :*: g) datatypeName node
-- | Generically unmarshal products
class GUnmarshalProduct f where
gunmarshalProductNode
:: UnmarshalAnn a
=> String
-> Node
-> Fields
-> MatchM (f a)
-- Product structure
instance (GUnmarshalProduct f, GUnmarshalProduct g) => GUnmarshalProduct (f :*: g) where
gunmarshalProductNode datatypeName node fields = (:*:)
<$> gunmarshalProductNode @f datatypeName node fields
<*> gunmarshalProductNode @g datatypeName node fields
-- Contents of product types (ie., the leaves of the product tree)
instance UnmarshalAnn k => GUnmarshalProduct (M1 S c (K1 i k)) where
gunmarshalProductNode _ node _ = go unmarshalAnn node where
go :: (Node -> MatchM k) -> Node -> MatchM (M1 S c (K1 i k) a)
go = coerce
instance GUnmarshalProduct (M1 S c Par1) where
gunmarshalProductNode _ node _ = go unmarshalAnn node where
go :: (Node -> MatchM a) -> Node -> MatchM (M1 S c Par1 a)
go = coerce
instance (UnmarshalField f, Unmarshal g, Selector c) => GUnmarshalProduct (M1 S c (f :.: g)) where
gunmarshalProductNode datatypeName _ = go (unmarshalField datatypeName fieldName . lookupField (FieldName fieldName)) where
go :: (Fields -> MatchM (f (g a))) -> Fields -> MatchM (M1 S c (f :.: g) a)
go = coerce
fieldName = selName @c undefined
instance (Unmarshal t, Selector c) => GUnmarshalProduct (M1 S c (Rec1 t)) where
gunmarshalProductNode datatypeName _ fields =
case lookupField (FieldName fieldName) fields of
[] -> liftIO . throwIO . UnmarshalError $ "type '" <> datatypeName <> "' expected a node '" <> fieldName <> "' but didn't get one"
[x] -> go unmarshalNode x where
go :: (Node -> MatchM (t a)) -> Node -> MatchM (M1 S c (Rec1 t) a)
go = coerce
_ -> liftIO . throwIO . UnmarshalError $ "type '" <> datatypeName <> "' expected a node but got multiple"
where
fieldName = selName @c undefined
class GHasAnn a t where
gann :: t a -> a
instance GHasAnn a f => GHasAnn a (M1 i c f) where
gann = gann . unM1
instance (GHasAnn a l, GHasAnn a r) => GHasAnn a (l :+: r) where
gann (L1 l) = gann l
gann (R1 r) = gann r
instance {-# OVERLAPPABLE #-} HasField "ann" (t a) a => GHasAnn a t where
gann = getField @"ann"

View File

@ -24,9 +24,11 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-go ^>= 0.4.1
@ -50,5 +52,7 @@ library
import: haskell
exposed-modules:
Language.Go
Language.Go.AST
Language.Go.Grammar
Language.Go.Tags
hs-source-dirs: src

View File

@ -6,11 +6,11 @@ module Language.Go
import Data.Proxy
import qualified Language.Go.AST as Go
import qualified Language.Go.Tags as GoTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Go (tree_sitter_go)
import qualified TreeSitter.Go.AST as Go
import qualified TreeSitter.Unmarshal as TS
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: Go.SourceFile a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Go.AST
( module Language.Go.AST
) where
import Prelude hiding (False, Float, Integer, Rational, String, True)
import AST.GenerateSyntax
import qualified Language.Go.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_go "../../../vendor/tree-sitter-go/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Go.Grammar
( tree_sitter_go
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Go (tree_sitter_go)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-go/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_go

View File

@ -13,12 +13,12 @@ import Control.Effect.Reader
import Control.Effect.Writer
import Data.Text as Text
import GHC.Generics
import qualified Language.Go.AST as Go
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Go.AST as Go
import TreeSitter.Token
import AST.Token
class ToTags t where
tags

1
semantic-go/vendor/tree-sitter-go vendored Submodule

@ -0,0 +1 @@
Subproject commit 689cc8fbdc0613d267434f221af85aff91a31f8c

View File

@ -21,12 +21,16 @@ tested-with: GHC == 8.6.5
library
exposed-modules:
Language.Java
Language.Java.AST
Language.Java.Grammar
Language.Java.Tags
build-depends:
base >= 4.13 && < 5
, fused-effects ^>= 1.0
, semantic-codegen
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, tree-sitter ^>= 0.8
, tree-sitter-java ^>= 0.6.1
hs-source-dirs: src
@ -43,3 +47,4 @@ library
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
-Wno-missing-deriving-strategies

View File

@ -1,15 +1,15 @@
-- | Semantic functionality for Java programs.
module Language.Java
( Term(..)
, TreeSitter.Java.tree_sitter_java
, Language.Java.Grammar.tree_sitter_java
) where
import Data.Proxy
import qualified Language.Java.AST as Java
import qualified Language.Java.Tags as JavaTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Java (tree_sitter_java)
import qualified TreeSitter.Java.AST as Java
import qualified TreeSitter.Unmarshal as TS
import qualified Language.Java.Grammar (tree_sitter_java)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: Java.Program a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Java.AST
( module Language.Java.AST
) where
import AST.GenerateSyntax
import qualified Language.Java.Grammar as Grammar
import AST.Token
astDeclarationsForLanguage Grammar.tree_sitter_java "../../../vendor/tree-sitter-java/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Java.Grammar
( tree_sitter_java
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Java (tree_sitter_java)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-java/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_java

View File

@ -11,13 +11,13 @@ module Language.Java.Tags
import Control.Effect.Reader
import Control.Effect.Writer
import GHC.Generics
import qualified Language.Java.AST as Java
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Java.AST as Java
import TreeSitter.Token
import AST.Token
class ToTags t where
tags
@ -140,7 +140,7 @@ instance ToTags Java.FieldAccess
instance ToTags Java.FieldDeclaration
instance ToTags Java.FinallyClause
instance ToTags Java.FloatingPointType
instance ToTags Java.ForInit
-- instance ToTags Java.ForInit
instance ToTags Java.ForStatement
instance ToTags Java.FormalParameter
instance ToTags Java.FormalParameters
@ -160,7 +160,7 @@ instance ToTags Java.LabeledStatement
instance ToTags Java.LambdaExpression
instance ToTags Java.Literal
instance ToTags Java.LocalVariableDeclaration
instance ToTags Java.LocalVariableDeclarationStatement
-- instance ToTags Java.LocalVariableDeclarationStatement
instance ToTags Java.MarkerAnnotation
-- instance ToTags Java.MethodDeclaration
-- instance ToTags Java.MethodInvocation

@ -0,0 +1 @@
Subproject commit afc4cec799f6594390aeb0ca5e16ec89e73d488e

View File

@ -21,9 +21,13 @@ tested-with: GHC == 8.6.5
library
exposed-modules:
Language.JSON
Language.JSON.AST
Language.JSON.Grammar
build-depends:
base >= 4.13 && < 5
, semantic-codegen
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, tree-sitter ^>= 0.8
, tree-sitter-json ^>= 0.6
hs-source-dirs: src
@ -40,3 +44,5 @@ library
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies

View File

@ -1,14 +1,14 @@
-- | Semantic functionality for JSON programs.
module Language.JSON
( Term(..)
, TreeSitter.JSON.tree_sitter_json
, Language.JSON.Grammar.tree_sitter_json
) where
import Data.Proxy
import qualified Language.JSON.AST as JSON
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.JSON (tree_sitter_json)
import qualified TreeSitter.JSON.AST as JSON
import qualified TreeSitter.Unmarshal as TS
import qualified Language.JSON.Grammar (tree_sitter_json)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: JSON.Document a }

View File

@ -0,0 +1,20 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.JSON.AST
( module Language.JSON.AST
) where
import Prelude hiding (String)
import AST.GenerateSyntax
import qualified Language.JSON.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_json "../../../vendor/tree-sitter-json/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.JSON.Grammar
( tree_sitter_json
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.JSON (tree_sitter_json)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-json/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_json

@ -0,0 +1 @@
Subproject commit 7b6a33f300e3e88c3017e0a9d88c77b50ea6d149

View File

@ -26,10 +26,12 @@ common haskell
, parsers ^>= 0.12.10
, semantic-analysis ^>= 0
, semantic-core ^>= 0.0
, semantic-codegen
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, semantic-scope-graph ^>= 0.0
, semilattices ^>= 0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-python ^>= 0.8.1
@ -54,7 +56,9 @@ library
import: haskell
exposed-modules:
Language.Python
Language.Python.AST
Language.Python.Core
Language.Python.Grammar
Language.Python.Failure
Language.Python.Patterns
Language.Python.ScopeGraph
@ -80,6 +84,7 @@ test-suite compiling
, process ^>= 1.6.5
, resourcet ^>= 1.2.2
, semantic-analysis ^>= 0
, semantic-codegen
, streaming ^>= 0.2.2
, streaming-process ^>= 0.1
, streaming-bytestring ^>= 0.1.6
@ -100,6 +105,7 @@ test-suite graphing
build-depends: base
, semantic-python
, semantic-codegen
, semantic-scope-graph
, bytestring
, pathtype

View File

@ -1,17 +1,17 @@
-- | Semantic functionality for Python programs.
module Language.Python
( Term(..)
, TreeSitter.Python.tree_sitter_python
, Language.Python.Grammar.tree_sitter_python
) where
import Data.Proxy
import qualified Language.Python.AST as Py
import Language.Python.ScopeGraph
import qualified Language.Python.Tags as PyTags
import ScopeGraph.Convert
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Python (tree_sitter_python)
import qualified TreeSitter.Python.AST as Py
import qualified TreeSitter.Unmarshal as TS
import qualified Language.Python.Grammar (tree_sitter_python)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: Py.Module a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Python.AST
( module Language.Python.AST
) where
import Prelude hiding (False, Float, Integer, String, True)
import AST.GenerateSyntax
import qualified Language.Python.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_python "../../../vendor/tree-sitter-python/src/node-types.json"

View File

@ -33,12 +33,12 @@ import Data.Function
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe
import GHC.Records
import qualified Language.Python.AST as Py
import Language.Python.Failure
import Language.Python.Patterns
import Source.Span (Span)
import Syntax.Stack (Stack (..))
import qualified Syntax.Stack as Stack
import qualified TreeSitter.Python.AST as Py
-- | Keeps track of the current scope's bindings (so that we can, when
-- compiling a class or module, return the list of bound variables as

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Python.Grammar
( tree_sitter_python
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Python (tree_sitter_python)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-python/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_python

View File

@ -10,7 +10,7 @@ module Language.Python.Patterns
import AST.Element
import qualified Analysis.Name
import qualified TreeSitter.Python.AST as Py
import qualified Language.Python.AST as Py
-- | Useful pattern synonym for extracting a single identifier from
-- a Python ExpressionList. Easier than pattern-matching every time.

View File

@ -34,10 +34,10 @@ import qualified Data.ScopeGraph as ScopeGraph
import Data.Traversable
import GHC.Records
import GHC.TypeLits
import qualified Language.Python.AST as Py
import Language.Python.Patterns
import ScopeGraph.Convert (Result (..), complete, todo)
import Source.Loc
import qualified TreeSitter.Python.AST as Py
-- This orphan instance will perish once it lands in fused-effects.
instance Algebra sig m => Algebra sig (Ap m) where

View File

@ -16,13 +16,13 @@ import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (listToMaybe)
import Data.Text as Text
import GHC.Generics
import qualified Language.Python.AST as Py
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Python.AST as Py
import TreeSitter.Token
import AST.Token
class ToTags t where
tags

View File

@ -24,8 +24,8 @@ import qualified System.Path as Path
import qualified System.Path.Directory as Path
import qualified Test.Tasty as Tasty
import qualified Test.Tasty.HUnit as HUnit
import qualified TreeSitter.Python as TSP
import qualified TreeSitter.Unmarshal as TS
import qualified Language.Python.Grammar as TSP
import qualified AST.Unmarshal as TS
{-

View File

@ -39,8 +39,8 @@ import System.Path ((</>))
import qualified System.Path as Path
import qualified System.Path.Directory as Path
import qualified Text.Trifecta as Trifecta
import qualified TreeSitter.Python as TSP
import qualified TreeSitter.Unmarshal as TS
import qualified Language.Python.Grammar as TSP
import qualified AST.Unmarshal as TS
import qualified Test.Tasty as Tasty
import qualified Test.Tasty.HUnit as HUnit

@ -0,0 +1 @@
Subproject commit 899ac8d5d6c883b2f05362c9953e14e78aac474c

View File

@ -24,9 +24,11 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-ruby ^>= 0.4.1
@ -50,5 +52,7 @@ library
import: haskell
exposed-modules:
Language.Ruby
Language.Ruby.AST
Language.Ruby.Grammar
Language.Ruby.Tags
hs-source-dirs: src

View File

@ -3,17 +3,17 @@
-- | Semantic functionality for Ruby programs.
module Language.Ruby
( Term(..)
, TreeSitter.Ruby.tree_sitter_ruby
, Language.Ruby.Grammar.tree_sitter_ruby
) where
import qualified AST.Unmarshal as TS
import Control.Carrier.State.Strict
import Data.Proxy
import Data.Text (Text)
import qualified Language.Ruby.AST as Rb
import qualified Language.Ruby.Grammar (tree_sitter_ruby)
import qualified Language.Ruby.Tags as RbTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Ruby (tree_sitter_ruby)
import qualified TreeSitter.Ruby.AST as Rb
import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: Rb.Program a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Ruby.AST
( module Language.Ruby.AST
) where
import Prelude hiding (False, Float, Integer, Rational, String, True)
import AST.GenerateSyntax
import qualified Language.Ruby.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_ruby "../../../vendor/tree-sitter-ruby/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Ruby.Grammar
( tree_sitter_ruby
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Ruby (tree_sitter_ruby)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-ruby/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_ruby

View File

@ -19,14 +19,14 @@ import Control.Monad
import Data.Foldable
import Data.Text as Text
import GHC.Generics
import qualified Language.Ruby.AST as Rb
import Source.Loc
import Source.Range as Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Ruby.AST as Rb
import TreeSitter.Token
import qualified TreeSitter.Unmarshal as TS
import AST.Token
import qualified AST.Unmarshal as TS
class ToTags t where
tags

@ -0,0 +1 @@
Subproject commit eb2b6225bfb80010f2e4cbd27db8c6f3775230b5

View File

@ -24,9 +24,11 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-tsx ^>= 0.4.2
@ -50,5 +52,6 @@ library
import: haskell
exposed-modules:
Language.TSX
Language.TSX.AST
Language.TSX.Tags
hs-source-dirs: src

View File

@ -6,11 +6,11 @@ module Language.TSX
) where
import Data.Proxy
import qualified Language.TSX.AST as TSX
import qualified Language.TSX.Tags as TsxTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.TSX (tree_sitter_tsx)
import qualified TreeSitter.TSX.AST as TSX
import qualified TreeSitter.Unmarshal as TS
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: TSX.Program a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.TSX.AST
( module Language.TSX.AST
) where
import Prelude hiding (False, Float, Integer, String, True)
import AST.GenerateSyntax
import qualified TreeSitter.TSX as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_tsx "../../../vendor/tree-sitter-typescript/tsx/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.TSX.Grammar
( tree_sitter_tsx
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.TSX (tree_sitter_tsx)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-typescript/tsx/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_tsx

View File

@ -15,12 +15,12 @@ import Control.Effect.Writer
import Data.Foldable
import Data.Text as Text
import GHC.Generics
import qualified Language.TSX.AST as Tsx
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import TreeSitter.Token
import qualified TreeSitter.TSX.AST as Tsx
import AST.Token
class ToTags t where
tags

@ -0,0 +1 @@
Subproject commit aa950f58ea8aa112bc72f3481b98fc2d3c07b3e0

View File

@ -24,9 +24,11 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-typescript ^>= 0.4.2
@ -50,5 +52,7 @@ library
import: haskell
exposed-modules:
Language.TypeScript
Language.TypeScript.AST
Language.TypeScript.Grammar
Language.TypeScript.Tags
hs-source-dirs: src

View File

@ -2,15 +2,15 @@
-- | Semantic functionality for TypeScript programs.
module Language.TypeScript
( Term(..)
, TreeSitter.TypeScript.tree_sitter_typescript
, Language.TypeScript.Grammar.tree_sitter_typescript
) where
import Data.Proxy
import qualified Language.TypeScript.AST as TypeScript
import qualified Language.TypeScript.Tags as TsTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.TypeScript (tree_sitter_typescript)
import qualified TreeSitter.TypeScript.AST as TypeScript
import qualified TreeSitter.Unmarshal as TS
import qualified Language.TypeScript.Grammar (tree_sitter_typescript)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: TypeScript.Program a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.TypeScript.AST
( module Language.TypeScript.AST
) where
import Prelude hiding (False, Float, Integer, String, True)
import AST.GenerateSyntax
import qualified Language.TypeScript.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_typescript "../../../vendor/tree-sitter-typescript/typescript/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.TypeScript.Grammar
( tree_sitter_typescript
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.TypeScript (tree_sitter_typescript)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-typescript/typescript/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_typescript

View File

@ -15,12 +15,12 @@ import Control.Effect.Writer
import Data.Foldable
import Data.Text as Text
import GHC.Generics
import qualified Language.TypeScript.AST as Ts
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import TreeSitter.Token
import qualified TreeSitter.TypeScript.AST as Ts
import AST.Token
class ToTags t where
tags

@ -0,0 +1 @@
Subproject commit 40320d8e0953db5e173e6dfbb596d375a085ca65

View File

@ -67,6 +67,7 @@ common dependencies
, recursion-schemes ^>= 5.1
, scientific ^>= 0.3.6.2
, safe-exceptions ^>= 0.1.7.0
, semantic-codegen
, semantic-analysis ^>= 0
, semantic-source ^>= 0.0.2
, semilattices ^>= 0.0.0.3
@ -150,7 +151,7 @@ library
, Data.Error
, Data.Flag
, Data.Functor.Classes.Generic
, Data.Graph
, Data.Graph.Algebraic
, Data.Graph.ControlFlowVertex
, Data.Handle
, Data.History
@ -304,6 +305,8 @@ library
, unordered-containers ^>= 0.2.9.0
, vector ^>= 0.12.0.2
, tree-sitter-go ^>= 0.4.1.1
, tree-sitter-java ^>= 0.6.1
, tree-sitter-json ^>= 0.6
, tree-sitter-php ^>= 0.2
, tree-sitter-python ^>= 0.8.1
, tree-sitter-ruby ^>= 0.4.1
@ -358,6 +361,7 @@ test-suite test
, Generators
, Properties
build-depends: semantic
, semantic-json
, tree-sitter-json ^>= 0.6
, Glob ^>= 0.10.0
, hedgehog ^>= 1

View File

@ -34,7 +34,7 @@ import Control.Effect.Sum.Project
import Data.Abstract.BaseError
import Data.Abstract.Module (Module (moduleInfo), ModuleInfo (..))
import Data.ByteString.Builder
import Data.Graph
import Data.Graph.Algebraic
import Data.Graph.ControlFlowVertex
import Data.Map (Map)
import qualified Data.Map as Map

View File

@ -4,7 +4,7 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
module Data.Graph
module Data.Graph.Algebraic
( Graph(..)
, overlay
, connect

View File

@ -29,7 +29,7 @@ import Data.Abstract.Declarations
import Data.Abstract.Module (ModuleInfo (..))
import Data.Abstract.Package (PackageInfo (..))
import Data.Aeson
import Data.Graph (VertexTag (..))
import Data.Graph.Algebraic (VertexTag (..))
import Data.Hashable
import Data.Proxy
import Data.Quieterm (Quieterm (..))

View File

@ -15,7 +15,7 @@ import Assigning.Assignment hiding (Assignment, Error)
import qualified Assigning.Assignment as Assignment
import Control.Monad
import qualified Data.Abstract.ScopeGraph as ScopeGraph (AccessControl (..))
import Data.ImportPath (defaultAlias, importPath)
import Data.ImportPath ()
import Data.List.NonEmpty (NonEmpty (..), some1)
import Data.Sum
import Data.Syntax
@ -31,7 +31,8 @@ import qualified Data.Term as Term
import Language.Go.Syntax as Go.Syntax hiding (labelName, runeLiteral)
import Language.Go.Term as Go
import Language.Go.Type as Go.Type
import TreeSitter.Go as Grammar
import Data.ImportPath (importPath, defaultAlias)
import Language.Go.Grammar as Grammar
type Assignment = Assignment.Assignment [] Grammar

View File

@ -40,7 +40,7 @@ import qualified Data.Syntax.Statement as Statement
import qualified Data.Syntax.Type as Type
import Language.Python.Syntax as Python.Syntax
import Language.Python.Term as Python
import TreeSitter.Python as Grammar
import Language.Python.Grammar as Grammar
type Assignment = Assignment.Assignment [] Grammar

View File

@ -42,7 +42,7 @@ import qualified Data.Syntax.Statement as Statement
import qualified Data.Text as Text
import qualified Language.Ruby.Syntax as Ruby.Syntax
import Language.Ruby.Term as Ruby
import TreeSitter.Ruby as Grammar
import Language.Ruby.Grammar as Grammar
type Assignment = Assignment.Assignment [] Grammar

View File

@ -43,7 +43,7 @@ import qualified Data.Syntax.Type as Type
import qualified Language.TypeScript.Resolution as TypeScript.Resolution
import qualified Language.TypeScript.Syntax as TypeScript.Syntax
import Language.TypeScript.Term as TypeScript
import TreeSitter.TypeScript as Grammar
import Language.TypeScript.Grammar as Grammar
type Assignment = Assignment.Assignment [] Grammar

View File

@ -66,14 +66,14 @@ import qualified Language.TSX.Assignment as TSXALaCarte
import qualified Language.TypeScript as TypeScriptPrecise
import qualified Language.TypeScript.Assignment as TypeScriptALaCarte
import Prelude hiding (fail)
import TreeSitter.Go
import Language.Go.Grammar
import qualified TreeSitter.Language as TS (Language, Symbol)
import TreeSitter.PHP
import TreeSitter.Python
import TreeSitter.Ruby (tree_sitter_ruby)
import Language.Python.Grammar
import Language.Ruby.Grammar (tree_sitter_ruby)
import TreeSitter.TSX
import TreeSitter.TypeScript
import TreeSitter.Unmarshal
import Language.TypeScript.Grammar
import AST.Unmarshal
-- | A parser from 'Source' onto some term type.
data Parser term where

View File

@ -36,7 +36,7 @@ import qualified TreeSitter.Language as TS
import qualified TreeSitter.Node as TS
import qualified TreeSitter.Parser as TS
import qualified TreeSitter.Tree as TS
import qualified TreeSitter.Unmarshal as TS
import qualified AST.Unmarshal as TS
data TSParseException
= ParserTimedOut

View File

@ -20,7 +20,7 @@ import Data.Diff
import Data.Edit
import Data.Foldable
import Data.Functor.Foldable
import Data.Graph
import Data.Graph.Algebraic
import Data.ProtoLens (defMessage)
import Data.String (IsString (..))
import Data.Term

View File

@ -28,7 +28,7 @@ import Data.Diff
import Data.Edit
import Data.Foldable
import Data.Functor.Classes
import Data.Graph
import Data.Graph.Algebraic
import Data.JSON.Fields (ToJSONFields1)
import Data.Language
import Data.Map.Strict (Map)

View File

@ -29,7 +29,7 @@ import Data.Either
import Data.Foldable
import Data.Functor.Classes
import Data.Functor.Foldable
import Data.Graph
import Data.Graph.Algebraic (Edge(..), vertexList, edgeList)
import Data.Language
import Data.Map.Strict (Map)
import Data.ProtoLens (defMessage)

View File

@ -68,7 +68,7 @@ import Data.Abstract.Value.Concrete as Concrete (Value, ValueError (..
import Data.Abstract.Value.Type as Type
import Data.Blob
import Data.Functor.Foldable
import Data.Graph
import Data.Graph.Algebraic
import Data.Graph.ControlFlowVertex (VertexDeclaration)
import Data.Language as Language
import Data.List (find, isPrefixOf)

View File

@ -36,7 +36,7 @@ import qualified Data.Abstract.ModuleTable as ModuleTable
import Data.Abstract.Package
import Data.Abstract.Value.Concrete as Concrete
import Data.Blob.IO
import Data.Graph (topologicalSort)
import Data.Graph.Algebraic (topologicalSort)
import qualified Data.Language as Language
import Data.List (uncons)
import Data.Maybe

View File

@ -1,10 +1,8 @@
{-# LANGUAGE PackageImports #-}
module Data.Graph.Spec (spec) where
import SpecHelpers
import "semantic" Data.Graph
import Data.Graph.Algebraic
import qualified Algebra.Graph.Class as Class
spec :: Spec

View File

@ -1,7 +1,6 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE TypeApplications #-}
module Graphing.Calls.Spec ( spec ) where
@ -13,7 +12,7 @@ import Algebra.Graph
import qualified Analysis.File as File
import Control.Effect.Parse
import "semantic" Data.Graph (Graph (..), topologicalSort)
import Data.Graph.Algebraic (Graph (..), topologicalSort)
import Data.Graph.ControlFlowVertex
import qualified Data.Language as Language
import Semantic.Graph

View File

@ -10,7 +10,7 @@ import Parsing.TreeSitter
import Source.Source
import SpecHelpers
import qualified System.Path as Path
import TreeSitter.JSON (Grammar, tree_sitter_json)
import Language.JSON.Grammar (Grammar, tree_sitter_json)
spec :: Spec
spec = do