diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml index 21c35846c..1129295df 100644 --- a/.github/workflows/haskell.yml +++ b/.github/workflows/haskell.yml @@ -37,7 +37,7 @@ jobs: name: Cache ~/.cabal/store with: path: ~/.cabal/store - key: ${{ runner.os }}-${{ matrix.ghc }}-v5-cabal-store + key: ${{ runner.os }}-${{ matrix.ghc }}-v6-cabal-store - uses: actions/cache@v1 name: Cache dist-newstyle @@ -52,7 +52,7 @@ jobs: - name: Install dependencies run: | - cabal v2-update + script/bootstrap cabal v2-configure --project-file=cabal.project.ci --disable-optimization --enable-benchmarks --enable-tests --write-ghc-environment-files=always -j2 cabal v2-build --project-file=cabal.project.ci all --only-dependencies diff --git a/.gitmodules b/.gitmodules index e69de29bb..bd49d21d1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,21 @@ +[submodule "semantic-json/vendor/tree-sitter-json"] + path = semantic-json/vendor/tree-sitter-json + url = https://github.com/tree-sitter/tree-sitter-json.git +[submodule "semantic-python/vendor/tree-sitter-python"] + path = semantic-python/vendor/tree-sitter-python + url = https://github.com/tree-sitter/tree-sitter-python.git +[submodule "semantic-java/vendor/tree-sitter-java"] + path = semantic-java/vendor/tree-sitter-java + url = https://github.com/tree-sitter/tree-sitter-java.git +[submodule "semantic-go/vendor/tree-sitter-go"] + path = semantic-go/vendor/tree-sitter-go + url = https://github.com/tree-sitter/tree-sitter-go.git +[submodule "semantic-ruby/vendor/tree-sitter-ruby"] + path = semantic-ruby/vendor/tree-sitter-ruby + url = https://github.com/tree-sitter/tree-sitter-ruby.git +[submodule "semantic-typescript/vendor/tree-sitter-typescript"] + path = semantic-typescript/vendor/tree-sitter-typescript + url = https://github.com/tree-sitter/tree-sitter-typescript.git +[submodule "semantic-tsx/vendor/tree-sitter-typescript"] + path = semantic-tsx/vendor/tree-sitter-typescript + url = https://github.com/tree-sitter/tree-sitter-typescript.git diff --git a/bench/Evaluation.hs b/bench/Evaluation.hs index 101da54da..df13b4ff8 100644 --- a/bench/Evaluation.hs +++ b/bench/Evaluation.hs @@ -1,6 +1,5 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE PackageImports #-} {-# LANGUAGE PartialTypeSignatures #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} @@ -12,7 +11,7 @@ import Data.Abstract.Evaluatable import Data.Bifunctor import Data.Blob.IO (readBlobFromPath) import qualified Data.Duration as Duration -import "semantic" Data.Graph (topologicalSort) +import Data.Graph.Algebraic (topologicalSort) import qualified Data.Language as Language import Data.Project import Data.Proxy diff --git a/cabal.project b/cabal.project index 5e020ac53..3bba22f83 100644 --- a/cabal.project +++ b/cabal.project @@ -1,9 +1,10 @@ --- ATTENTION: care must be taken to keep this file in sync with cabal.project.ci. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of that file). +-- ATTENTION: care must be taken to keep this file in sync with cabal.project.ci and script/ghci-flags. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of that file). -- Local packages packages: . semantic-analysis semantic-ast + semantic-codegen semantic-core semantic-go semantic-java diff --git a/cabal.project.ci b/cabal.project.ci index 51b00d1da..4ce526d96 100644 --- a/cabal.project.ci +++ b/cabal.project.ci @@ -1,9 +1,10 @@ --- ATTENTION: care must be taken to keep this file in sync with cabal.project. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of this file). +-- ATTENTION: care must be taken to keep this file in sync with cabal.project and script/ghci-flags. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of this file). -- Local packages packages: . semantic-analysis semantic-ast + semantic-codegen semantic-core semantic-go semantic-java @@ -43,6 +44,9 @@ package semantic-analysis package semantic-ast ghc-options: -Werror +package semantic-codegen + ghc-options: -Werror + package semantic-core ghc-options: -Werror diff --git a/docs/adding-new-languages.md b/docs/adding-new-languages.md index 9d6bc5e0f..a12b33f08 100644 --- a/docs/adding-new-languages.md +++ b/docs/adding-new-languages.md @@ -9,7 +9,7 @@ Please note that this list of steps reflects the state of Semantic as is, not wh 1. **Find or write a [tree-sitter](https://tree-sitter.github.io) parser for your language.** The tree-sitter [organization page](https://github.com/tree-sitter) has a number of parsers beyond those we currently support in Semantic; look there first to make sure you're not duplicating work. The tree-sitter [documentation on creating parsers](http://tree-sitter.github.io/tree-sitter/creating-parsers) provides an exhaustive look at the process of developing and debugging tree-sitter parsers. Though we do not support grammars written with other toolkits such as [ANTLR](https://www.antlr.org), translating an ANTLR or other BNF-style grammar into a tree-sitter grammar is usually straightforward. 2. **Create a Haskell library providing an interface to that C source.** The [`haskell-tree-sitter`](https://github.com/tree-sitter/haskell-tree-sitter) repository provides a Cabal package for each supported language. You can find an example of a pull request to add such a package here. Each package needs to provide two API surfaces: * a bridged (via the FFI) reference to the toplevel parser in the generated file ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/internal/TreeSitter/JSON/Internal.hs)) - * symbol datatypes for each syntax node in the parser, generated with the `mkSymbolDatatype` Template Haskell splice ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/TreeSitter/JSON.hs)) + * symbol datatypes for each syntax node in the parser, generated with the `mkSymbolDatatype` Template Haskell splice ([example](https://github.com/github/semantic/blob/master/semantic-json/src/Language/JSON/Grammar.hs)). For more information, see [CodeGen docs](https://github.com/github/semantic/blob/master/semantic-codegen/README.md). 3. **Identify the new syntax nodes required to represent your language.** While we provide an extensive library of reusable AST nodes for [literals](https://github.com/github/semantic/blob/master/src/Data/Syntax/Literal.hs), [expressions](https://github.com/github/semantic/blob/master/src/Data/Syntax/Expression.hs), [statements](https://github.com/github/semantic/blob/master/src/Data/Syntax/Statement.hs), and [types](https://github.com/github/semantic/blob/master/src/Data/Syntax/Type.hs), most languages will require some syntax nodes not found in other languages. You'll need to create a new module providing those data types, and those data types must be written as an open union: [here](https://github.com/github/semantic/commits/master/src/Language/Ruby/Syntax.hs?author=charliesome) is an example for Ruby's syntactic details. 4. **Write an assignment step that translates tree-sitter trees into Haskell datatypes.** More information about this can be found in the [assignment documentation](assignment.md). This is currently the most time-consuming and error-prone part of the process (see [https://github.com/github/semantic/issues/77]). 5. **Implement `Evaluatable` instances and add new [`Value` effects](https://github.com/github/semantic/blob/master/src/Control/Abstract/Value.hs) as is needed to describe the control flow of your language.** While several features of Semantic (e.g. `semantic parse --symbols` and `semantic diff`) will become fully available given a working assignment step, further features based on concrete or abstract interpretation (such as `semantic graph`) require implementing the `Evaluatable` typeclass and providing value-style effects for each control flow feature provided by the language. This means that language support is a spectrum: Semantic can provide useful information without any knowledge of a language's semantics, but each successive addition to its interpretive capabilities enables more functionality. diff --git a/script/ghci-flags b/script/ghci-flags index e72205d1f..627c9f3e8 100755 --- a/script/ghci-flags +++ b/script/ghci-flags @@ -48,6 +48,7 @@ function flags { # TODO: would be nice to figure this out from cabal.project & the .cabal files echo "-isemantic-analysis/src" echo "-isemantic-ast/src" + echo "-isemantic-codegen/src" echo "-isemantic-core/src" echo "-isemantic-go/src" echo "-isemantic-java/src" diff --git a/semantic-ast/app/Main.hs b/semantic-ast/app/Main.hs index 82ed1da7f..9425b22a5 100644 --- a/semantic-ast/app/Main.hs +++ b/semantic-ast/app/Main.hs @@ -2,9 +2,9 @@ module Main (main) where -import TreeSitter.Unmarshal -import qualified TreeSitter.Python.AST as AST -import qualified TreeSitter.Python as Python +import AST.Unmarshal +import qualified Language.Python.AST as AST +import qualified Language.Python.Grammar as Python import Source.Range import Source.Span import Data.Aeson (toJSON) diff --git a/semantic-ast/semantic-ast.cabal b/semantic-ast/semantic-ast.cabal index 70735ba2f..0702851ca 100644 --- a/semantic-ast/semantic-ast.cabal +++ b/semantic-ast/semantic-ast.cabal @@ -39,19 +39,22 @@ library import: haskell exposed-modules: Marshal.JSON + -- other-modules: -- other-extensions: build-depends: base ^>= 4.13 - , tree-sitter ^>= 0.8 - , semantic-source ^>= 0.0.2 - , tree-sitter-python ^>= 0.8.1 - , bytestring ^>= 0.10.8.2 - , optparse-applicative >= 0.14.3 && < 0.16 - , pretty-simple ^>= 3.1.0.0 , aeson ^>= 1.4.2.0 - , text ^>= 1.2.3.1 - , bytestring ^>= 0.10.8.2 , aeson-pretty ^>= 0.8.8 + , bytestring ^>= 0.10.8.2 + , tree-sitter ^>= 0.8 + , semantic-source ^>= 0.0.2 + , template-haskell ^>= 2.15 + , tree-sitter-python ^>= 0.8.1 + , bytestring ^>= 0.10.8.2 + , optparse-applicative >= 0.14.3 && < 0.16 + , pretty-simple ^>= 3.1.0.0 + , text ^>= 1.2.3.1 + hs-source-dirs: src default-language: Haskell2010 @@ -72,5 +75,7 @@ executable semantic-ast , aeson , bytestring , aeson-pretty + , semantic-python + , text hs-source-dirs: app default-language: Haskell2010 diff --git a/semantic-codegen/CHANGELOG.md b/semantic-codegen/CHANGELOG.md new file mode 100644 index 000000000..f97ded255 --- /dev/null +++ b/semantic-codegen/CHANGELOG.md @@ -0,0 +1,5 @@ +# Revision history for semantic-codegen + +## 0.1.0.0 -- YYYY-mm-dd + +* First version. Released on an unsuspecting world. diff --git a/semantic-codegen/Main.hs b/semantic-codegen/Main.hs new file mode 100644 index 000000000..65ae4a05d --- /dev/null +++ b/semantic-codegen/Main.hs @@ -0,0 +1,4 @@ +module Main where + +main :: IO () +main = putStrLn "Hello, Haskell!" diff --git a/semantic-codegen/README.md b/semantic-codegen/README.md new file mode 100644 index 000000000..18b78a94b --- /dev/null +++ b/semantic-codegen/README.md @@ -0,0 +1,216 @@ +# CodeGen Documentation + +CodeGen is the process for auto-generating language-specific, strongly-typed ASTs to be used in [Semantic](https://github.com/github/semantic-code/blob/d9f91a05dc30a61b9ff8c536d75661d417f3c506/design-docs/precise-code-navigation.md). + +_Note: This project was recently moved from `tree-sitter` into `Semantic`. These docs are in the process of being updated to reflect changes._ + +### Prerequisites +To get started, first make sure your language has: + +1. An existing [tree-sitter](http://tree-sitter.github.io/tree-sitter/) parser; +2. An existing Cabal package in this repository for said language. This will provide an interface into tree-sitter's C source. [Here](https://github.com/tree-sitter/haskell-tree-sitter/tree/master/tree-sitter-python) is an example of a library for Python, a supported language that the remaining documentation will refer to. + +### CodeGen Pipeline + +During parser generation, tree-sitter produces a JSON file that captures the structure of a language's grammar. Based on this, we're able to derive datatypes representing surface languages, and then use those datatypes to generically build ASTs. This automates the engineering effort [historically required for adding a new language](https://github.com/github/semantic/blob/master/docs/adding-new-languages.md). + +The following steps provide a high-level outline of the process: + +1. [**Deserialize.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/Deserialize.hs) First, we deserialize the `node-types.json` file for a given language into the desired shape of datatypes via parsing capabilities afforded by the [Aeson](http://hackage.haskell.org/package/aeson) library. There are four distinct types represented in the node-types.json file takes on: sums, products, named leaves and anonymous leaves. +2. [**Generate Syntax.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/GenerateSyntax.hs) We then use Template Haskell to auto-generate language-specific, strongly-typed datatypes that represent various language constructs. This API exports the top-level function `astDeclarationsForLanguage` to auto-generate datatypes at compile-time, which is is invoked by a given language [AST](https://github.com/github/semantic/blob/master/semantic-python/src/Language/Python/AST.hs) module. +3. [**Unmarshal.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/Unmarshal.hs) Unmarshaling is the process of iterating over tree-sitter’s parse trees using its tree cursor API, and producing Haskell ASTs for the relevant nodes. We parse source code from tree-sitter and unmarshal the data we get to build these ASTs generically. This file exports the top-level function `parseByteString`, which takes source code and a language as arguments, and produces an AST. + +Here is an example that describes the relationship between a Python identifier represented in the tree-sitter generated JSON file, and a datatype generated by Template Haskell based on the provided JSON: + +| Type | JSON | TH-generated code | +|----------|--------------|------------| +|Named leaf|{
"type": "identifier",
"named": true
}|data TreeSitter.Python.AST.Identifier a
= TreeSitter.Python.AST.Identifier {TreeSitter.Python.AST.ann :: a,
TreeSitter.Python.AST.bytes :: text-1.2.3.1:Data.Text.Internal.Text} -- Defined at TreeSitter/Python/AST.hs:10:1
instance Show a => Show (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1
instance Ord a => Ord (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1
instance Eq a => Eq (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1
instance Traversable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1
instance Functor TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1
instance Foldable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1
instance Unmarshal TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1
instance SymbolMatching TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1| + +The remaining document provides more details on generating ASTs, inspecting datatypes, tests, and information on decisions pertaining to relevant APIs. +___ + +### Table of Contents +- [CodeGen Documentation](#codegen-documentation) + - [Prerequisites](#prerequisites) + - [CodeGen Pipeline](#codegen-pipeline) + - [Table of Contents](#table-of-contents) + - [Generating ASTs](#generating-asts) + - [Inspecting auto-generated datatypes](#inspecting-auto-generated-datatypes) + - [Tests](#tests) + - [Additional notes](#additional-notes) +___ + +### Generating ASTs + +To parse source code and produce ASTs locally: + +1. Load the REPL for a given language: + +``` +cabal new-repl lib:tree-sitter-python +``` + +2. Set language extensions, `OverloadedStrings` and `TypeApplications`, and import relevant modules, `AST.Unmarshal`, `Source.Range` and `Source.Span`: + +``` +:seti -XOverloadedStrings +:seti -XTypeApplications + +import Source.Span +import Source.Range +import AST.Unmarshal +``` + +3. You can now call `parseByteString`, passing in the desired language you wish to parse (in this case Python exemplified by `tree_sitter_python`), and the source code (in this case an integer `1`). Since the function is constrained by `(Unmarshal t, UnmarshalAnn a)`, you can use type applications to provide a top-level node `t`, an entry point into the tree, in addition to a polymorphic annotation `a` used to represent range and span: + +``` +parseByteString @TreeSitter.Python.AST.Module @(Source.Span.Span, Source.Range.Range) tree_sitter_python "1" +``` + +This generates the following AST: + +``` +Right + ( Module + { ann = + ( Range + { start = 0 + , end = 1 + } + , Span + { start = Pos + { line = 0 + , column = 0 + } + , end = Pos + { line = 0 + , column = 1 + } + } + ) + , extraChildren = + [ R1 + ( SimpleStatement + ( L1 + ( R1 + ( R1 + ( L1 + ( ExpressionStatement + { ann = + ( Range + { start = 0 + , end = 1 + } + , Span + { start = Pos + { line = 0 + , column = 0 + } + , end = Pos + { line = 0 + , column = 1 + } + } + ) + , extraChildren = L1 + ( L1 + ( Expression + ( L1 + ( L1 + ( L1 + ( PrimaryExpression + ( R1 + ( L1 + ( L1 + ( L1 + ( Integer + { ann = + ( Range + { start = 0 + , end = 1 + } + , Span + { start = Pos + { line = 0 + , column = 0 + } + , end = Pos + { line = 0 + , column = 1 + } + } + ) + , text = "1" + } + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) :| [] + } + ) + ) + ) + ) + ) + ) + ] + } + ) +``` + +### Inspecting auto-generated datatypes + +Datatypes are derived from a language and its `node-types.json` file using the GenerateSyntax API. Definition can be viewed in the REPL just as they would for any other datatype, using `:i`: + +``` +:i TreeSitter.Python.AST.Module +``` + +This shows us the auto-generated `Module` datatype: + +```Haskell +data TreeSitter.Python.AST.Module a + = TreeSitter.Python.AST.Module {TreeSitter.Python.AST.ann :: a, + TreeSitter.Python.AST.extraChildren :: [(GHC.Generics.:+:) + TreeSitter.Python.AST.CompoundStatement + TreeSitter.Python.AST.SimpleStatement + a]} + -- Defined at TreeSitter/Python/AST.hs:10:1 +instance Show a => Show (TreeSitter.Python.AST.Module a) + -- Defined at TreeSitter/Python/AST.hs:10:1 +instance Ord a => Ord (TreeSitter.Python.AST.Module a) + -- Defined at TreeSitter/Python/AST.hs:10:1 +instance Eq a => Eq (TreeSitter.Python.AST.Module a) + -- Defined at TreeSitter/Python/AST.hs:10:1 +instance Traversable TreeSitter.Python.AST.Module + -- Defined at TreeSitter/Python/AST.hs:10:1 +instance Functor TreeSitter.Python.AST.Module + -- Defined at TreeSitter/Python/AST.hs:10:1 +instance Foldable TreeSitter.Python.AST.Module + -- Defined at TreeSitter/Python/AST.hs:10:1 +instance Unmarshal TreeSitter.Python.AST.Module + -- Defined at TreeSitter/Python/AST.hs:10:1 +instance SymbolMatching TreeSitter.Python.AST.Module + -- Defined at TreeSitter/Python/AST.hs:10:1 +``` + +### Tests + +As of right now, Hedgehog tests are minimal and only in place for the Python library. + +To run tests: + +`cabal v2-test tree-sitter-python` + +### Additional notes + +- [GenerateSyntax](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/GenerateSyntax.hs) provides a way to pre-define certain datatypes for which Template Haskell is not used. Any datatypes among the node types which have already been defined in the module where the splice is run will be skipped, allowing customization of the representation of parts of the tree. While this gives us flexibility, we encourage that this is used sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into `Integer`s), and may require defining `TS.UnmarshalAnn` or `TS.SymbolMatching` instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual `Foldable`, `Functor`, etc. instances provided for generated datatypes. +- Annotations are captured by a polymorphic parameter `a` +- [Unmarshal](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/Unmarshal.hs) defines both generic and non-generic classes. This is because generic behaviors are different than what we get non-generically, and in the case of ` Maybe`, `[]`—we actually preference doing things non-generically. Since `[]` is a sum, the generic behavior for `:+:` would be invoked and expect that we’d have repetitions represented in the parse tree as right-nested singly-linked lists (ex., `(a (b (c (d…))))`) rather than as just consecutive sibling nodes (ex., `(a b c ...d)`, which is what our trees have). We want to match the latter. diff --git a/semantic-codegen/Setup.hs b/semantic-codegen/Setup.hs new file mode 100644 index 000000000..9a994af67 --- /dev/null +++ b/semantic-codegen/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/semantic-codegen/semantic-codegen.cabal b/semantic-codegen/semantic-codegen.cabal new file mode 100644 index 000000000..5285cc4e4 --- /dev/null +++ b/semantic-codegen/semantic-codegen.cabal @@ -0,0 +1,82 @@ +cabal-version: 2.4 +-- Initial package description 'semantic-codegen.cabal' generated by 'cabal +-- init'. For further documentation, see +-- http://haskell.org/cabal/users-guide/ + +name: semantic-codegen +version: 0.1.0.0 +-- synopsis: +-- description: +-- bug-reports: +license: MIT +license-file: LICENSE +author: The Semantic Authors +maintainer: opensource+semantic@github.com +copyright: (c) 2019 GitHub, Inc. +category: Language +extra-source-files: CHANGELOG.md + +tested-with: GHC == 8.6.5 + +common haskell + default-language: Haskell2010 + ghc-options: + -Weverything + -Wno-missing-local-signatures + -Wno-missing-import-lists + -Wno-implicit-prelude + -Wno-safe + -Wno-unsafe + -Wno-name-shadowing + -Wno-monomorphism-restriction + -Wno-missed-specialisations + -Wno-all-missed-specialisations + -Wno-star-is-type + if (impl(ghc >= 8.8)) + ghc-options: -Wno-missing-deriving-strategies + +library + import: haskell + exposed-modules: AST.Deserialize + AST.GenerateSyntax + AST.Grammar.TH + AST.Token + AST.Unmarshal + +-- other-modules: + -- other-extensions: + build-depends: base >= 4.13 + , aeson ^>= 1.4.2.0 + , bytestring ^>= 0.10.8.2 + , tree-sitter ^>= 0.8 + , fused-effects ^>= 1.0 + , semantic-source ^>= 0.0.2 + , template-haskell ^>= 2.15 + , text ^>= 1.2.3.1 + , unordered-containers ^>= 0.2.10 + , containers >= 0.6.0.1 + , text ^>= 1.2.3.1 + , filepath ^>= 1.4.1 + , directory ^>= 1.3.3.2 + + hs-source-dirs: src + default-language: Haskell2010 + +executable semantic-codegen + import: haskell + main-is: Main.hs + -- other-modules: + -- other-extensions: + build-depends: base + , tree-sitter + , semantic-source + , bytestring + , aeson + , bytestring + , text + , unordered-containers + , containers + , filepath + default-language: Haskell2010 + -- hs-source-dirs: + default-language: Haskell2010 diff --git a/semantic-codegen/src/AST/Deserialize.hs b/semantic-codegen/src/AST/Deserialize.hs new file mode 100644 index 000000000..f808f7d9b --- /dev/null +++ b/semantic-codegen/src/AST/Deserialize.hs @@ -0,0 +1,133 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE DeriveLift #-} + +-- Turn off partial field warnings for Datatype. +{-# OPTIONS_GHC -Wno-partial-fields #-} +module AST.Deserialize +( Datatype (..) +, Field (..) +, Children(..) +, Required (..) +, Type (..) +, DatatypeName (..) +, Named (..) +, Multiple (..) +) where + +import Data.Aeson as Aeson +import Data.Aeson.Types +import Data.Char +import GHC.Generics hiding (Constructor, Datatype) +import Language.Haskell.TH.Syntax (Lift) +import Data.Text (Text, unpack) +import Data.List.NonEmpty (NonEmpty (..)) +import qualified Data.HashMap.Strict as HM +import Data.Maybe (fromMaybe) + +-- Types to deserialize into: +data Datatype + = SumType + { datatypeName :: DatatypeName + , datatypeNameStatus :: Named + , datatypeSubtypes :: NonEmpty Type + } + | ProductType + { datatypeName :: DatatypeName + , datatypeNameStatus :: Named + , datatypeChildren :: Maybe Children + , datatypeFields :: [(String, Field)] + } + | LeafType + { datatypeName :: DatatypeName + , datatypeNameStatus :: Named + } + deriving (Eq, Ord, Show, Generic, ToJSON) + +instance FromJSON Datatype where + parseJSON = withObject "Datatype" $ \v -> do + type' <- v .: "type" + named <- v .: "named" + subtypes <- v .:? "subtypes" + case subtypes of + Nothing -> do + fields <- fmap (fromMaybe HM.empty) (v .:? "fields") + children <- v .:? "children" + if null fields && null children then + pure (LeafType type' named) + else + ProductType type' named children <$> parseKVPairs (HM.toList fields) + Just subtypes -> pure (SumType type' named subtypes) + + +-- | Transforms list of key-value pairs to a Parser +parseKVPairs :: [(Text, Value)] -> Parser [(String, Field)] +parseKVPairs = traverse go + where go :: (Text, Value) -> Parser (String, Field) + go (t,v) = do + v' <- parseJSON v + pure (unpack t, v') + +data Field = MkField + { fieldRequired :: Required + , fieldTypes :: NonEmpty Type + , fieldMultiple :: Multiple + } + deriving (Eq, Ord, Show, Generic, ToJSON) + +instance FromJSON Field where + parseJSON = genericParseJSON customOptions + + +newtype Children = MkChildren Field + deriving (Eq, Ord, Show, Generic) + deriving newtype (ToJSON, FromJSON) + + +data Required = Optional | Required + deriving (Eq, Ord, Show, Generic, ToJSON) + +instance FromJSON Required where + parseJSON = withBool "Required" (\p -> pure (if p then Required else Optional)) + +data Type = MkType + { fieldType :: DatatypeName + , isNamed :: Named + } + deriving (Eq, Ord, Show, Generic, ToJSON) + +instance FromJSON Type where + parseJSON = genericParseJSON customOptions + +newtype DatatypeName = DatatypeName { getDatatypeName :: String } + deriving (Eq, Ord, Show, Generic) + deriving newtype (FromJSON, ToJSON) + +data Named = Anonymous | Named + deriving (Eq, Ord, Show, Generic, ToJSON, Lift) + +instance FromJSON Named where + parseJSON = withBool "Named" (\p -> pure (if p then Named else Anonymous)) + +data Multiple = Single | Multiple + deriving (Eq, Ord, Show, Generic, ToJSON) + +instance FromJSON Multiple where + parseJSON = withBool "Multiple" (\p -> pure (if p then Multiple else Single)) + +customOptions :: Aeson.Options +customOptions = Aeson.defaultOptions + { + fieldLabelModifier = initLower . dropPrefix + , constructorTagModifier = initLower + } + +dropPrefix :: String -> String +dropPrefix = Prelude.dropWhile isLower + +initLower :: String -> String +initLower (c:cs) = toLower c : cs +initLower "" = "" \ No newline at end of file diff --git a/semantic-codegen/src/AST/GenerateSyntax.hs b/semantic-codegen/src/AST/GenerateSyntax.hs new file mode 100644 index 000000000..11dd92c08 --- /dev/null +++ b/semantic-codegen/src/AST/GenerateSyntax.hs @@ -0,0 +1,186 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} +module AST.GenerateSyntax +( syntaxDatatype +, astDeclarationsForLanguage +) where + +import Data.Aeson hiding (String) +import Data.Foldable +import Data.List +import Data.List.NonEmpty (NonEmpty (..)) +import Data.Text (Text) +import Foreign.C.String +import Foreign.Ptr +import GHC.Generics hiding (Constructor, Datatype) +import GHC.Records +import Language.Haskell.TH as TH +import Language.Haskell.TH.Syntax as TH +import System.Directory +import System.FilePath.Posix +import AST.Deserialize (Children (..), Datatype (..), DatatypeName (..), Field (..), Multiple (..), Named (..), Required (..), Type (..)) +import qualified TreeSitter.Language as TS +import TreeSitter.Node +import TreeSitter.Symbol (TSSymbol, toHaskellCamelCaseIdentifier, toHaskellPascalCaseIdentifier) +import AST.Token +import qualified AST.Unmarshal as TS + +-- | Derive Haskell datatypes from a language and its @node-types.json@ file. +-- +-- Datatypes will be generated according to the specification in the @node-types.json@ file, with anonymous leaf types defined as synonyms for the 'Token' datatype. +-- +-- Any datatypes among the node types which have already been defined in the module where the splice is run will be skipped, allowing customization of the representation of parts of the tree. Note that this should be used sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into 'Integer's), and may require defining 'TS.UnmarshalAnn' or 'TS.SymbolMatching' instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual 'Foldable', 'Functor', etc. instances provided for generated datatypes. +astDeclarationsForLanguage :: Ptr TS.Language -> FilePath -> Q [Dec] +astDeclarationsForLanguage language filePath = do + _ <- TS.addDependentFileRelative filePath + currentFilename <- loc_filename <$> location + pwd <- runIO getCurrentDirectory + let invocationRelativePath = takeDirectory (pwd currentFilename) filePath + input <- runIO (eitherDecodeFileStrict' invocationRelativePath) >>= either fail pure + allSymbols <- runIO (getAllSymbols language) + debugSymbolNames <- [d| + debugSymbolNames :: [String] + debugSymbolNames = $(listE (map (litE . stringL . debugPrefix) allSymbols)) + |] + (debugSymbolNames <>) . concat @[] <$> traverse (syntaxDatatype language allSymbols) input + +-- Build a list of all symbols +getAllSymbols :: Ptr TS.Language -> IO [(String, Named)] +getAllSymbols language = do + count <- TS.ts_language_symbol_count language + mapM getSymbol [(0 :: TSSymbol) .. fromIntegral (pred count)] + where + getSymbol i = do + cname <- TS.ts_language_symbol_name language i + n <- peekCString cname + t <- TS.ts_language_symbol_type language i + let named = if t == 0 then Named else Anonymous + pure (n, named) + +-- Auto-generate Haskell datatypes for sums, products and leaf types +syntaxDatatype :: Ptr TS.Language -> [(String, Named)] -> Datatype -> Q [Dec] +syntaxDatatype language allSymbols datatype = skipDefined $ do + typeParameterName <- newName "a" + case datatype of + SumType (DatatypeName _) _ subtypes -> do + types' <- fieldTypesToNestedSum subtypes + let fieldName = mkName ("get" <> nameStr) + con <- recC name [TH.varBangType fieldName (TH.bangType strictness (pure types' `appT` varT typeParameterName))] + hasFieldInstance <- makeHasFieldInstance (conT name) (varT typeParameterName) (varE fieldName) + pure + ( NewtypeD [] name [PlainTV typeParameterName] Nothing con [deriveGN, deriveStockClause, deriveAnyClassClause] + : hasFieldInstance) + ProductType (DatatypeName datatypeName) named children fields -> do + con <- ctorForProductType datatypeName typeParameterName children fields + result <- symbolMatchingInstance allSymbols name named datatypeName + pure $ generatedDatatype name [con] typeParameterName:result + -- Anonymous leaf types are defined as synonyms for the `Token` datatype + LeafType (DatatypeName datatypeName) Anonymous -> do + tsSymbol <- runIO $ withCStringLen datatypeName (\(s, len) -> TS.ts_language_symbol_for_name language s len False) + pure [ TySynD name [] (ConT ''Token `AppT` LitT (StrTyLit datatypeName) `AppT` LitT (NumTyLit (fromIntegral tsSymbol))) ] + LeafType (DatatypeName datatypeName) Named -> do + con <- ctorForLeafType (DatatypeName datatypeName) typeParameterName + result <- symbolMatchingInstance allSymbols name Named datatypeName + pure $ generatedDatatype name [con] typeParameterName:result + where + -- Skip generating datatypes that have already been defined (overridden) in the module where the splice is running. + skipDefined m = do + isLocal <- lookupTypeName nameStr >>= maybe (pure False) isLocalName + if isLocal then pure [] else m + name = mkName nameStr + nameStr = toNameString (datatypeNameStatus datatype) (getDatatypeName (AST.Deserialize.datatypeName datatype)) + deriveStockClause = DerivClause (Just StockStrategy) [ ConT ''Eq, ConT ''Ord, ConT ''Show, ConT ''Generic, ConT ''Foldable, ConT ''Functor, ConT ''Traversable, ConT ''Generic1] + deriveAnyClassClause = DerivClause (Just AnyclassStrategy) [ConT ''TS.Unmarshal] + deriveGN = DerivClause (Just NewtypeStrategy) [ConT ''TS.SymbolMatching] + generatedDatatype name cons typeParameterName = DataD [] name [PlainTV typeParameterName] Nothing cons [deriveStockClause, deriveAnyClassClause] + + +makeHasFieldInstance :: TypeQ -> TypeQ -> ExpQ -> Q [Dec] +makeHasFieldInstance ty param elim = + [d|instance HasField "ann" $(ty `appT` param) $param where + getField = TS.gann . $elim |] + +-- | Create TH-generated SymbolMatching instances for sums, products, leaves +symbolMatchingInstance :: [(String, Named)] -> Name -> Named -> String -> Q [Dec] +symbolMatchingInstance allSymbols name named str = do + let tsSymbols = elemIndices (str, named) allSymbols + names = intercalate ", " $ fmap (debugPrefix . (!!) allSymbols) tsSymbols + [d|instance TS.SymbolMatching $(conT name) where + matchedSymbols _ = tsSymbols + showFailure _ node = "expected " <> $(litE (stringL names)) + <> " but got " <> if nodeSymbol node == 65535 then "ERROR" else genericIndex debugSymbolNames (nodeSymbol node) + <> " [" <> show r1 <> ", " <> show c1 <> "] -" + <> " [" <> show r2 <> ", " <> show c2 <> "]" + where TSPoint r1 c1 = nodeStartPoint node + TSPoint r2 c2 = nodeEndPoint node|] + +-- | Prefix symbol names for debugging to disambiguate between Named and Anonymous nodes. +debugPrefix :: (String, Named) -> String +debugPrefix (name, Named) = name +debugPrefix (name, Anonymous) = "_" <> name + +-- | Build Q Constructor for product types (nodes with fields) +ctorForProductType :: String -> Name -> Maybe Children -> [(String, Field)] -> Q Con +ctorForProductType constructorName typeParameterName children fields = ctorForTypes constructorName lists where + lists = annotation : fieldList ++ childList + annotation = ("ann", varT typeParameterName) + fieldList = map (fmap toType) fields + childList = toList $ fmap toTypeChild children + toType (MkField required fieldTypes mult) = + let ftypes = fieldTypesToNestedSum fieldTypes `appT` varT typeParameterName + in case (required, mult) of + (Required, Multiple) -> appT (conT ''NonEmpty) ftypes + (Required, Single) -> ftypes + (Optional, Multiple) -> appT (conT ''[]) ftypes + (Optional, Single) -> appT (conT ''Maybe) ftypes + toTypeChild (MkChildren field) = ("extra_children", toType field) + +-- | Build Q Constructor for leaf types (nodes with no fields or subtypes) +ctorForLeafType :: DatatypeName -> Name -> Q Con +ctorForLeafType (DatatypeName name) typeParameterName = ctorForTypes name + [ ("ann", varT typeParameterName) -- ann :: a + , ("text", conT ''Text) -- text :: Text + ] + +-- | Build Q Constructor for records +ctorForTypes :: String -> [(String, Q TH.Type)] -> Q Con +ctorForTypes constructorName types = recC (toName Named constructorName) recordFields where + recordFields = map (uncurry toVarBangType) types + toVarBangType str type' = TH.varBangType (mkName . toHaskellCamelCaseIdentifier $ str) (TH.bangType strictness type') + + +-- | Convert field types to Q types +fieldTypesToNestedSum :: NonEmpty AST.Deserialize.Type -> Q TH.Type +fieldTypesToNestedSum xs = go (toList xs) + where + combine lhs rhs = (conT ''(:+:) `appT` lhs) `appT` rhs -- (((((a :+: b) :+: c) :+: d)) :+: e) ((a :+: b) :+: (c :+: d)) + convertToQType (MkType (DatatypeName n) named) = conT (toName named n) + go [x] = convertToQType x + go xs = let (l,r) = splitAt (length xs `div` 2) xs in combine (go l) (go r) + + +-- | Create bang required to build records +strictness :: BangQ +strictness = TH.bang noSourceUnpackedness noSourceStrictness + +-- | Prepend "Anonymous" to named node when false, otherwise use regular toName +toName :: Named -> String -> Name +toName named str = mkName (toNameString named str) + +toNameString :: Named -> String -> String +toNameString named str = prefix named <> toHaskellPascalCaseIdentifier str + where + prefix Anonymous = "Anonymous" + prefix Named = "" + +-- | Get the 'Module', if any, for a given 'Name'. +moduleForName :: Name -> Maybe Module +moduleForName n = Module . PkgName <$> namePackage n <*> (ModName <$> nameModule n) + +-- | Test whether the name is defined in the module where the splice is executed. +isLocalName :: Name -> Q Bool +isLocalName n = (moduleForName n ==) . Just <$> thisModule diff --git a/semantic-codegen/src/AST/Grammar/Examples.hs b/semantic-codegen/src/AST/Grammar/Examples.hs new file mode 100644 index 000000000..71e2c352a --- /dev/null +++ b/semantic-codegen/src/AST/Grammar/Examples.hs @@ -0,0 +1,83 @@ +{-# LANGUAGE DataKinds, DeriveAnyClass, DeriveGeneric, DuplicateRecordFields, TypeOperators #-} +module AST.Grammar.Examples () where + +import Control.Effect.Reader +import Control.Monad.Fail +import qualified Data.ByteString as B +import qualified Data.Text as Text +import qualified Data.Text.Encoding as Text +import GHC.Generics ((:+:), Generic1) +import Numeric (readDec) +import Prelude hiding (fail) +import Source.Range +import AST.Token +import AST.Unmarshal + +-- | An example of a sum-of-products datatype. +newtype Expr a = Expr ((If :+: Block :+: Var :+: Lit :+: Bin) a) + deriving (Generic1, Unmarshal) + +instance SymbolMatching Expr where + matchedSymbols _ = [] + showFailure _ _ = "" + +-- | Product with multiple fields. +data If a = If { ann :: a, condition :: Expr a, consequence :: Expr a, alternative :: Maybe (Expr a) } + deriving (Generic1, Unmarshal) + +instance SymbolMatching If where + matchedSymbols _ = [] + showFailure _ _ = "" + +-- | Single-field product. +data Block a = Block { ann :: a, body :: [Expr a] } + deriving (Generic1, Unmarshal) + +instance SymbolMatching Block where + matchedSymbols _ = [] + showFailure _ _ = "" + +-- | Leaf node. +data Var a = Var { ann :: a, text :: Text.Text } + deriving (Generic1, Unmarshal) + +instance SymbolMatching Var where + matchedSymbols _ = [] + showFailure _ _ = "" + +-- | Custom leaf node. +data Lit a = Lit { ann :: a, lit :: IntegerLit } + deriving (Generic1, Unmarshal) + +instance SymbolMatching Lit where + matchedSymbols _ = [] + showFailure _ _ = "" + +-- | Product with anonymous sum field. +data Bin a = Bin { ann :: a, lhs :: Expr a, op :: (AnonPlus :+: AnonTimes) a, rhs :: Expr a } + deriving (Generic1, Unmarshal) + +instance SymbolMatching Bin where + matchedSymbols _ = [] + showFailure _ _ = "" + +-- | Anonymous leaf node. +type AnonPlus = Token "+" 0 + +-- | Anonymous leaf node. +type AnonTimes = Token "*" 1 + + +newtype IntegerLit = IntegerLit Integer + +instance UnmarshalAnn IntegerLit where + unmarshalAnn node = do + Range start end <- unmarshalAnn node + bytestring <- asks source + let drop = B.drop start + take = B.take (end - start) + slice = take . drop + str = Text.unpack (Text.decodeUtf8 (slice bytestring)) + case readDec str of + (i, _):_ -> pure (IntegerLit i) + _ -> fail ("could not parse '" <> str <> "'") diff --git a/semantic-codegen/src/AST/Grammar/TH.hs b/semantic-codegen/src/AST/Grammar/TH.hs new file mode 100644 index 000000000..769ccd915 --- /dev/null +++ b/semantic-codegen/src/AST/Grammar/TH.hs @@ -0,0 +1,33 @@ +{-# LANGUAGE TemplateHaskell #-} +module AST.Grammar.TH +( mkStaticallyKnownRuleGrammarData +) where + +import Data.Ix (Ix) +import Data.List (mapAccumL) +import qualified Data.Set as Set +import Foreign.Ptr +import Language.Haskell.TH +import Language.Haskell.TH.Syntax +import TreeSitter.Symbol +import TreeSitter.Language (Language, languageSymbols) + +-- | TemplateHaskell construction of a datatype for the referenced Language. +-- | Statically-known rules corresponding to symbols in the grammar. +mkStaticallyKnownRuleGrammarData :: Name -> Ptr Language -> Q [Dec] +mkStaticallyKnownRuleGrammarData name language = do + symbols <- renameDups . map ((,) . fst <*> uncurry symbolToName) . (++ [(Regular, "ParseError")]) <$> runIO (languageSymbols language) + Module _ modName <- thisModule + let mkMatch symbolType str = match (conP (Name (OccName str) (NameQ modName)) []) (normalB [e|symbolType|]) [] + datatype <- dataD (pure []) name [] Nothing (flip normalC [] . mkName . snd <$> symbols) + [ derivClause Nothing (map conT [ ''Bounded, ''Enum, ''Eq, ''Ix, ''Ord, ''Show ]) ] + symbolInstance <- [d| + instance Symbol $(conT name) where + symbolType = $(lamCaseE (uncurry mkMatch <$> symbols)) |] + pure (datatype : symbolInstance) + +renameDups :: [(a, String)] -> [(a, String)] +renameDups = snd . mapAccumL go mempty + where go done (ty, name) = let name' = rename name in (Set.insert name' done, (ty, name')) + where rename name | name `Set.member` done = rename (name ++ "'") + | otherwise = name \ No newline at end of file diff --git a/semantic-codegen/src/AST/Token.hs b/semantic-codegen/src/AST/Token.hs new file mode 100644 index 000000000..7d3aa3644 --- /dev/null +++ b/semantic-codegen/src/AST/Token.hs @@ -0,0 +1,17 @@ +{-# LANGUAGE DataKinds, DeriveGeneric, DeriveTraversable, KindSignatures #-} +module AST.Token +( Token(..) +) where + +import GHC.Generics (Generic, Generic1) +import GHC.TypeLits (Symbol, Nat) + +-- | An AST node representing a token, indexed by its name and numeric value. +-- +-- For convenience, token types are typically used via type synonyms, e.g.: +-- +-- @ +-- type AnonymousPlus = Token "+" 123 +-- @ +newtype Token (symName :: Symbol) (symVal :: Nat) a = Token { ann :: a } + deriving (Eq, Foldable, Functor, Generic, Generic1, Ord, Show, Traversable) \ No newline at end of file diff --git a/semantic-codegen/src/AST/Unmarshal.hs b/semantic-codegen/src/AST/Unmarshal.hs new file mode 100644 index 000000000..425bd8f6f --- /dev/null +++ b/semantic-codegen/src/AST/Unmarshal.hs @@ -0,0 +1,405 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DefaultSignatures #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE UndecidableInstances #-} + +module AST.Unmarshal +( parseByteString +, UnmarshalState(..) +, UnmarshalError(..) +, FieldName(..) +, Unmarshal(..) +, UnmarshalAnn(..) +, UnmarshalField(..) +, SymbolMatching(..) +, Match(..) +, hoist +, lookupSymbol +, unmarshalNode +, GHasAnn(..) +) where + +import Control.Algebra (send) +import Control.Carrier.Reader hiding (asks) +import Control.Exception +import Control.Monad.IO.Class +import Data.ByteString (ByteString) +import qualified Data.ByteString as B +import Data.Coerce +import Data.Foldable (toList) +import qualified Data.IntMap as IntMap +import Data.List.NonEmpty (NonEmpty (..)) +import Data.Proxy +import qualified Data.Text as Text +import Data.Text.Encoding +import Data.Text.Encoding.Error (lenientDecode) +import Foreign.C.String +import Foreign.Marshal.Array +import Foreign.Marshal.Utils +import Foreign.Ptr +import Foreign.Storable +import GHC.Generics +import GHC.Records +import GHC.TypeLits +import Source.Loc +import Source.Span +import TreeSitter.Cursor as TS +import TreeSitter.Language as TS +import TreeSitter.Node as TS +import TreeSitter.Parser as TS +import AST.Token as TS +import TreeSitter.Tree as TS + +asks :: Has (Reader r) sig m => (r -> r') -> m r' +asks f = send (Ask (pure . f)) +{-# INLINE asks #-} + +-- Parse source code and produce AST +parseByteString :: (Unmarshal t, UnmarshalAnn a) => Ptr TS.Language -> ByteString -> IO (Either String (t a)) +parseByteString language bytestring = withParser language $ \ parser -> withParseTree parser bytestring $ \ treePtr -> + if treePtr == nullPtr then + pure (Left "error: didn't get a root node") + else + withRootNode treePtr $ \ rootPtr -> + withCursor (castPtr rootPtr) $ \ cursor -> + (Right <$> runReader (UnmarshalState bytestring cursor) (liftIO (peek rootPtr) >>= unmarshalNode)) + `catch` (pure . Left . getUnmarshalError) + +newtype UnmarshalError = UnmarshalError { getUnmarshalError :: String } + deriving (Show) + +instance Exception UnmarshalError + +data UnmarshalState = UnmarshalState + { source :: {-# UNPACK #-} !ByteString + , cursor :: {-# UNPACK #-} !(Ptr Cursor) + } + +type MatchM = ReaderC UnmarshalState IO + +newtype Match t = Match + { runMatch :: forall a . UnmarshalAnn a => Node -> MatchM (t a) + } + +-- | A church-encoded binary tree with constant-time 'singleton', 'mempty', '<>', and 'fmap', and linear-time 'foldMap'. +newtype B a = B (forall r . (r -> r -> r) -> (a -> r) -> r -> r) + +instance Functor B where + fmap f (B run) = B (\ fork leaf -> run fork (leaf . f)) + {-# INLINE fmap #-} + a <$ B run = B (\ fork leaf -> run fork (leaf . const a)) + {-# INLINE (<$) #-} + +instance Semigroup (B a) where + B l <> B r = B (\ fork leaf nil -> fork (l fork leaf nil) (r fork leaf nil)) + {-# INLINE (<>) #-} + +instance Monoid (B a) where + mempty = B (\ _ _ nil -> nil) + {-# INLINE mempty #-} + +instance Foldable B where + foldMap f (B run) = run (<>) f mempty + {-# INLINE foldMap #-} + +singleton :: a -> B a +singleton a = B (\ _ leaf _ -> leaf a) +{-# INLINE singleton #-} + +hoist :: (forall x . t x -> t' x) -> Match t -> Match t' +hoist f (Match run) = Match (fmap f . run) +{-# INLINE hoist #-} + +lookupSymbol :: TSSymbol -> IntMap.IntMap a -> Maybe a +lookupSymbol sym map = IntMap.lookup (fromIntegral sym) map +{-# INLINE lookupSymbol #-} + +-- | Unmarshal a node +unmarshalNode :: forall t a . + ( UnmarshalAnn a + , Unmarshal t + ) + => Node + -> MatchM (t a) +unmarshalNode node = case lookupSymbol (nodeSymbol node) matchers' of + Just t -> runMatch t node + Nothing -> liftIO . throwIO . UnmarshalError $ showFailure (Proxy @t) node +{-# INLINE unmarshalNode #-} + +-- | Unmarshalling is the process of iterating over tree-sitter’s parse trees using its tree cursor API and producing Haskell ASTs for the relevant nodes. +-- +-- Datatypes which can be constructed from tree-sitter parse trees may use the default definition of 'matchers' providing that they have a suitable 'Generic1' instance. +class SymbolMatching t => Unmarshal t where + matchers' :: IntMap.IntMap (Match t) + matchers' = IntMap.fromList (toList matchers) + + matchers :: B (Int, Match t) + default matchers :: (Generic1 t, GUnmarshal (Rep1 t)) => B (Int, Match t) + matchers = foldMap (singleton . (, match)) (matchedSymbols (Proxy @t)) + where match = Match $ \ node -> do + cursor <- asks cursor + goto cursor (nodeTSNode node) + fmap to1 (gunmarshalNode node) + +instance (Unmarshal f, Unmarshal g) => Unmarshal (f :+: g) where + matchers = fmap (fmap (hoist L1)) matchers <> fmap (fmap (hoist R1)) matchers + +instance Unmarshal t => Unmarshal (Rec1 t) where + matchers = coerce (matchers @t) + +instance (KnownNat n, KnownSymbol sym) => Unmarshal (Token sym n) where + matchers = singleton (fromIntegral (natVal (Proxy @n)), Match (fmap Token . unmarshalAnn)) + + +-- | Unmarshal an annotation field. +-- +-- Leaf nodes have 'Text.Text' fields, and leaves, anonymous leaves, and products all have parametric annotation fields. All of these fields are unmarshalled using the metadata of the node, e.g. its start/end bytes, without reference to any child nodes it may contain. +class UnmarshalAnn a where + unmarshalAnn + :: Node + -> MatchM a + +instance UnmarshalAnn () where + unmarshalAnn _ = pure () + +instance UnmarshalAnn Text.Text where + unmarshalAnn node = do + range <- unmarshalAnn node + asks (decodeUtf8With lenientDecode . slice range . source) + +-- | Instance for pairs of annotations +instance (UnmarshalAnn a, UnmarshalAnn b) => UnmarshalAnn (a,b) where + unmarshalAnn node = (,) + <$> unmarshalAnn @a node + <*> unmarshalAnn @b node + +instance UnmarshalAnn Loc where + unmarshalAnn node = Loc + <$> unmarshalAnn @Range node + <*> unmarshalAnn @Span node + +instance UnmarshalAnn Range where + unmarshalAnn node = do + let start = fromIntegral (nodeStartByte node) + end = fromIntegral (nodeEndByte node) + pure (Range start end) + +instance UnmarshalAnn Span where + unmarshalAnn node = do + let spanStart = pointToPos (nodeStartPoint node) + spanEnd = pointToPos (nodeEndPoint node) + pure (Span spanStart spanEnd) + +pointToPos :: TSPoint -> Pos +pointToPos (TSPoint line column) = Pos (fromIntegral line) (fromIntegral column) + + +-- | Optional/repeated fields occurring in product datatypes are wrapped in type constructors, e.g. 'Maybe', '[]', or 'NonEmpty', and thus can unmarshal zero or more nodes for the same field name. +class UnmarshalField t where + unmarshalField + :: ( Unmarshal f + , UnmarshalAnn a + ) + => String -- ^ datatype name + -> String -- ^ field name + -> [Node] -- ^ nodes + -> MatchM (t (f a)) + +instance UnmarshalField Maybe where + unmarshalField _ _ [] = pure Nothing + unmarshalField _ _ [x] = Just <$> unmarshalNode x + unmarshalField d f _ = liftIO . throwIO . UnmarshalError $ "type '" <> d <> "' expected zero or one nodes in field '" <> f <> "' but got multiple" + +instance UnmarshalField [] where + unmarshalField d f (x:xs) = do + head' <- unmarshalNode x + tail' <- unmarshalField d f xs + pure $ head' : tail' + unmarshalField _ _ [] = pure [] + +instance UnmarshalField NonEmpty where + unmarshalField d f (x:xs) = do + head' <- unmarshalNode x + tail' <- unmarshalField d f xs + pure $ head' :| tail' + unmarshalField d f [] = liftIO . throwIO . UnmarshalError $ "type '" <> d <> "' expected one or more nodes in field '" <> f <> "' but got zero" + +class SymbolMatching (a :: * -> *) where + matchedSymbols :: Proxy a -> [Int] + + -- | Provide error message describing the node symbol vs. the symbols this can match + showFailure :: Proxy a -> Node -> String + +instance SymbolMatching f => SymbolMatching (M1 i c f) where + matchedSymbols _ = matchedSymbols (Proxy @f) + showFailure _ = showFailure (Proxy @f) + +instance SymbolMatching f => SymbolMatching (Rec1 f) where + matchedSymbols _ = matchedSymbols (Proxy @f) + showFailure _ = showFailure (Proxy @f) + +instance (KnownNat n, KnownSymbol sym) => SymbolMatching (Token sym n) where + matchedSymbols _ = [fromIntegral (natVal (Proxy @n))] + showFailure _ _ = "expected " ++ symbolVal (Proxy @sym) + +instance (SymbolMatching f, SymbolMatching g) => SymbolMatching (f :+: g) where + matchedSymbols _ = matchedSymbols (Proxy @f) <> matchedSymbols (Proxy @g) + showFailure _ = sep <$> showFailure (Proxy @f) <*> showFailure (Proxy @g) + +sep :: String -> String -> String +sep a b = a ++ ". " ++ b + +-- | Move the cursor to point at the passed 'TSNode'. +goto :: Ptr Cursor -> TSNode -> MatchM () +goto cursor node = liftIO (with node (ts_tree_cursor_reset_p cursor)) + + +type Fields = [(FieldName, Node)] + +-- | Return the fields remaining in the current branch, represented as 'Map.Map' of 'FieldName's to their corresponding 'Node's. +getFields :: Ptr Cursor -> Node -> MatchM Fields +getFields cursor node + | maxCount == 0 = pure [] + | otherwise = do + nodes <- liftIO . allocaArray maxCount $ \ ptr -> do + actualCount <- ts_tree_cursor_copy_child_nodes cursor ptr + peekArray (fromIntegral actualCount) ptr + traverse (\ node -> (, node) <$> getFieldName node) nodes + where + maxCount = fromIntegral (nodeChildCount node) + getFieldName node + | nodeFieldName node == nullPtr = pure (FieldName "extraChildren") + | otherwise = FieldName . toHaskellCamelCaseIdentifier <$> liftIO (peekCString (nodeFieldName node)) + +lookupField :: FieldName -> Fields -> [Node] +lookupField k = map snd . filter ((== k) . fst) + + +-- | Return a 'ByteString' that contains a slice of the given 'ByteString'. +slice :: Range -> ByteString -> ByteString +slice (Range start end) = take . drop + where drop = B.drop start + take = B.take (end - start) + + +newtype FieldName = FieldName { getFieldName :: String } + deriving (Eq, Ord, Show) + +-- | Generic construction of ASTs from a 'Map.Map' of named fields. +-- +-- Product types (specifically, record types) are constructed by looking up the node for each corresponding field name in the map, moving the cursor to it, and then invoking 'unmarshalNode' to construct the value for that field. Leaf types are constructed as a special case of product types. +-- +-- Sum types are constructed by using the current node’s symbol to select the corresponding constructor deterministically. +class GUnmarshal f where + gunmarshalNode + :: UnmarshalAnn a + => Node + -> MatchM (f a) + +instance (Datatype d, GUnmarshalData f) => GUnmarshal (M1 D d f) where + gunmarshalNode = go (gunmarshalNode' (datatypeName @d undefined)) where + go :: (Node -> MatchM (f a)) -> Node -> MatchM (M1 i c f a) + go = coerce + +class GUnmarshalData f where + gunmarshalNode' + :: UnmarshalAnn a + => String + -> Node + -> MatchM (f a) + +instance GUnmarshalData f => GUnmarshalData (M1 i c f) where + gunmarshalNode' = go gunmarshalNode' where + go :: (String -> Node -> MatchM (f a)) -> String -> Node -> MatchM (M1 i c f a) + go = coerce + +-- For anonymous leaf nodes: +instance GUnmarshalData U1 where + gunmarshalNode' _ _ = pure U1 + +-- For unary products: +instance UnmarshalAnn k => GUnmarshalData (K1 c k) where + gunmarshalNode' _ = go unmarshalAnn where + go :: (Node -> MatchM k) -> Node -> MatchM (K1 c k a) + go = coerce + +-- For anonymous leaf nodes +instance GUnmarshalData Par1 where + gunmarshalNode' _ = go unmarshalAnn where + go :: (Node -> MatchM a) -> Node -> MatchM (Par1 a) + go = coerce + +instance Unmarshal t => GUnmarshalData (Rec1 t) where + gunmarshalNode' _ = go unmarshalNode where + go :: (Node -> MatchM (t a)) -> Node -> MatchM (Rec1 t a) + go = coerce + +-- For product datatypes: +instance (GUnmarshalProduct f, GUnmarshalProduct g) => GUnmarshalData (f :*: g) where + gunmarshalNode' datatypeName node = asks cursor >>= flip getFields node >>= gunmarshalProductNode @(f :*: g) datatypeName node + + +-- | Generically unmarshal products +class GUnmarshalProduct f where + gunmarshalProductNode + :: UnmarshalAnn a + => String + -> Node + -> Fields + -> MatchM (f a) + +-- Product structure +instance (GUnmarshalProduct f, GUnmarshalProduct g) => GUnmarshalProduct (f :*: g) where + gunmarshalProductNode datatypeName node fields = (:*:) + <$> gunmarshalProductNode @f datatypeName node fields + <*> gunmarshalProductNode @g datatypeName node fields + +-- Contents of product types (ie., the leaves of the product tree) +instance UnmarshalAnn k => GUnmarshalProduct (M1 S c (K1 i k)) where + gunmarshalProductNode _ node _ = go unmarshalAnn node where + go :: (Node -> MatchM k) -> Node -> MatchM (M1 S c (K1 i k) a) + go = coerce + +instance GUnmarshalProduct (M1 S c Par1) where + gunmarshalProductNode _ node _ = go unmarshalAnn node where + go :: (Node -> MatchM a) -> Node -> MatchM (M1 S c Par1 a) + go = coerce + +instance (UnmarshalField f, Unmarshal g, Selector c) => GUnmarshalProduct (M1 S c (f :.: g)) where + gunmarshalProductNode datatypeName _ = go (unmarshalField datatypeName fieldName . lookupField (FieldName fieldName)) where + go :: (Fields -> MatchM (f (g a))) -> Fields -> MatchM (M1 S c (f :.: g) a) + go = coerce + fieldName = selName @c undefined + +instance (Unmarshal t, Selector c) => GUnmarshalProduct (M1 S c (Rec1 t)) where + gunmarshalProductNode datatypeName _ fields = + case lookupField (FieldName fieldName) fields of + [] -> liftIO . throwIO . UnmarshalError $ "type '" <> datatypeName <> "' expected a node '" <> fieldName <> "' but didn't get one" + [x] -> go unmarshalNode x where + go :: (Node -> MatchM (t a)) -> Node -> MatchM (M1 S c (Rec1 t) a) + go = coerce + _ -> liftIO . throwIO . UnmarshalError $ "type '" <> datatypeName <> "' expected a node but got multiple" + where + fieldName = selName @c undefined + + +class GHasAnn a t where + gann :: t a -> a + +instance GHasAnn a f => GHasAnn a (M1 i c f) where + gann = gann . unM1 + +instance (GHasAnn a l, GHasAnn a r) => GHasAnn a (l :+: r) where + gann (L1 l) = gann l + gann (R1 r) = gann r + +instance {-# OVERLAPPABLE #-} HasField "ann" (t a) a => GHasAnn a t where + gann = getField @"ann" \ No newline at end of file diff --git a/semantic-go/semantic-go.cabal b/semantic-go/semantic-go.cabal index 8639a241a..a6371963e 100644 --- a/semantic-go/semantic-go.cabal +++ b/semantic-go/semantic-go.cabal @@ -24,9 +24,11 @@ common haskell , fused-effects ^>= 1.0 , fused-syntax , parsers ^>= 0.12.10 + , semantic-codegen , semantic-core ^>= 0.0 , semantic-source ^>= 0.0.2 , semantic-tags ^>= 0.0 + , template-haskell ^>= 2.15 , text ^>= 1.2.3 , tree-sitter ^>= 0.8 , tree-sitter-go ^>= 0.4.1 @@ -50,5 +52,7 @@ library import: haskell exposed-modules: Language.Go + Language.Go.AST + Language.Go.Grammar Language.Go.Tags hs-source-dirs: src diff --git a/semantic-go/src/Language/Go.hs b/semantic-go/src/Language/Go.hs index 38bf2e79f..4e7ef430a 100644 --- a/semantic-go/src/Language/Go.hs +++ b/semantic-go/src/Language/Go.hs @@ -6,11 +6,11 @@ module Language.Go import Data.Proxy +import qualified Language.Go.AST as Go import qualified Language.Go.Tags as GoTags import qualified Tags.Tagging.Precise as Tags import qualified TreeSitter.Go (tree_sitter_go) -import qualified TreeSitter.Go.AST as Go -import qualified TreeSitter.Unmarshal as TS +import qualified AST.Unmarshal as TS newtype Term a = Term { getTerm :: Go.SourceFile a } diff --git a/semantic-go/src/Language/Go/AST.hs b/semantic-go/src/Language/Go/AST.hs new file mode 100644 index 000000000..7b4499a9b --- /dev/null +++ b/semantic-go/src/Language/Go/AST.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} + +module Language.Go.AST +( module Language.Go.AST +) where + +import Prelude hiding (False, Float, Integer, Rational, String, True) +import AST.GenerateSyntax +import qualified Language.Go.Grammar as Grammar + +astDeclarationsForLanguage Grammar.tree_sitter_go "../../../vendor/tree-sitter-go/src/node-types.json" \ No newline at end of file diff --git a/semantic-go/src/Language/Go/Grammar.hs b/semantic-go/src/Language/Go/Grammar.hs new file mode 100644 index 000000000..719860bef --- /dev/null +++ b/semantic-go/src/Language/Go/Grammar.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE TemplateHaskell #-} +module Language.Go.Grammar +( tree_sitter_go +, Grammar(..) +) where + +import Language.Haskell.TH +import TreeSitter.Go (tree_sitter_go) +import AST.Grammar.TH +import TreeSitter.Language (addDependentFileRelative) + +-- Regenerate template haskell code when these files change: +addDependentFileRelative "../../../vendor/tree-sitter-go/src/parser.c" + +-- | Statically-known rules corresponding to symbols in the grammar. +mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_go diff --git a/semantic-go/src/Language/Go/Tags.hs b/semantic-go/src/Language/Go/Tags.hs index 1800de5af..48294c21f 100644 --- a/semantic-go/src/Language/Go/Tags.hs +++ b/semantic-go/src/Language/Go/Tags.hs @@ -13,12 +13,12 @@ import Control.Effect.Reader import Control.Effect.Writer import Data.Text as Text import GHC.Generics +import qualified Language.Go.AST as Go import Source.Loc import Source.Source as Source import Tags.Tag import qualified Tags.Tagging.Precise as Tags -import qualified TreeSitter.Go.AST as Go -import TreeSitter.Token +import AST.Token class ToTags t where tags diff --git a/semantic-go/vendor/tree-sitter-go b/semantic-go/vendor/tree-sitter-go new file mode 160000 index 000000000..689cc8fbd --- /dev/null +++ b/semantic-go/vendor/tree-sitter-go @@ -0,0 +1 @@ +Subproject commit 689cc8fbdc0613d267434f221af85aff91a31f8c diff --git a/semantic-java/semantic-java.cabal b/semantic-java/semantic-java.cabal index 0b7a2063e..a07cdf653 100644 --- a/semantic-java/semantic-java.cabal +++ b/semantic-java/semantic-java.cabal @@ -21,12 +21,16 @@ tested-with: GHC == 8.6.5 library exposed-modules: Language.Java + Language.Java.AST + Language.Java.Grammar Language.Java.Tags build-depends: base >= 4.13 && < 5 , fused-effects ^>= 1.0 + , semantic-codegen , semantic-source ^>= 0.0.2 , semantic-tags ^>= 0.0 + , template-haskell ^>= 2.15 , tree-sitter ^>= 0.8 , tree-sitter-java ^>= 0.6.1 hs-source-dirs: src @@ -43,3 +47,4 @@ library -Wno-missed-specialisations -Wno-all-missed-specialisations -Wno-star-is-type + -Wno-missing-deriving-strategies diff --git a/semantic-java/src/Language/Java.hs b/semantic-java/src/Language/Java.hs index 70a449cf7..293a16fd5 100644 --- a/semantic-java/src/Language/Java.hs +++ b/semantic-java/src/Language/Java.hs @@ -1,15 +1,15 @@ -- | Semantic functionality for Java programs. module Language.Java ( Term(..) -, TreeSitter.Java.tree_sitter_java +, Language.Java.Grammar.tree_sitter_java ) where import Data.Proxy +import qualified Language.Java.AST as Java import qualified Language.Java.Tags as JavaTags import qualified Tags.Tagging.Precise as Tags -import qualified TreeSitter.Java (tree_sitter_java) -import qualified TreeSitter.Java.AST as Java -import qualified TreeSitter.Unmarshal as TS +import qualified Language.Java.Grammar (tree_sitter_java) +import qualified AST.Unmarshal as TS newtype Term a = Term { getTerm :: Java.Program a } diff --git a/semantic-java/src/Language/Java/AST.hs b/semantic-java/src/Language/Java/AST.hs new file mode 100644 index 000000000..274843883 --- /dev/null +++ b/semantic-java/src/Language/Java/AST.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} + +module Language.Java.AST +( module Language.Java.AST +) where + +import AST.GenerateSyntax +import qualified Language.Java.Grammar as Grammar +import AST.Token + +astDeclarationsForLanguage Grammar.tree_sitter_java "../../../vendor/tree-sitter-java/src/node-types.json" \ No newline at end of file diff --git a/semantic-java/src/Language/Java/Grammar.hs b/semantic-java/src/Language/Java/Grammar.hs new file mode 100644 index 000000000..8ac864fb6 --- /dev/null +++ b/semantic-java/src/Language/Java/Grammar.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE TemplateHaskell #-} +module Language.Java.Grammar +( tree_sitter_java +, Grammar(..) +) where + +import Language.Haskell.TH +import TreeSitter.Java (tree_sitter_java) +import AST.Grammar.TH +import TreeSitter.Language (addDependentFileRelative) + +-- Regenerate template haskell code when these files change: +addDependentFileRelative "../../../vendor/tree-sitter-java/src/parser.c" + +-- | Statically-known rules corresponding to symbols in the grammar. +mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_java diff --git a/semantic-java/src/Language/Java/Tags.hs b/semantic-java/src/Language/Java/Tags.hs index 992daf8fd..6d7b5c005 100644 --- a/semantic-java/src/Language/Java/Tags.hs +++ b/semantic-java/src/Language/Java/Tags.hs @@ -11,13 +11,13 @@ module Language.Java.Tags import Control.Effect.Reader import Control.Effect.Writer import GHC.Generics +import qualified Language.Java.AST as Java import Source.Loc import Source.Range import Source.Source as Source import Tags.Tag import qualified Tags.Tagging.Precise as Tags -import qualified TreeSitter.Java.AST as Java -import TreeSitter.Token +import AST.Token class ToTags t where tags @@ -140,7 +140,7 @@ instance ToTags Java.FieldAccess instance ToTags Java.FieldDeclaration instance ToTags Java.FinallyClause instance ToTags Java.FloatingPointType -instance ToTags Java.ForInit +-- instance ToTags Java.ForInit instance ToTags Java.ForStatement instance ToTags Java.FormalParameter instance ToTags Java.FormalParameters @@ -160,7 +160,7 @@ instance ToTags Java.LabeledStatement instance ToTags Java.LambdaExpression instance ToTags Java.Literal instance ToTags Java.LocalVariableDeclaration -instance ToTags Java.LocalVariableDeclarationStatement +-- instance ToTags Java.LocalVariableDeclarationStatement instance ToTags Java.MarkerAnnotation -- instance ToTags Java.MethodDeclaration -- instance ToTags Java.MethodInvocation diff --git a/semantic-java/vendor/tree-sitter-java b/semantic-java/vendor/tree-sitter-java new file mode 160000 index 000000000..afc4cec79 --- /dev/null +++ b/semantic-java/vendor/tree-sitter-java @@ -0,0 +1 @@ +Subproject commit afc4cec799f6594390aeb0ca5e16ec89e73d488e diff --git a/semantic-json/semantic-json.cabal b/semantic-json/semantic-json.cabal index 8096aeb1a..cca29cbff 100644 --- a/semantic-json/semantic-json.cabal +++ b/semantic-json/semantic-json.cabal @@ -21,9 +21,13 @@ tested-with: GHC == 8.6.5 library exposed-modules: Language.JSON + Language.JSON.AST + Language.JSON.Grammar build-depends: base >= 4.13 && < 5 + , semantic-codegen , semantic-tags ^>= 0.0 + , template-haskell ^>= 2.15 , tree-sitter ^>= 0.8 , tree-sitter-json ^>= 0.6 hs-source-dirs: src @@ -40,3 +44,5 @@ library -Wno-missed-specialisations -Wno-all-missed-specialisations -Wno-star-is-type + if (impl(ghc >= 8.8)) + ghc-options: -Wno-missing-deriving-strategies diff --git a/semantic-json/src/Language/JSON.hs b/semantic-json/src/Language/JSON.hs index 8768226c7..54a0a95da 100644 --- a/semantic-json/src/Language/JSON.hs +++ b/semantic-json/src/Language/JSON.hs @@ -1,14 +1,14 @@ -- | Semantic functionality for JSON programs. module Language.JSON ( Term(..) -, TreeSitter.JSON.tree_sitter_json +, Language.JSON.Grammar.tree_sitter_json ) where import Data.Proxy +import qualified Language.JSON.AST as JSON import qualified Tags.Tagging.Precise as Tags -import qualified TreeSitter.JSON (tree_sitter_json) -import qualified TreeSitter.JSON.AST as JSON -import qualified TreeSitter.Unmarshal as TS +import qualified Language.JSON.Grammar (tree_sitter_json) +import qualified AST.Unmarshal as TS newtype Term a = Term { getTerm :: JSON.Document a } diff --git a/semantic-json/src/Language/JSON/AST.hs b/semantic-json/src/Language/JSON/AST.hs new file mode 100644 index 000000000..15b02f840 --- /dev/null +++ b/semantic-json/src/Language/JSON/AST.hs @@ -0,0 +1,20 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} +module Language.JSON.AST +( module Language.JSON.AST +) where + +import Prelude hiding (String) +import AST.GenerateSyntax +import qualified Language.JSON.Grammar as Grammar + +astDeclarationsForLanguage Grammar.tree_sitter_json "../../../vendor/tree-sitter-json/src/node-types.json" diff --git a/semantic-json/src/Language/JSON/Grammar.hs b/semantic-json/src/Language/JSON/Grammar.hs new file mode 100644 index 000000000..798366d84 --- /dev/null +++ b/semantic-json/src/Language/JSON/Grammar.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE TemplateHaskell #-} +module Language.JSON.Grammar +( tree_sitter_json +, Grammar(..) +) where + +import Language.Haskell.TH +import TreeSitter.JSON (tree_sitter_json) +import AST.Grammar.TH +import TreeSitter.Language (addDependentFileRelative) + +-- Regenerate template haskell code when these files change: +addDependentFileRelative "../../../vendor/tree-sitter-json/src/parser.c" + +-- | Statically-known rules corresponding to symbols in the grammar. +mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_json diff --git a/semantic-json/vendor/tree-sitter-json b/semantic-json/vendor/tree-sitter-json new file mode 160000 index 000000000..7b6a33f30 --- /dev/null +++ b/semantic-json/vendor/tree-sitter-json @@ -0,0 +1 @@ +Subproject commit 7b6a33f300e3e88c3017e0a9d88c77b50ea6d149 diff --git a/semantic-python/semantic-python.cabal b/semantic-python/semantic-python.cabal index 39d06b261..4c5cbe389 100644 --- a/semantic-python/semantic-python.cabal +++ b/semantic-python/semantic-python.cabal @@ -26,10 +26,12 @@ common haskell , parsers ^>= 0.12.10 , semantic-analysis ^>= 0 , semantic-core ^>= 0.0 + , semantic-codegen , semantic-source ^>= 0.0.2 , semantic-tags ^>= 0.0 , semantic-scope-graph ^>= 0.0 , semilattices ^>= 0 + , template-haskell ^>= 2.15 , text ^>= 1.2.3 , tree-sitter ^>= 0.8 , tree-sitter-python ^>= 0.8.1 @@ -54,7 +56,9 @@ library import: haskell exposed-modules: Language.Python + Language.Python.AST Language.Python.Core + Language.Python.Grammar Language.Python.Failure Language.Python.Patterns Language.Python.ScopeGraph @@ -80,6 +84,7 @@ test-suite compiling , process ^>= 1.6.5 , resourcet ^>= 1.2.2 , semantic-analysis ^>= 0 + , semantic-codegen , streaming ^>= 0.2.2 , streaming-process ^>= 0.1 , streaming-bytestring ^>= 0.1.6 @@ -100,6 +105,7 @@ test-suite graphing build-depends: base , semantic-python + , semantic-codegen , semantic-scope-graph , bytestring , pathtype diff --git a/semantic-python/src/Language/Python.hs b/semantic-python/src/Language/Python.hs index 0539ba4e6..59abf0d09 100644 --- a/semantic-python/src/Language/Python.hs +++ b/semantic-python/src/Language/Python.hs @@ -1,17 +1,17 @@ -- | Semantic functionality for Python programs. module Language.Python ( Term(..) -, TreeSitter.Python.tree_sitter_python +, Language.Python.Grammar.tree_sitter_python ) where import Data.Proxy +import qualified Language.Python.AST as Py import Language.Python.ScopeGraph import qualified Language.Python.Tags as PyTags import ScopeGraph.Convert import qualified Tags.Tagging.Precise as Tags -import qualified TreeSitter.Python (tree_sitter_python) -import qualified TreeSitter.Python.AST as Py -import qualified TreeSitter.Unmarshal as TS +import qualified Language.Python.Grammar (tree_sitter_python) +import qualified AST.Unmarshal as TS newtype Term a = Term { getTerm :: Py.Module a } diff --git a/semantic-python/src/Language/Python/AST.hs b/semantic-python/src/Language/Python/AST.hs new file mode 100644 index 000000000..0f10d379d --- /dev/null +++ b/semantic-python/src/Language/Python/AST.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} + +module Language.Python.AST +( module Language.Python.AST +) where + +import Prelude hiding (False, Float, Integer, String, True) +import AST.GenerateSyntax +import qualified Language.Python.Grammar as Grammar + +astDeclarationsForLanguage Grammar.tree_sitter_python "../../../vendor/tree-sitter-python/src/node-types.json" diff --git a/semantic-python/src/Language/Python/Core.hs b/semantic-python/src/Language/Python/Core.hs index f4664dbeb..1dc61eafc 100644 --- a/semantic-python/src/Language/Python/Core.hs +++ b/semantic-python/src/Language/Python/Core.hs @@ -33,12 +33,12 @@ import Data.Function import Data.List.NonEmpty (NonEmpty (..)) import Data.Maybe import GHC.Records +import qualified Language.Python.AST as Py import Language.Python.Failure import Language.Python.Patterns import Source.Span (Span) import Syntax.Stack (Stack (..)) import qualified Syntax.Stack as Stack -import qualified TreeSitter.Python.AST as Py -- | Keeps track of the current scope's bindings (so that we can, when -- compiling a class or module, return the list of bound variables as diff --git a/semantic-python/src/Language/Python/Grammar.hs b/semantic-python/src/Language/Python/Grammar.hs new file mode 100644 index 000000000..2a975ee35 --- /dev/null +++ b/semantic-python/src/Language/Python/Grammar.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE TemplateHaskell #-} +module Language.Python.Grammar +( tree_sitter_python +, Grammar(..) +) where + +import Language.Haskell.TH +import TreeSitter.Python (tree_sitter_python) +import AST.Grammar.TH +import TreeSitter.Language (addDependentFileRelative) + +-- Regenerate template haskell code when these files change: +addDependentFileRelative "../../../vendor/tree-sitter-python/src/parser.c" + +-- | Statically-known rules corresponding to symbols in the grammar. +mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_python diff --git a/semantic-python/src/Language/Python/Patterns.hs b/semantic-python/src/Language/Python/Patterns.hs index 7cff2a38a..afe40c829 100644 --- a/semantic-python/src/Language/Python/Patterns.hs +++ b/semantic-python/src/Language/Python/Patterns.hs @@ -10,7 +10,7 @@ module Language.Python.Patterns import AST.Element import qualified Analysis.Name -import qualified TreeSitter.Python.AST as Py +import qualified Language.Python.AST as Py -- | Useful pattern synonym for extracting a single identifier from -- a Python ExpressionList. Easier than pattern-matching every time. diff --git a/semantic-python/src/Language/Python/ScopeGraph.hs b/semantic-python/src/Language/Python/ScopeGraph.hs index 54748331d..d9bcd6b85 100644 --- a/semantic-python/src/Language/Python/ScopeGraph.hs +++ b/semantic-python/src/Language/Python/ScopeGraph.hs @@ -34,10 +34,10 @@ import qualified Data.ScopeGraph as ScopeGraph import Data.Traversable import GHC.Records import GHC.TypeLits +import qualified Language.Python.AST as Py import Language.Python.Patterns import ScopeGraph.Convert (Result (..), complete, todo) import Source.Loc -import qualified TreeSitter.Python.AST as Py -- This orphan instance will perish once it lands in fused-effects. instance Algebra sig m => Algebra sig (Ap m) where diff --git a/semantic-python/src/Language/Python/Tags.hs b/semantic-python/src/Language/Python/Tags.hs index 00a9d1a9e..b308ff1d2 100644 --- a/semantic-python/src/Language/Python/Tags.hs +++ b/semantic-python/src/Language/Python/Tags.hs @@ -16,13 +16,13 @@ import Data.List.NonEmpty (NonEmpty (..)) import Data.Maybe (listToMaybe) import Data.Text as Text import GHC.Generics +import qualified Language.Python.AST as Py import Source.Loc import Source.Range import Source.Source as Source import Tags.Tag import qualified Tags.Tagging.Precise as Tags -import qualified TreeSitter.Python.AST as Py -import TreeSitter.Token +import AST.Token class ToTags t where tags diff --git a/semantic-python/test-graphing/GraphTest.hs b/semantic-python/test-graphing/GraphTest.hs index ce30b84a7..5f169dac0 100644 --- a/semantic-python/test-graphing/GraphTest.hs +++ b/semantic-python/test-graphing/GraphTest.hs @@ -24,8 +24,8 @@ import qualified System.Path as Path import qualified System.Path.Directory as Path import qualified Test.Tasty as Tasty import qualified Test.Tasty.HUnit as HUnit -import qualified TreeSitter.Python as TSP -import qualified TreeSitter.Unmarshal as TS +import qualified Language.Python.Grammar as TSP +import qualified AST.Unmarshal as TS {- diff --git a/semantic-python/test/Test.hs b/semantic-python/test/Test.hs index de15847c7..29f29244c 100644 --- a/semantic-python/test/Test.hs +++ b/semantic-python/test/Test.hs @@ -39,8 +39,8 @@ import System.Path (()) import qualified System.Path as Path import qualified System.Path.Directory as Path import qualified Text.Trifecta as Trifecta -import qualified TreeSitter.Python as TSP -import qualified TreeSitter.Unmarshal as TS +import qualified Language.Python.Grammar as TSP +import qualified AST.Unmarshal as TS import qualified Test.Tasty as Tasty import qualified Test.Tasty.HUnit as HUnit diff --git a/semantic-python/vendor/tree-sitter-python b/semantic-python/vendor/tree-sitter-python new file mode 160000 index 000000000..899ac8d5d --- /dev/null +++ b/semantic-python/vendor/tree-sitter-python @@ -0,0 +1 @@ +Subproject commit 899ac8d5d6c883b2f05362c9953e14e78aac474c diff --git a/semantic-ruby/semantic-ruby.cabal b/semantic-ruby/semantic-ruby.cabal index b592485bc..0fedf0862 100644 --- a/semantic-ruby/semantic-ruby.cabal +++ b/semantic-ruby/semantic-ruby.cabal @@ -24,9 +24,11 @@ common haskell , fused-effects ^>= 1.0 , fused-syntax , parsers ^>= 0.12.10 + , semantic-codegen , semantic-core ^>= 0.0 , semantic-source ^>= 0.0.2 , semantic-tags ^>= 0.0 + , template-haskell ^>= 2.15 , text ^>= 1.2.3 , tree-sitter ^>= 0.8 , tree-sitter-ruby ^>= 0.4.1 @@ -50,5 +52,7 @@ library import: haskell exposed-modules: Language.Ruby + Language.Ruby.AST + Language.Ruby.Grammar Language.Ruby.Tags hs-source-dirs: src diff --git a/semantic-ruby/src/Language/Ruby.hs b/semantic-ruby/src/Language/Ruby.hs index 06c1b0dc8..fad36a7eb 100644 --- a/semantic-ruby/src/Language/Ruby.hs +++ b/semantic-ruby/src/Language/Ruby.hs @@ -3,17 +3,17 @@ -- | Semantic functionality for Ruby programs. module Language.Ruby ( Term(..) -, TreeSitter.Ruby.tree_sitter_ruby +, Language.Ruby.Grammar.tree_sitter_ruby ) where +import qualified AST.Unmarshal as TS import Control.Carrier.State.Strict import Data.Proxy import Data.Text (Text) +import qualified Language.Ruby.AST as Rb +import qualified Language.Ruby.Grammar (tree_sitter_ruby) import qualified Language.Ruby.Tags as RbTags import qualified Tags.Tagging.Precise as Tags -import qualified TreeSitter.Ruby (tree_sitter_ruby) -import qualified TreeSitter.Ruby.AST as Rb -import qualified TreeSitter.Unmarshal as TS newtype Term a = Term { getTerm :: Rb.Program a } diff --git a/semantic-ruby/src/Language/Ruby/AST.hs b/semantic-ruby/src/Language/Ruby/AST.hs new file mode 100644 index 000000000..270b4436d --- /dev/null +++ b/semantic-ruby/src/Language/Ruby/AST.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} + +module Language.Ruby.AST +( module Language.Ruby.AST +) where + +import Prelude hiding (False, Float, Integer, Rational, String, True) +import AST.GenerateSyntax +import qualified Language.Ruby.Grammar as Grammar + +astDeclarationsForLanguage Grammar.tree_sitter_ruby "../../../vendor/tree-sitter-ruby/src/node-types.json" \ No newline at end of file diff --git a/semantic-ruby/src/Language/Ruby/Grammar.hs b/semantic-ruby/src/Language/Ruby/Grammar.hs new file mode 100644 index 000000000..2f426e334 --- /dev/null +++ b/semantic-ruby/src/Language/Ruby/Grammar.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE TemplateHaskell #-} +module Language.Ruby.Grammar +( tree_sitter_ruby +, Grammar(..) +) where + +import Language.Haskell.TH +import TreeSitter.Ruby (tree_sitter_ruby) +import AST.Grammar.TH +import TreeSitter.Language (addDependentFileRelative) + +-- Regenerate template haskell code when these files change: +addDependentFileRelative "../../../vendor/tree-sitter-ruby/src/parser.c" + +-- | Statically-known rules corresponding to symbols in the grammar. +mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_ruby diff --git a/semantic-ruby/src/Language/Ruby/Tags.hs b/semantic-ruby/src/Language/Ruby/Tags.hs index 3fe1cbe41..22d01a2e3 100644 --- a/semantic-ruby/src/Language/Ruby/Tags.hs +++ b/semantic-ruby/src/Language/Ruby/Tags.hs @@ -19,14 +19,14 @@ import Control.Monad import Data.Foldable import Data.Text as Text import GHC.Generics +import qualified Language.Ruby.AST as Rb import Source.Loc import Source.Range as Range import Source.Source as Source import Tags.Tag import qualified Tags.Tagging.Precise as Tags -import qualified TreeSitter.Ruby.AST as Rb -import TreeSitter.Token -import qualified TreeSitter.Unmarshal as TS +import AST.Token +import qualified AST.Unmarshal as TS class ToTags t where tags diff --git a/semantic-ruby/vendor/tree-sitter-ruby b/semantic-ruby/vendor/tree-sitter-ruby new file mode 160000 index 000000000..eb2b6225b --- /dev/null +++ b/semantic-ruby/vendor/tree-sitter-ruby @@ -0,0 +1 @@ +Subproject commit eb2b6225bfb80010f2e4cbd27db8c6f3775230b5 diff --git a/semantic-tsx/semantic-tsx.cabal b/semantic-tsx/semantic-tsx.cabal index cd74cbcc6..02bffdaaf 100644 --- a/semantic-tsx/semantic-tsx.cabal +++ b/semantic-tsx/semantic-tsx.cabal @@ -24,9 +24,11 @@ common haskell , fused-effects ^>= 1.0 , fused-syntax , parsers ^>= 0.12.10 + , semantic-codegen , semantic-core ^>= 0.0 , semantic-source ^>= 0.0.2 , semantic-tags ^>= 0.0 + , template-haskell ^>= 2.15 , text ^>= 1.2.3 , tree-sitter ^>= 0.8 , tree-sitter-tsx ^>= 0.4.2 @@ -50,5 +52,6 @@ library import: haskell exposed-modules: Language.TSX + Language.TSX.AST Language.TSX.Tags hs-source-dirs: src diff --git a/semantic-tsx/src/Language/TSX.hs b/semantic-tsx/src/Language/TSX.hs index 2a439c54a..a60d936af 100644 --- a/semantic-tsx/src/Language/TSX.hs +++ b/semantic-tsx/src/Language/TSX.hs @@ -6,11 +6,11 @@ module Language.TSX ) where import Data.Proxy +import qualified Language.TSX.AST as TSX import qualified Language.TSX.Tags as TsxTags import qualified Tags.Tagging.Precise as Tags import qualified TreeSitter.TSX (tree_sitter_tsx) -import qualified TreeSitter.TSX.AST as TSX -import qualified TreeSitter.Unmarshal as TS +import qualified AST.Unmarshal as TS newtype Term a = Term { getTerm :: TSX.Program a } diff --git a/semantic-tsx/src/Language/TSX/AST.hs b/semantic-tsx/src/Language/TSX/AST.hs new file mode 100644 index 000000000..524f042cc --- /dev/null +++ b/semantic-tsx/src/Language/TSX/AST.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} + +module Language.TSX.AST +( module Language.TSX.AST +) where + +import Prelude hiding (False, Float, Integer, String, True) +import AST.GenerateSyntax +import qualified TreeSitter.TSX as Grammar + +astDeclarationsForLanguage Grammar.tree_sitter_tsx "../../../vendor/tree-sitter-typescript/tsx/src/node-types.json" \ No newline at end of file diff --git a/semantic-tsx/src/Language/TSX/Grammar.hs b/semantic-tsx/src/Language/TSX/Grammar.hs new file mode 100644 index 000000000..3af3469b6 --- /dev/null +++ b/semantic-tsx/src/Language/TSX/Grammar.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE TemplateHaskell #-} +module Language.TSX.Grammar +( tree_sitter_tsx +, Grammar(..) +) where + +import Language.Haskell.TH +import TreeSitter.TSX (tree_sitter_tsx) +import AST.Grammar.TH +import TreeSitter.Language (addDependentFileRelative) + +-- Regenerate template haskell code when these files change: +addDependentFileRelative "../../../vendor/tree-sitter-typescript/tsx/src/parser.c" + +-- | Statically-known rules corresponding to symbols in the grammar. +mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_tsx diff --git a/semantic-tsx/src/Language/TSX/Tags.hs b/semantic-tsx/src/Language/TSX/Tags.hs index ade3bc2c4..00b75676f 100644 --- a/semantic-tsx/src/Language/TSX/Tags.hs +++ b/semantic-tsx/src/Language/TSX/Tags.hs @@ -15,12 +15,12 @@ import Control.Effect.Writer import Data.Foldable import Data.Text as Text import GHC.Generics +import qualified Language.TSX.AST as Tsx import Source.Loc import Source.Source as Source import Tags.Tag import qualified Tags.Tagging.Precise as Tags -import TreeSitter.Token -import qualified TreeSitter.TSX.AST as Tsx +import AST.Token class ToTags t where tags diff --git a/semantic-tsx/vendor/tree-sitter-typescript b/semantic-tsx/vendor/tree-sitter-typescript new file mode 160000 index 000000000..aa950f58e --- /dev/null +++ b/semantic-tsx/vendor/tree-sitter-typescript @@ -0,0 +1 @@ +Subproject commit aa950f58ea8aa112bc72f3481b98fc2d3c07b3e0 diff --git a/semantic-typescript/semantic-typescript.cabal b/semantic-typescript/semantic-typescript.cabal index 3bd8b22c1..8c15efa66 100644 --- a/semantic-typescript/semantic-typescript.cabal +++ b/semantic-typescript/semantic-typescript.cabal @@ -24,9 +24,11 @@ common haskell , fused-effects ^>= 1.0 , fused-syntax , parsers ^>= 0.12.10 + , semantic-codegen , semantic-core ^>= 0.0 , semantic-source ^>= 0.0.2 , semantic-tags ^>= 0.0 + , template-haskell ^>= 2.15 , text ^>= 1.2.3 , tree-sitter ^>= 0.8 , tree-sitter-typescript ^>= 0.4.2 @@ -50,5 +52,7 @@ library import: haskell exposed-modules: Language.TypeScript + Language.TypeScript.AST + Language.TypeScript.Grammar Language.TypeScript.Tags hs-source-dirs: src diff --git a/semantic-typescript/src/Language/TypeScript.hs b/semantic-typescript/src/Language/TypeScript.hs index 13989839e..29460afc1 100644 --- a/semantic-typescript/src/Language/TypeScript.hs +++ b/semantic-typescript/src/Language/TypeScript.hs @@ -2,15 +2,15 @@ -- | Semantic functionality for TypeScript programs. module Language.TypeScript ( Term(..) -, TreeSitter.TypeScript.tree_sitter_typescript +, Language.TypeScript.Grammar.tree_sitter_typescript ) where import Data.Proxy +import qualified Language.TypeScript.AST as TypeScript import qualified Language.TypeScript.Tags as TsTags import qualified Tags.Tagging.Precise as Tags -import qualified TreeSitter.TypeScript (tree_sitter_typescript) -import qualified TreeSitter.TypeScript.AST as TypeScript -import qualified TreeSitter.Unmarshal as TS +import qualified Language.TypeScript.Grammar (tree_sitter_typescript) +import qualified AST.Unmarshal as TS newtype Term a = Term { getTerm :: TypeScript.Program a } diff --git a/semantic-typescript/src/Language/TypeScript/AST.hs b/semantic-typescript/src/Language/TypeScript/AST.hs new file mode 100644 index 000000000..e55c62f4b --- /dev/null +++ b/semantic-typescript/src/Language/TypeScript/AST.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} + +module Language.TypeScript.AST +( module Language.TypeScript.AST +) where + +import Prelude hiding (False, Float, Integer, String, True) +import AST.GenerateSyntax +import qualified Language.TypeScript.Grammar as Grammar + +astDeclarationsForLanguage Grammar.tree_sitter_typescript "../../../vendor/tree-sitter-typescript/typescript/src/node-types.json" diff --git a/semantic-typescript/src/Language/TypeScript/Grammar.hs b/semantic-typescript/src/Language/TypeScript/Grammar.hs new file mode 100644 index 000000000..b398626fa --- /dev/null +++ b/semantic-typescript/src/Language/TypeScript/Grammar.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE TemplateHaskell #-} +module Language.TypeScript.Grammar +( tree_sitter_typescript +, Grammar(..) +) where + +import Language.Haskell.TH +import TreeSitter.TypeScript (tree_sitter_typescript) +import AST.Grammar.TH +import TreeSitter.Language (addDependentFileRelative) + +-- Regenerate template haskell code when these files change: +addDependentFileRelative "../../../vendor/tree-sitter-typescript/typescript/src/parser.c" + +-- | Statically-known rules corresponding to symbols in the grammar. +mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_typescript diff --git a/semantic-typescript/src/Language/TypeScript/Tags.hs b/semantic-typescript/src/Language/TypeScript/Tags.hs index e36b28285..40713d684 100644 --- a/semantic-typescript/src/Language/TypeScript/Tags.hs +++ b/semantic-typescript/src/Language/TypeScript/Tags.hs @@ -15,12 +15,12 @@ import Control.Effect.Writer import Data.Foldable import Data.Text as Text import GHC.Generics +import qualified Language.TypeScript.AST as Ts import Source.Loc import Source.Source as Source import Tags.Tag import qualified Tags.Tagging.Precise as Tags -import TreeSitter.Token -import qualified TreeSitter.TypeScript.AST as Ts +import AST.Token class ToTags t where tags diff --git a/semantic-typescript/vendor/tree-sitter-typescript b/semantic-typescript/vendor/tree-sitter-typescript new file mode 160000 index 000000000..40320d8e0 --- /dev/null +++ b/semantic-typescript/vendor/tree-sitter-typescript @@ -0,0 +1 @@ +Subproject commit 40320d8e0953db5e173e6dfbb596d375a085ca65 diff --git a/semantic.cabal b/semantic.cabal index 02e041c73..290fd0d5f 100644 --- a/semantic.cabal +++ b/semantic.cabal @@ -67,6 +67,7 @@ common dependencies , recursion-schemes ^>= 5.1 , scientific ^>= 0.3.6.2 , safe-exceptions ^>= 0.1.7.0 + , semantic-codegen , semantic-analysis ^>= 0 , semantic-source ^>= 0.0.2 , semilattices ^>= 0.0.0.3 @@ -150,7 +151,7 @@ library , Data.Error , Data.Flag , Data.Functor.Classes.Generic - , Data.Graph + , Data.Graph.Algebraic , Data.Graph.ControlFlowVertex , Data.Handle , Data.History @@ -304,6 +305,8 @@ library , unordered-containers ^>= 0.2.9.0 , vector ^>= 0.12.0.2 , tree-sitter-go ^>= 0.4.1.1 + , tree-sitter-java ^>= 0.6.1 + , tree-sitter-json ^>= 0.6 , tree-sitter-php ^>= 0.2 , tree-sitter-python ^>= 0.8.1 , tree-sitter-ruby ^>= 0.4.1 @@ -358,6 +361,7 @@ test-suite test , Generators , Properties build-depends: semantic + , semantic-json , tree-sitter-json ^>= 0.6 , Glob ^>= 0.10.0 , hedgehog ^>= 1 diff --git a/src/Analysis/Abstract/Graph.hs b/src/Analysis/Abstract/Graph.hs index 28137fad3..2e738f8e6 100644 --- a/src/Analysis/Abstract/Graph.hs +++ b/src/Analysis/Abstract/Graph.hs @@ -34,7 +34,7 @@ import Control.Effect.Sum.Project import Data.Abstract.BaseError import Data.Abstract.Module (Module (moduleInfo), ModuleInfo (..)) import Data.ByteString.Builder -import Data.Graph +import Data.Graph.Algebraic import Data.Graph.ControlFlowVertex import Data.Map (Map) import qualified Data.Map as Map diff --git a/src/Data/Graph.hs b/src/Data/Graph/Algebraic.hs similarity index 99% rename from src/Data/Graph.hs rename to src/Data/Graph/Algebraic.hs index cb4c1c75d..bdf09ebd0 100644 --- a/src/Data/Graph.hs +++ b/src/Data/Graph/Algebraic.hs @@ -4,7 +4,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} -module Data.Graph +module Data.Graph.Algebraic ( Graph(..) , overlay , connect diff --git a/src/Data/Graph/ControlFlowVertex.hs b/src/Data/Graph/ControlFlowVertex.hs index c3c21822c..43cf6cd41 100644 --- a/src/Data/Graph/ControlFlowVertex.hs +++ b/src/Data/Graph/ControlFlowVertex.hs @@ -29,7 +29,7 @@ import Data.Abstract.Declarations import Data.Abstract.Module (ModuleInfo (..)) import Data.Abstract.Package (PackageInfo (..)) import Data.Aeson -import Data.Graph (VertexTag (..)) +import Data.Graph.Algebraic (VertexTag (..)) import Data.Hashable import Data.Proxy import Data.Quieterm (Quieterm (..)) diff --git a/src/Language/Go/Assignment.hs b/src/Language/Go/Assignment.hs index a7a4077bb..ee35acafd 100644 --- a/src/Language/Go/Assignment.hs +++ b/src/Language/Go/Assignment.hs @@ -15,7 +15,7 @@ import Assigning.Assignment hiding (Assignment, Error) import qualified Assigning.Assignment as Assignment import Control.Monad import qualified Data.Abstract.ScopeGraph as ScopeGraph (AccessControl (..)) -import Data.ImportPath (defaultAlias, importPath) +import Data.ImportPath () import Data.List.NonEmpty (NonEmpty (..), some1) import Data.Sum import Data.Syntax @@ -31,7 +31,8 @@ import qualified Data.Term as Term import Language.Go.Syntax as Go.Syntax hiding (labelName, runeLiteral) import Language.Go.Term as Go import Language.Go.Type as Go.Type -import TreeSitter.Go as Grammar +import Data.ImportPath (importPath, defaultAlias) +import Language.Go.Grammar as Grammar type Assignment = Assignment.Assignment [] Grammar diff --git a/src/Language/Python/Assignment.hs b/src/Language/Python/Assignment.hs index 4039ea55e..3bf59e143 100644 --- a/src/Language/Python/Assignment.hs +++ b/src/Language/Python/Assignment.hs @@ -40,7 +40,7 @@ import qualified Data.Syntax.Statement as Statement import qualified Data.Syntax.Type as Type import Language.Python.Syntax as Python.Syntax import Language.Python.Term as Python -import TreeSitter.Python as Grammar +import Language.Python.Grammar as Grammar type Assignment = Assignment.Assignment [] Grammar diff --git a/src/Language/Ruby/Assignment.hs b/src/Language/Ruby/Assignment.hs index af1d9954c..23acf8060 100644 --- a/src/Language/Ruby/Assignment.hs +++ b/src/Language/Ruby/Assignment.hs @@ -42,7 +42,7 @@ import qualified Data.Syntax.Statement as Statement import qualified Data.Text as Text import qualified Language.Ruby.Syntax as Ruby.Syntax import Language.Ruby.Term as Ruby -import TreeSitter.Ruby as Grammar +import Language.Ruby.Grammar as Grammar type Assignment = Assignment.Assignment [] Grammar diff --git a/src/Language/TypeScript/Assignment.hs b/src/Language/TypeScript/Assignment.hs index 770b1ead4..2fbb670c6 100644 --- a/src/Language/TypeScript/Assignment.hs +++ b/src/Language/TypeScript/Assignment.hs @@ -43,7 +43,7 @@ import qualified Data.Syntax.Type as Type import qualified Language.TypeScript.Resolution as TypeScript.Resolution import qualified Language.TypeScript.Syntax as TypeScript.Syntax import Language.TypeScript.Term as TypeScript -import TreeSitter.TypeScript as Grammar +import Language.TypeScript.Grammar as Grammar type Assignment = Assignment.Assignment [] Grammar diff --git a/src/Parsing/Parser.hs b/src/Parsing/Parser.hs index 95e2b5c40..5266766ec 100644 --- a/src/Parsing/Parser.hs +++ b/src/Parsing/Parser.hs @@ -66,14 +66,14 @@ import qualified Language.TSX.Assignment as TSXALaCarte import qualified Language.TypeScript as TypeScriptPrecise import qualified Language.TypeScript.Assignment as TypeScriptALaCarte import Prelude hiding (fail) -import TreeSitter.Go +import Language.Go.Grammar import qualified TreeSitter.Language as TS (Language, Symbol) import TreeSitter.PHP -import TreeSitter.Python -import TreeSitter.Ruby (tree_sitter_ruby) +import Language.Python.Grammar +import Language.Ruby.Grammar (tree_sitter_ruby) import TreeSitter.TSX -import TreeSitter.TypeScript -import TreeSitter.Unmarshal +import Language.TypeScript.Grammar +import AST.Unmarshal -- | A parser from 'Source' onto some term type. data Parser term where diff --git a/src/Parsing/TreeSitter.hs b/src/Parsing/TreeSitter.hs index 406a79068..d984d52fa 100644 --- a/src/Parsing/TreeSitter.hs +++ b/src/Parsing/TreeSitter.hs @@ -36,7 +36,7 @@ import qualified TreeSitter.Language as TS import qualified TreeSitter.Node as TS import qualified TreeSitter.Parser as TS import qualified TreeSitter.Tree as TS -import qualified TreeSitter.Unmarshal as TS +import qualified AST.Unmarshal as TS data TSParseException = ParserTimedOut diff --git a/src/Rendering/Graph.hs b/src/Rendering/Graph.hs index 66fd95ac9..ac1b448ce 100644 --- a/src/Rendering/Graph.hs +++ b/src/Rendering/Graph.hs @@ -20,7 +20,7 @@ import Data.Diff import Data.Edit import Data.Foldable import Data.Functor.Foldable -import Data.Graph +import Data.Graph.Algebraic import Data.ProtoLens (defMessage) import Data.String (IsString (..)) import Data.Term diff --git a/src/Semantic/Api/Diffs.hs b/src/Semantic/Api/Diffs.hs index fbb1b4aa2..3fdc570cb 100644 --- a/src/Semantic/Api/Diffs.hs +++ b/src/Semantic/Api/Diffs.hs @@ -28,7 +28,7 @@ import Data.Diff import Data.Edit import Data.Foldable import Data.Functor.Classes -import Data.Graph +import Data.Graph.Algebraic import Data.JSON.Fields (ToJSONFields1) import Data.Language import Data.Map.Strict (Map) diff --git a/src/Semantic/Api/Terms.hs b/src/Semantic/Api/Terms.hs index 6935e1ef0..1f8c76345 100644 --- a/src/Semantic/Api/Terms.hs +++ b/src/Semantic/Api/Terms.hs @@ -29,7 +29,7 @@ import Data.Either import Data.Foldable import Data.Functor.Classes import Data.Functor.Foldable -import Data.Graph +import Data.Graph.Algebraic (Edge(..), vertexList, edgeList) import Data.Language import Data.Map.Strict (Map) import Data.ProtoLens (defMessage) diff --git a/src/Semantic/Graph.hs b/src/Semantic/Graph.hs index 413b0761e..7435e5890 100644 --- a/src/Semantic/Graph.hs +++ b/src/Semantic/Graph.hs @@ -68,7 +68,7 @@ import Data.Abstract.Value.Concrete as Concrete (Value, ValueError (.. import Data.Abstract.Value.Type as Type import Data.Blob import Data.Functor.Foldable -import Data.Graph +import Data.Graph.Algebraic import Data.Graph.ControlFlowVertex (VertexDeclaration) import Data.Language as Language import Data.List (find, isPrefixOf) diff --git a/src/Semantic/Util.hs b/src/Semantic/Util.hs index f3a754047..a2ec3a106 100644 --- a/src/Semantic/Util.hs +++ b/src/Semantic/Util.hs @@ -36,7 +36,7 @@ import qualified Data.Abstract.ModuleTable as ModuleTable import Data.Abstract.Package import Data.Abstract.Value.Concrete as Concrete import Data.Blob.IO -import Data.Graph (topologicalSort) +import Data.Graph.Algebraic (topologicalSort) import qualified Data.Language as Language import Data.List (uncons) import Data.Maybe diff --git a/test/Data/Graph/Spec.hs b/test/Data/Graph/Spec.hs index 22843514c..950b1d883 100644 --- a/test/Data/Graph/Spec.hs +++ b/test/Data/Graph/Spec.hs @@ -1,10 +1,8 @@ -{-# LANGUAGE PackageImports #-} - module Data.Graph.Spec (spec) where import SpecHelpers -import "semantic" Data.Graph +import Data.Graph.Algebraic import qualified Algebra.Graph.Class as Class spec :: Spec diff --git a/test/Graphing/Calls/Spec.hs b/test/Graphing/Calls/Spec.hs index 196d922e6..7c2da7b5c 100644 --- a/test/Graphing/Calls/Spec.hs +++ b/test/Graphing/Calls/Spec.hs @@ -1,7 +1,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE PackageImports #-} {-# LANGUAGE TypeApplications #-} module Graphing.Calls.Spec ( spec ) where @@ -13,7 +12,7 @@ import Algebra.Graph import qualified Analysis.File as File import Control.Effect.Parse -import "semantic" Data.Graph (Graph (..), topologicalSort) +import Data.Graph.Algebraic (Graph (..), topologicalSort) import Data.Graph.ControlFlowVertex import qualified Data.Language as Language import Semantic.Graph diff --git a/test/Parsing/Spec.hs b/test/Parsing/Spec.hs index 741ee1d8e..ee5a8b04e 100644 --- a/test/Parsing/Spec.hs +++ b/test/Parsing/Spec.hs @@ -10,7 +10,7 @@ import Parsing.TreeSitter import Source.Source import SpecHelpers import qualified System.Path as Path -import TreeSitter.JSON (Grammar, tree_sitter_json) +import Language.JSON.Grammar (Grammar, tree_sitter_json) spec :: Spec spec = do