1
1
mirror of https://github.com/github/semantic.git synced 2024-11-22 05:43:42 +03:00

Merge branch 'master' into symbol-node-type

This commit is contained in:
Max Brunsfeld 2020-06-02 14:06:06 -07:00
commit 7f50ee1be5
147 changed files with 2246 additions and 3301 deletions

View File

@ -25,3 +25,24 @@
:seti -Wno-unsafe
:seti -Wno-star-is-type
:seti -Wno-missing-deriving-strategies
-- Turn on some language extensions you use a lot
:seti -XFlexibleContexts -XOverloadedStrings -XTypeApplications
-- Break on errors
:seti -fbreak-on-error
-- Automatically show the code around breakpoints
:set stop :list
-- Use a cyan lambda as the prompt
:set prompt "\ESC[1;36m\STXλ \ESC[m\STX"
-- Better errors
:set -ferror-spans -freverse-errors -fprint-expanded-synonyms
-- Better typed holes
:set -funclutter-valid-hole-fits -fabstract-refinement-hole-fits -frefinement-level-hole-fits=2
-- This usually impairs understanding
:seti -Wno-type-defaults

View File

@ -28,38 +28,35 @@ jobs:
cabal-version: ${{ matrix.cabal }}
- uses: actions/cache@v1
name: Cache ~/.cabal/packages
id: cache-cabal
name: Cache Cabal artifacts
with:
path: ~/.cabal/packages
key: ${{ runner.os }}-${{ matrix.ghc }}-cabal-packages
path: dist-cache
key: ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cabal }}-cabal-cache-${{ hashFiles('**/*.cabal') }}
restore-keys: |
${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cabal }}-cabal-cache-
${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cabal }}-
${{ runner.os }}-${{ matrix.ghc }}-
${{ runner.os }}-
- uses: actions/cache@v1
name: Cache ~/.cabal/store
with:
path: ~/.cabal/store
key: ${{ runner.os }}-${{ matrix.ghc }}-v10-cabal-store
- name: Get cabal-cache
run: |
curl -L https://github.com/haskell-works/cabal-cache/releases/download/v1.0.1.8/cabal-cache_x86_64_linux.tar.gz > ./cc.tar.gz
tar -xvf ./cc.tar.gz
- uses: actions/cache@v1
name: Cache dist-newstyle
with:
path: dist-newstyle
key: ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cabal }}-semantic-dist
# - name: hlint
# run: |
# test -f dist-newstyle/hlint || cabal install hlint --installdir=dist-newstyle
# dist-newstyle/hlint src semantic-python
- name: Install dependencies
- name: Configure project
run: |
cabal v2-update
cabal v2-configure --project-file=cabal.project.ci --disable-optimization --enable-benchmarks --enable-tests --write-ghc-environment-files=always -j2
cabal v2-build --project-file=cabal.project.ci all --only-dependencies
- name: Restore from cache
run: ./cabal-cache sync-from-archive --threads=2 --archive-uri=dist-cache || true
- name: Build & test
run: |
cabal v2-build --project-file=cabal.project.ci
cabal v2-run --project-file=cabal.project.ci semantic:test
cabal v2-run --project-file=cabal.project.ci semantic-codeql:test
cabal v2-run --project-file=cabal.project.ci semantic-core:test
cabal v2-run --project-file=cabal.project.ci semantic-go:test
cabal v2-run --project-file=cabal.project.ci semantic-java:test
@ -72,3 +69,6 @@ jobs:
cabal v2-run --project-file=cabal.project.ci semantic-typescript:test
cd semantic-source; cabal v2-run --project-file=cabal.project.ci semantic-source:test; cd ..
cd semantic-source; cabal v2-run --project-file=cabal.project.ci semantic-source:doctest -- src; cd ..
- name: Write out cache
run: ./cabal-cache sync-to-archive --threads=2 --archive-uri=dist-cache

0
.gitmodules vendored Normal file
View File

View File

@ -22,7 +22,7 @@ COPY --from=haskell /opt/ghc/8.8.1/lib/ghc-8.8.1/* /opt/ghc/8.8.1/lib/ghc-8.8.1/
ENTRYPOINT ["/protobuf/bin/protoc", "-I/protobuf", "--plugin=protoc-gen-haskell=/usr/local/bin/proto-lens-protoc"]
# Build semantic
FROM haskell:8.8 as build
FROM haskell:8.8.1 as build
WORKDIR /build
# Build all of semantic

View File

@ -35,7 +35,7 @@ Available options:
#### Diff
```
Usage: semantic diff ([--sexpression] | [--json] | [--json-graph] | [--toc] |
Usage: semantic diff ([--sexpression] | [--json] | [--json-graph] |
[--dot] | [--show]) [FILE_A] [FILE_B]
Compute changes between paths
@ -43,7 +43,6 @@ Available options:
--sexpression Output s-expression diff tree (default)
--json Output JSON diff trees
--json-graph Output JSON diff trees
--toc Output JSON table of contents diff summary
--dot Output the diff as a DOT graph
--show Output using the Show instance (debug only, format
subject to change without notice)
@ -75,19 +74,20 @@ Available options:
## Language support
| Priority | Language | Parse | Assign | Diff | ToC | Symbols | Import graph | Call graph | Control flow graph |
| :---: | :------------- | :---: | :---: | :---: | :--:| :---: | :---: | :---: | :---: |
| 1 | Ruby | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| 2 | JavaScript | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| 3 | TypeScript | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| 4 | Python | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| 5 | Go | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| | PHP | 🚧 | 🚧 | 🚧 | 🚧| 🚧 | | | |
| | Java | 🚧 | N/A | 🚧 | 🚧 | ✅ | | | |
| | JSON | ✅ | N/A | ✅ | N/A | N/A | N/A | N/A| |
| | JSX | ✅ | ✅ | ✅ | 🔶 | | | | |
| | Haskell | 🚧 | 🚧 | 🚧 | 🔶 | 🚧 | | | |
| | Markdown | 🚧 | 🚧 | 🚧 | 🚧 | N/A | N/A | N/A |   |
| Priority | Language | Parse | Assign | Diff | Symbols | Import graph | Call graph | Control flow graph |
| :---: | :------------- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| 1 | Ruby | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| 2 | JavaScript | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| 3 | TypeScript | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| 4 | Python | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| 5 | Go | ✅ | ✅ | ✅ | ✅ | ✅ | 🚧 | |
| | PHP | 🚧 | 🚧 | 🚧 | 🚧 | | | |
| | Java | 🚧 | N/A | 🚧 | ✅ | | | |
| | JSON | ✅ | N/A | ✅ | N/A | N/A | N/A | |
| | JSX | ✅ | ✅ | ✅ | | | | |
| | Haskell | 🚧 | 🚧 | 🚧 | 🚧 | | | |
| | Markdown | 🚧 | 🚧 | 🚧 | N/A | N/A | N/A |   |
| | CodeQL | ✅ | N/A | 🚧 | ✅ | | | |
* ✅ — Supported
* 🔶 — Partial support
@ -109,7 +109,7 @@ cabal v2-test
cabal v2-run semantic -- --help
```
`stack` as a build tool is not officially supported; there is an unofficial [`stack.yaml`](https://gist.github.com/jkachmar/f200caee83280f1f25e9cfa2dd2b16bb) available, though we cannot make guarantees as to its stability.
`stack` as a build tool is not officially supported; there is [unofficial `stack.yaml` support](https://github.com/jkachmar/semantic-stack-yaml) available, though we cannot make guarantees as to its stability.
[nix]: https://www.haskell.org/cabal/users-guide/nix-local-build-overview.html
[ghcup]: https://www.haskell.org/ghcup/

View File

@ -38,7 +38,7 @@ callGraphProject' :: ( Language.SLanguage lang
callGraphProject' session proxy path
| Just (SomeParser parser) <- parserForLanguage analysisParsers lang = fmap (bimap show (const ())) . runTask session $ do
blob <- readBlobFromPath (Path.toAbsRel path)
package <- fmap snd <$> runParse (Duration.fromSeconds 10) (parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] lang []))
package <- fmap snd <$> runParse (Duration.fromSeconds 10) (parsePackage parser (Project (Path.toAbsRel (Path.takeDirectory path)) [blob] lang []))
modules <- topologicalSort <$> runImportGraphToModules proxy package
runCallGraph proxy False modules package
| otherwise = error $ "Analysis not supported for: " <> show lang

View File

@ -16,7 +16,7 @@ import qualified System.Path as Path
import qualified Analysis.File as File
import Data.Flag
import Proto.Semantic as P hiding (Blob, BlobPair)
import Proto.Semantic as P hiding (Blob)
import Semantic.Api.Symbols (parseSymbols)
import Semantic.Config as Config
import Semantic.Task

View File

@ -4,6 +4,7 @@
packages: .
semantic-analysis
semantic-ast
semantic-codeql
semantic-core
semantic-go
semantic-java
@ -16,6 +17,7 @@ packages: .
semantic-tsx
semantic-typescript
semantic-tags
semantic-rust
-- Packages brought in from other repos instead of hackage
-- ATTENTION: remember to update cabal.project.ci when bumping SHAs here!
@ -28,8 +30,3 @@ source-repository-package
type: git
location: https://github.com/antitypical/fused-syntax.git
tag: d11e14581217590a5c67f79cbaeee35ac8acee6a
source-repository-package
type: git
location: https://github.com/fused-effects/fused-effects-readline.git
tag: 7a96949c77c73c6e5975c8d6171ffb63eb76b467

View File

@ -4,6 +4,7 @@
packages: .
semantic-analysis
semantic-ast
semantic-codeql
semantic-core
semantic-go
semantic-java
@ -12,6 +13,7 @@ packages: .
semantic-php
semantic-python
semantic-ruby
semantic-rust
semantic-scope-graph
semantic-tsx
semantic-typescript
@ -29,12 +31,6 @@ source-repository-package
location: https://github.com/antitypical/fused-syntax.git
tag: d11e14581217590a5c67f79cbaeee35ac8acee6a
source-repository-package
type: git
location: https://github.com/fused-effects/fused-effects-readline.git
tag: 7a96949c77c73c6e5975c8d6171ffb63eb76b467
-- Treat warnings as errors for CI builds
package semantic
ghc-options: -Werror
@ -45,6 +41,9 @@ package semantic-analysis
package semantic-ast
ghc-options: -Werror
package semantic-codeql
ghc-options: -Werror
package semantic-core
ghc-options: -Werror

View File

@ -1,19 +1,21 @@
# Adding new languages to Semantic
This document exists to outline the process associated with adding a new language to Semantic. Though the Semantic authors have architected the library such that adding new languages and syntax [requires no changes to existing code](https://en.wikipedia.org/wiki/Expression_problem), adding support for a new language is a nontrivial amount of work. Those willing to take the plunge will probably need a degree of Haskell experience.
This document outlines the process for adding a new language to Semantic. Though the Semantic authors have architected the library such that adding new languages and syntax [requires no changes to existing code](https://en.wikipedia.org/wiki/Expression_problem), adding support for a new language is a nontrivial amount of work. Those willing to take the plunge will probably need a degree of Haskell experience.
Please note that this list of steps reflects the state of Semantic as is, not where we authors are taking it: we're working on significant simplifications to this process (see the FAQs below).
Note that we recently transitioned the system to auto-generate strongly-typed ASTs using [CodeGen](https://github.com/github/semantic/blob/master/docs/codegen.md), our new language support library. More information is provided below in the [FAQs](#FAQs).
## The procedure
1. **Find or write a [tree-sitter](https://tree-sitter.github.io) parser for your language.** The tree-sitter [organization page](https://github.com/tree-sitter) has a number of parsers beyond those we currently support in Semantic; look there first to make sure you're not duplicating work. The tree-sitter [documentation on creating parsers](http://tree-sitter.github.io/tree-sitter/creating-parsers) provides an exhaustive look at the process of developing and debugging tree-sitter parsers. Though we do not support grammars written with other toolkits such as [ANTLR](https://www.antlr.org), translating an ANTLR or other BNF-style grammar into a tree-sitter grammar is usually straightforward.
2. **Create a Haskell library providing an interface to that C source.** The [`haskell-tree-sitter`](https://github.com/tree-sitter/haskell-tree-sitter) repository provides a Cabal package for each supported language. You can find an example of a pull request to add such a package here. Each package needs to provide two API surfaces:
* a bridged (via the FFI) reference to the toplevel parser in the generated file ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/internal/TreeSitter/JSON/Internal.hs))
* symbol datatypes for each syntax node in the parser, generated with the `mkSymbolDatatype` Template Haskell splice ([example](https://github.com/github/semantic/blob/master/semantic-json/src/Language/JSON/Grammar.hs)). For more information, see [CodeGen docs](https://github.com/github/semantic/blob/master/semantic-ast/README.md).
3. **Identify the new syntax nodes required to represent your language.** While we provide an extensive library of reusable AST nodes for [literals](https://github.com/github/semantic/blob/master/src/Data/Syntax/Literal.hs), [expressions](https://github.com/github/semantic/blob/master/src/Data/Syntax/Expression.hs), [statements](https://github.com/github/semantic/blob/master/src/Data/Syntax/Statement.hs), and [types](https://github.com/github/semantic/blob/master/src/Data/Syntax/Type.hs), most languages will require some syntax nodes not found in other languages. You'll need to create a new module providing those data types, and those data types must be written as an open union: [here](https://github.com/github/semantic/commits/master/src/Language/Ruby/Syntax.hs?author=charliesome) is an example for Ruby's syntactic details.
4. **Write an assignment step that translates tree-sitter trees into Haskell datatypes.** More information about this can be found in the [assignment documentation](assignment.md). This is currently the most time-consuming and error-prone part of the process (see [https://github.com/github/semantic/issues/77]).
5. **Implement `Evaluatable` instances and add new [`Value` effects](https://github.com/github/semantic/blob/master/src/Control/Abstract/Value.hs) as is needed to describe the control flow of your language.** While several features of Semantic (e.g. `semantic parse --symbols` and `semantic diff`) will become fully available given a working assignment step, further features based on concrete or abstract interpretation (such as `semantic graph`) require implementing the `Evaluatable` typeclass and providing value-style effects for each control flow feature provided by the language. This means that language support is a spectrum: Semantic can provide useful information without any knowledge of a language's semantics, but each successive addition to its interpretive capabilities enables more functionality.
6. **Add tests for diffing, tagging, graphing, and evaluating code written in that language.** Because tree-sitter grammars often change, we require extensive testing so as to avoid the unhappy situation of bitrotted languages that break as soon as a new grammar comes down the line.
2. **Create a Haskell library providing an interface to that C source.** The [`haskell-tree-sitter`](https://github.com/tree-sitter/haskell-tree-sitter) repository provides a Cabal package for each supported language. You can find an example of a pull request to add such a package [here](https://github.com/tree-sitter/haskell-tree-sitter/pull/276/files), and a file providing:
- A bridged (via the FFI) reference to the toplevel parser in the generated file must be provided ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/TreeSitter/JSON.hs#L11)).
- A way to retrieve [`tree-sitter` data](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/TreeSitter/JSON.hs#L13-L14) used to auto-generate syntax datatypes using the following steps. During parser generation, tree-sitter produces a `node-types.json` file that captures the structure of a language's grammar. The autogeneration described below in Step 4 derives datatypes based on this structural representation. The `node-types.json` is a data file in `haskell-tree-sitter` that gets installed with the package. The function `getNodeTypesPath :: IO FilePath` is defined to access in the contents of this file, using `getDataFileName :: FilePath -> IO FilePath`, which is defined in the autogenerated `Paths_` module.
3. **Create a Haskell library in Semantic to auto-generate precise ASTs.** Create a `semantic-[LANGUAGE]` package. This is an example of [`semantic-python`](https://github.com/github/semantic/tree/master/semantic-python)). Each package needs to provide the following API surfaces:
- `Language.[LANGUAGE].AST` - Derives Haskell datatypes from a language and its `node-types.json` file ([example](https://github.com/github/semantic/blob/master/semantic-python/src/Language/Python/AST.hs)).
- `Language.[LANGUAGE].Grammar` - Provides statically-known rules corresponding to symbols in the grammar for each syntax node, generated with the `mkStaticallyKnownRuleGrammarData` Template Haskell splice ([example](https://github.com/github/semantic/blob/master/semantic-python/src/Language/Python/Grammar.hs)).
- `Language.[LANGUAGE]` - Semantic functionality for programs in a language ([example](https://github.com/github/semantic/blob/master/semantic-python/src/Language/Python.hs)).
- `Language.[LANGUAGE].Tags` - Computes tags for code nav definitions and references found in source ([example](https://github.com/github/semantic/blob/master/semantic-python/src/Language/Python/Tags.hs)).
5. **Add tests for precise ASTs, tagging and graphing, and evaluating code written in that language.** Because tree-sitter grammars often change, we require extensive testing so as to avoid the unhappy situation of bitrotted languages that break as soon as a new grammar comes down the line. Here are examples of tests for [precise ASTs](https://github.com/github/semantic/blob/master/semantic-python/test/PreciseTest.hs), [tagging](https://github.com/github/semantic/blob/master/test/Tags/Spec.hs), and [graphing](https://github.com/github/semantic/blob/master/semantic-python/test-graphing/GraphTest.hs).
To summarize, each interaction made possible by the Semantic CLI corresponds to one (or more) of the above steps:
@ -21,11 +23,11 @@ To summarize, each interaction made possible by the Semantic CLI corresponds to
|------|-----------------|
| 1, 2 | `ts-parse` |
| 3, 4 | `parse`, `diff` |
| 5, 6 | `graph` |
| 5 | `graph` |
# FAQs
**This sounds hard.** You're right! It is currently a lot of work: just because the Semantic architecture is extensible in the expression-problem manner does not mean that adding new support is trivial.
**Will this get easier in the future?** Unequivocally, yes. The Semantic authors are currently working on a new architecture for language support and parsing, one that dispenses with the assignment step altogether: in the future, `haskell-tree-sitter` will generate Haskell data types from tree-sitter grammars; instead of assigning these types into an open-union of syntax functors, you'll describe how these types are translated into the [Semantic core language](https://github.com/github/semantic/blob/master/semantic-core/src/Data/Core.hs). This will decouple syntax nodes from the process of interpretation and evaluation; all evaluators will be written in terms of the Core language. We hope that this will make the process of adding new languages significantly easier than it currently is, given that it entirely obviates the third and fourth steps lifted above.
**What recent changes have been made?** The Semantic authors have introduced a new architecture for language support and parsing, one that dispenses with the [assignment](https://github.com/github/semantic/blob/master/docs/assignment.md) step altogether. The `semantic-ast` package generates Haskell data types from tree-sitter grammars; these types are then translated into the [Semantic core language](https://github.com/github/semantic/blob/master/semantic-core/src/Data/Core.hs); all evaluators will then be written in terms of the Core language. As compared with the [historic process]() used to add new languages, these changes entire obviate the process of 1) assigning types into an open-union of syntax functors, and 2) implementing `Evaluatable` instances and adding value effects to describe the control flow of your language.

View File

@ -1,3 +1,5 @@
_Note that this document describes a process that is now deprecated. For more information, see documentation on [adding a new language](https://github.com/github/semantic/blob/master/docs/adding-new-languages.md)._
### What is Assignment?
"Assignment" refers to the part of our system that parses parse trees. The step preceding assignment uses [`tree-sitter`](https://github.com/tree-sitter/tree-sitter) to parse source code and output rose trees labeled with symbols in the [language's grammar](https://github.com/github/semantic/blob/master/docs/grammar-development-guide.md) and source locations (represented as byte range and span). Assignment is a second layer of parsing required to get these ASTs in a shape appropriate for our Haskell project and to support the types of analyses we'd like to perform further along. Assignment represents a partial map from AST nodes onto another structure, typically terms.

View File

@ -1,45 +1,36 @@
@ -1,216 +0,0 @@
# CodeGen Documentation
CodeGen is the process for auto-generating language-specific, strongly-typed ASTs to be used in [Semantic](https://github.com/github/semantic-code/blob/d9f91a05dc30a61b9ff8c536d75661d417f3c506/design-docs/precise-code-navigation.md).
CodeGen is the process for auto-generating language-specific, strongly-typed ASTs to be used in Semantic. Since it is a critical component of Semantic's language support process, we recommend reading [these docs](https://github.com/github/semantic/blob/715971067634f677bff8619add6490e03bb1825e/docs/adding-new-languages.md) first, as they provide an overview of the pipeline CodeGen supports.
### Prerequisites
To get started, first make sure your language has:
## Table of Contents
- [CodeGen Pipeline](#codegen-pipeline)
- [Generating ASTs](#generating-asts)
- [Inspecting auto-generated datatypes](#inspecting-auto-generated-datatypes)
- [Tests](#tests)
- [Additional notes](#additional-notes)
1. An existing [tree-sitter](http://tree-sitter.github.io/tree-sitter/) parser;
2. An existing Cabal package in [tree-sitter](http://tree-sitter.github.io/tree-sitter/) for said language. This will provide an interface into tree-sitter's C source. [Here](https://github.com/tree-sitter/haskell-tree-sitter/tree/master/tree-sitter-python) is an example of a library for Python, a supported language that the remaining documentation will refer to.
## CodeGen Pipeline
### CodeGen Pipeline
The following diagram outlines the entire language support pipeline.
During parser generation, tree-sitter produces a JSON file that captures the structure of a language's grammar. Based on this, we're able to derive datatypes representing surface languages, and then use those datatypes to generically build ASTs. This automates the engineering effort [historically required for adding a new language](https://github.com/github/semantic/blob/master/docs/adding-new-languages.md).
![image](https://user-images.githubusercontent.com/875834/80392707-801e9980-887d-11ea-9c95-e004bbe04be0.png)
The following steps provide a high-level outline of the process:
1. [**Deserialize.**](https://github.com/github/semantic/blob/master/semantic-ast/src/AST/Deserialize.hs) First, we deserialize the `node-types.json` file for a given language into the desired shape of datatypes via parsing capabilities afforded by the [Aeson](http://hackage.haskell.org/package/aeson) library. There are four distinct types represented in the node-types.json file takes on: sums, products, named leaves and anonymous leaves.
2. [**Generate Syntax.**](https://github.com/github/semantic/blob/master/semantic-ast/src/AST/GenerateSyntax.hs) We then use Template Haskell to auto-generate language-specific, strongly-typed datatypes that represent various language constructs. This API exports the top-level function `astDeclarationsForLanguage` to auto-generate datatypes at compile-time, which is is invoked by a given language [AST](https://github.com/github/semantic/blob/master/semantic-python/src/Language/Python/AST.hs) module.
3. [**Unmarshal.**](https://github.com/github/semantic/blob/master/semantic-ast/src/AST/Unmarshal.hs) Unmarshaling is the process of iterating over tree-sitters parse trees using its tree cursor API, and producing Haskell ASTs for the relevant nodes. We parse source code from tree-sitter and unmarshal the data we get to build these ASTs generically. This file exports the top-level function `parseByteString`, which takes source code and a language as arguments, and produces an AST.
Here is an example that describes the relationship between a Python identifier represented in the tree-sitter generated JSON file, and a datatype generated by Template Haskell based on the provided JSON:
| Type | JSON | TH-generated code |
|----------|--------------|------------|
|Named leaf|<code>{<br>"type": "identifier",<br>"named": true<br>}|<code>data TreeSitter.Python.AST.Identifier a<br>= TreeSitter.Python.AST.Identifier {TreeSitter.Python.AST.ann :: a,<br>TreeSitter.Python.AST.bytes :: text-1.2.3.1:Data.Text.Internal.Text} -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Show a => Show (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Ord a => Ord (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Eq a => Eq (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Traversable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Functor TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Foldable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Unmarshal TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance SymbolMatching TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1|
1. **Ingest source code.** The input to our system is blob data on GitHub.
2. **Write and generate tree-sitter grammar.** During parser generation, tree-sitter produces a `node-types.json` file that captures the structure of a language's grammar. Based on this JSON file, we're able to derive datatypes representing surface languages, and then use those datatypes to generically build ASTs.
3. **Provide interface to the C source.** The FFI provides us a way to bridge tree-sitter to our Haskell library. For more information, see our docs on [adding a new language](https://github.com/github/semantic/blob/master/docs/adding-new-languages.md).
4. **Automated AST generation via CodeGen APIs.** The CodeGen APIs live in the [`semantic-ast`](https://github.com/github/semantic/tree/715971067634f677bff8619add6490e03bb1825e/semantic-ast) package within [Semantic](https://github.com/github/semantic/tree/715971067634f677bff8619add6490e03bb1825e), and are explained as follows:
- [**Deserialize.**](https://github.com/github/semantic/blob/715971067634f677bff8619add6490e03bb1825e/semantic-ast/src/AST/Deserialize.hs) First, we deserialize the `node-types.json` file for a given language into the desired shape of datatypes via parsing capabilities afforded by the [Aeson](http://hackage.haskell.org/package/aeson) library. There are four distinct types represented in the node-types.json file takes on: sums, products, named leaves and anonymous leaves.
- [**Generate Syntax.**](https://github.com/github/semantic/blob/715971067634f677bff8619add6490e03bb1825e/semantic-ast/src/AST/GenerateSyntax.hs) We then use Template Haskell to auto-generate language-specific, strongly-typed datatypes that represent various language constructs at compile-time. This API exports the top-level function `astDeclarationsForLanguage` to auto-generate datatypes at compile-time, which is is invoked by a given language [AST](https://github.com/github/semantic/blob/715971067634f677bff8619add6490e03bb1825e/semantic-python/src/Language/Python/AST.hs) module.
- [**Unmarshal.**](https://github.com/github/semantic/blob/715971067634f677bff8619add6490e03bb1825e/semantic-ast/src/AST/Unmarshal.hs) Unmarshaling is the runtime process of iterating over tree-sitters parse trees using its tree cursor API, and producing Haskell ASTs for the relevant nodes. We parse source code from tree-sitter and unmarshal the data we get to build these ASTs generically. This file exports the top-level function `parseByteString`, which takes source code and a language as arguments, and produces an AST.
5. **Generate strongly-typed trees for a given language.** Finally, we create `semantic-[LANGUAGE]` packages (such as [this one](https://github.com/github/semantic/tree/715971067634f677bff8619add6490e03bb1825e/semantic-python) for Python). From here, we can call our CodeGen APIs to generate language-specific, strongly-typed trees via the following process:
1. `Language.[LANGUAGE].AST` calls `astDeclarationsForLanguage`, passing in the relevant language as the argument, and using the `getNodeTypesPath` function to access the tree-sitter generated `node-types.json` file.
2. This triggers the generation of the exhaustive syntax types contained by that language.
3. `Language.[LANGUAGE]` provides the semantic functionality for Python programs, and calls the unmarshal API.
4. Finally, the unmarshaling process takes the source code input, and auto-generates a tree using the syntax nodes generated in step 2.
The remaining document provides more details on generating ASTs, inspecting datatypes, tests, and information on decisions pertaining to relevant APIs.
___
### Table of Contents
- [CodeGen Documentation](#codegen-documentation)
- [Prerequisites](#prerequisites)
- [CodeGen Pipeline](#codegen-pipeline)
- [Table of Contents](#table-of-contents)
- [Generating ASTs](#generating-asts)
- [Inspecting auto-generated datatypes](#inspecting-auto-generated-datatypes)
- [Tests](#tests)
- [Additional notes](#additional-notes)
___
### Generating ASTs
## Generating ASTs
To parse source code and produce ASTs locally:
@ -72,7 +63,9 @@ This generates the following AST:
Right (Module {ann = (Span {start = Pos {line = 0, column = 0}, end = Pos {line = 0, column = 1}},Range {start = 0, end = 1}), extraChildren = [R1 (SimpleStatement {getSimpleStatement = L1 (R1 (R1 (L1 (ExpressionStatement {ann = (Span {start = Pos {line = 0, column = 0}, end = Pos {line = 0, column = 1}},Range {start = 0, end = 1}), extraChildren = L1 (L1 (Expression {getExpression = L1 (L1 (L1 (PrimaryExpression {getPrimaryExpression = R1 (L1 (L1 (L1 (Integer {ann = (Span {start = Pos {line = 0, column = 0}, end = Pos {line = 0, column = 1}},Range {start = 0, end = 1}), text = "1"}))))})))})) :| []}))))})]})
```
### Inspecting auto-generated datatypes
`Unmarshal` defines both generic and non-generic classes. This is because generic behaviors are different than what we get non-generically, and in the case of ` Maybe`, `[]`, and `NonEmpty`, we prefer non-generic behavior. Since `[]` is a sum, the generic behavior for `:+:` would be invoked. The generic `:+:` expects repetitions represented in the parse tree as right-nested singly-linked lists (ex., `(a (b (c (d…))))`), rather than as consecutive sibling nodes (ex., `(a b c ...d)`, which is what our trees have. We want to match the latter.
## Inspecting auto-generated datatypes
Datatypes are derived from a language and its `node-types.json` file using the `GenerateSyntax` API. These datatypes can be viewed in the REPL just as they would for any other datatype, using `:i` after loading the language-specific `AST.hs` module for a given language.
@ -104,7 +97,17 @@ instance Foldable Module
-- Defined at /Users/aymannadeem/github/semantic/semantic-python/src/Language/Python/AST.hs:23:1
```
### Tests
Here is an example that describes the relationship between a Python identifier represented in the tree-sitter generated JSON file, and a datatype generated by Template Haskell based on the provided JSON:
| Type | JSON | TH-generated code |
|----------|--------------|------------|
|Named leaf|<pre>{<br>"type": "identifier",<br>"named": true<br>}|<code>data TreeSitter.Python.AST.Identifier a<br>= TreeSitter.Python.AST.Identifier {TreeSitter.Python.AST.ann :: a,<br>TreeSitter.Python.AST.bytes :: text-1.2.3.1:Data.Text.Internal.Text} -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Show a => Show (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Ord a => Ord (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Eq a => Eq (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Traversable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Functor TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Foldable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Unmarshal TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance SymbolMatching TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1|
Annotations are captured by a polymorphic parameter `a` instead of range/span values.
[Examples](https://github.com/github/semantic/blob/715971067634f677bff8619add6490e03bb1825e/semantic-ast/src/AST/Grammar/Examples.hs) contains a set of pre-defined, hand-written datatypes for which Template Haskell is not used. Any datatypes among the node types defined here will be skipped when the splice is run, allowing customization of the representation of parts of the tree. While this gives us flexibility, we encourage that this is used sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into `Integer`s), and may require defining `TS.UnmarshalAnn` or `TS.SymbolMatching` instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual `Foldable`, `Functor`, etc. instances provided for generated datatypes.
## Tests
As of right now, Hedgehog tests are minimal and only in place for the Python library.
@ -112,8 +115,16 @@ To run tests:
`cabal v2-test semantic-python`
### Additional notes
## Background and Motivation for CodeGen
- [GenerateSyntax](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/GenerateSyntax.hs) provides a way to pre-define certain datatypes for which Template Haskell is not used. Any datatypes among the node types which have already been defined in the module where the splice is run will be skipped, allowing customization of the representation of parts of the tree. While this gives us flexibility, we encourage that this is used sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into `Integer`s), and may require defining `TS.UnmarshalAnn` or `TS.SymbolMatching` instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual `Foldable`, `Functor`, etc. instances provided for generated datatypes.
- Annotations are captured by a polymorphic parameter `a`
- [Unmarshal](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/Unmarshal.hs) defines both generic and non-generic classes. This is because generic behaviors are different than what we get non-generically, and in the case of ` Maybe`, `[]`—we actually preference doing things non-generically. Since `[]` is a sum, the generic behavior for `:+:` would be invoked and expect that wed have repetitions represented in the parse tree as right-nested singly-linked lists (ex., `(a (b (c (d…))))`) rather than as just consecutive sibling nodes (ex., `(a b c ...d)`, which is what our trees have). We want to match the latter.
CodeGen automates the engineering effort historically required for adding a new language, which included writing a second [assignment](https://github.com/github/semantic/blob/715971067634f677bff8619add6490e03bb1825e/docs/assignment.md) grammar, along with manually defining [data types à la carte](http://www.cs.ru.nl/~W.Swierstra/Publications/DataTypesALaCarte.pdf).
CodeGen addresses the following challenges posed by the old system:
**1. No named child nodes.** Tree-sitters syntax nodes didnt provide us with named child nodes, just ordered-lists. In other words, the children were structured as an ordered list, without any name indicating the role of each child. This didnt match Semantics internal representation of syntax nodes, where each type of node has a specific set of named children. This created concerns which meant more Assignment work was necessary to compensate for this discrepancy. For instance, one concern being the way we represent comments, which could be any arbitrary node attached to any part of the AST. But if we had named child nodes, this would allow us to associate comments relative to their parent nodes (for example, if a comment appeared in an if statement, it could be the first child for that if-statement node). However in the old system, comments as well as heredocs could appear anywhere are a source of errors.
**2. Time and effort.** Our system involves a two-step parsing process, which requires writing two separate language-specific grammars by hand. This is super time-consuming, very developer-intensive, error-prone, and extremely tedious. [Assignment](https://github.com/github/semantic/blob/715971067634f677bff8619add6490e03bb1825e/docs/assignment.md) requires writing a grammar using parser combinators in Haskell that are really close to the tree-sitter grammar specification. The mechanical nature of this work has, for a long time, begged the question of whether we could automate parts of it. Although weve open-sourced Semantic, its still tough to leverage community support for adding languages with such a grueling process behind it and a brittle system.
**3. Brittle.** Each language's Assignment code was tightly coupled to the language's Tree-sitter grammar, and it could break at runtime if we changed the structure of the grammar, without any compile-time error. This meant tracking ongoing changes in tree-sitter. This was also tedious, manual, and error prone. Bumping grammars meant making changes to assignment to accommodate new tree-structures, like nodes that have changed names or positions, etc.
**4. Evaluation and a la carte sum types.** This also gave us an opportunity to re-think our [à la carte datatypes](http://www.cs.ru.nl/~W.Swierstra/Publications/DataTypesALaCarte.pdf), as well as the evaluation machinery. À la carte syntax types were motivated by a desire to migrate away from a previous representation of syntax in favor of creating a way to better share effort and tooling involved in diffing, and especially in evaluating common fragments of languages: for example, most languages share if statements, functions, while loops, etc. However, the introduction of these syntax types (and the design of the `Evaluatable` typeclass) made it hard for us to make our analysis sensitive to minor linguistic differences, or even to relate different pieces of syntax together. This is because our à la carte syntax is essentially untyped, in that it enforces only a minimal structure on the tree; but any given subterm can be any element of the syntax, and not some limited subset. This means that a number of `Evaluatable` instances have to deal with error conditions that in practice cant occur. For example, `function`, `method`, and `class` declarations have a term for their name field, and thus have to deal with the possibility that the term doesnt have a `declaredName` by throwing an error if this arises. 

View File

@ -58,40 +58,6 @@ message ParseError {
string error = 1;
}
message DiffTreeRequest {
repeated BlobPair blobs = 1;
}
message DiffTreeTOCResponse {
repeated TOCSummaryFile files = 1;
}
message TOCSummaryFile {
string path = 1;
string language = 2;
repeated TOCSummaryChange changes = 3;
repeated TOCSummaryError errors = 4;
}
message TOCSummaryChange {
string category = 1;
string term = 2;
Span span = 3;
ChangeType change_type = 4;
}
message TOCSummaryError {
string error = 1;
Span span = 2;
}
enum ChangeType {
NONE = 0;
ADDED = 1;
REMOVED = 2;
MODIFIED = 3;
}
message DiffTreeGraphResponse {
repeated DiffTreeFileGraph files = 1;
}
@ -148,11 +114,6 @@ message Blob {
string language = 3;
}
message BlobPair {
Blob before = 1;
Blob after = 2;
}
message File {
string path = 1;
string language = 2;

View File

@ -1,3 +1,3 @@
#!/bin/bash
#!/usr/bin/env bash
cabal v2-update

106
script/build-and-upload Executable file
View File

@ -0,0 +1,106 @@
#!/usr/bin/env bash
# Usage: script/build-and-upload PROJECT_NAME
# where PROJECT_NAME is one of the packages present in this repo:
# semantic-source, etc.
set -e
PROJECT="$1"
ROOT_DIR="$(dirname "$0")/.."
CABAL_PATH="$ROOT_DIR/$PROJECT/$PROJECT.cabal"
if [ -z "$PROJECT" ]
then echo "USAGE: build_and_upload PROJECT_NAME"; exit 1
fi
if [ ! -f "$CABAL_PATH" ]
then echo "Couldn't find .cabal file at $CABAL_PATH; is $PROJECT a valid package?"; exit 1
fi
set -x
cabal v2-build "$PROJECT"
TGZ_LOC="$(cabal v2-sdist "$PROJECT" | tail -n 1)"
DOCS_LOC="$(cabal v2-haddock --haddock-for-hackage "$PROJECT" | tail -n 1)"
PACKAGE_VERSION="$(basename "$TGZ_LOC" .tar.gz)"
if [ ! -f "$TGZ_LOC" ]
then echo "Bug in build_and_upload: $PACKAGE_FN doesn't point to a valid path"; exit 1
fi
set +x
echo "You are planning to upload '$PACKAGE_VERSION'."
read -rp "Is this correct? [y/n] " choice
if [ "$choice" != "y" ]
then echo "Aborting."; exit 1
fi
echo "Attempting to build $PACKAGE_VERSION from source"
TEMP_PATH=$(mktemp -d)
tar -xvf "$TGZ_LOC" -C "$TEMP_PATH"
set -x
(
cd "$TEMP_PATH/$PACKAGE_VERSION"
pwd
cabal v2-update
cabal v2-build --disable-optimization
)
set +x
if wget -q --spider "https://hackage.haskell.org/package/$PACKAGE_VERSION"
then
echo "The package $PACKAGE_VERSION already exists on Hackage."
echo "If you need to upload code changes, then bump the version number in $PROJECT/$PROJECT.cabal, make a PR, and run this script again."
echo "Otherwise, if you need _only_ to loosen existing constraints in $PROJECT.cabal file, then you can create a new revision of this package on Hackage."
echo "You'll need to make your changes by hand. Be sure to click the 'Review changes' button to check your work."
read -rp "Do you want to open a browser so as to do this? [y/N]" choice
if [ "$choice" == "y" ]
then
echo "Opening…"
sleep 1
open "https://hackage.haskell.org/package/$PACKAGE_VERSION/$PROJECT.cabal/edit"
exit 0
else
echo "Aborting"
exit 1
fi
fi
echo "******************"
echo "Uploading packages"
echo "******************"
echo -n "Hackage username: "
read HACKAGE_USER
echo
echo -n "Hackage password: "
read -s HACKAGE_PASS
cabal upload --username="$HACKAGE_USER" --password="$HACKAGE_PASS" "$TGZ_LOC"
cabal upload --username="$HACKAGE_USER" --password="$HACKAGE_PASS" --documentation "$DOCS_LOC"
URL="https://hackage.haskell.org/package/$PACKAGE_VERSION/candidate"
echo "Opening candidate URL in browser…"
sleep 1
open "$URL"
echo "About to upload final version. Do you want to proceed?"
echo "Full-fledged package uploads cannot be undone!"
read -rp "Type 'yes' to continue. " choice
if [ "$choice" != "yes" ]
then echo "Aborting."; exit 1
fi
set -x
cabal upload --username="$HACKAGE_USER" --password="$HACKAGE_PASS" --publish "$TGZ_LOC"
cabal upload --username="$HACKAGE_USER" --password="$HACKAGE_PASS" --publish --documentation "$DOCS_LOC"
echo "Tagging $PACKAGE_VERSION"
git tag "$PACKAGE_VERSION"
git push --tags

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
#/ Usage: script/clone-example-repos
#/
#/ Clone some example repositories for smoke testing parsing, assignment, and precise ASTs.

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
store_dir="$HOME/.cabal/store/ghc-$(ghc --numeric-version)"

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
#/ Usage: script/generate-example fileA fileB
#/ script/generate-example directory
#/

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Computes the flags for ghcide to pass to ghci. You probably wont be running this yourself, but rather ghcide will via configuration in hie.yaml.
set -e
@ -50,6 +50,8 @@ function flags {
# TODO: would be nice to figure this out from cabal.project & the .cabal files
echo "-isemantic-analysis/src"
echo "-isemantic-ast/src"
echo "-isemantic-codeql/src"
echo "-isemantic-codeql/test"
echo "-isemantic-core/src"
echo "-isemantic-go/src"
echo "-isemantic-java/src"
@ -59,6 +61,7 @@ function flags {
echo "-isemantic-python/src"
echo "-isemantic-python/test"
echo "-isemantic-ruby/src"
echo "-isemantic-rust/src"
echo "-isemantic-scope-graph/src"
echo "-isemantic-tsx/src"
echo "-isemantic-typescript/src"

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Computes the paths to files causing changes to the ghci flags. You probably wont be running this yourself, but rather ghcide will via configuration in hie.yaml.
set -e

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Usage: script/profile FILE_A FILE_B
# Builds and runs semantic on the given files with profiling enabled.

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
#/ Usage: script/publish
#/
#/ Build a docker image of the semantic CLI and publish to the GitHub Package Registry

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Usage: script/repl [ARGS...]
# Run a repl session capable of loading all of the packages and their individual components. Any passed arguments, e.g. module names or flags, will be passed to ghci.

View File

@ -65,16 +65,16 @@ library
, containers ^>= 0.6
, filepath
, fused-effects ^>= 1.0
, fused-effects-readline
, fused-effects-readline ^>= 0
, fused-syntax
, hashable
, haskeline ^>= 0.7.5
, hashable
, lingo ^>= 0.3
, lingo ^>= 0.3.2.0
, pathtype ^>= 0.8.1
, prettyprinter >= 1.2 && < 2
, prettyprinter-ansi-terminal ^>= 1.1.1
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semilattices
, terminal-size ^>= 0.3
, text ^>= 1.2.3.1

View File

@ -4,6 +4,7 @@ module Analysis.Blob
, fromSource
, blobLanguage
, blobPath
, blobFilePath
, nullBlob
) where
@ -38,8 +39,12 @@ fromSource filepath language source
blobLanguage :: Blob -> Language
blobLanguage = Analysis.File.fileBody . blobFile
blobPath :: Blob -> FilePath
blobPath = Path.toString . Analysis.File.filePath . blobFile
blobPath :: Blob -> Path.AbsRelFile
blobPath = Analysis.File.filePath . blobFile
-- | Show FilePath for error or json outputs.
blobFilePath :: Blob -> String
blobFilePath = Path.toString . blobPath
nullBlob :: Blob -> Bool
nullBlob = Source.null . blobSource

View File

@ -7,7 +7,6 @@ module Analysis.File
) where
import Data.Maybe (fromJust, listToMaybe)
import Data.Semilattice.Lower
import GHC.Stack
import Source.Language as Language
import Source.Span
@ -30,4 +29,4 @@ fileLanguage :: File a -> Language
fileLanguage = Language.forPath . filePath
fromPath :: Path.PartClass.AbsRel ar => Path.File ar -> File Language
fromPath p = File (Path.toAbsRel p) lowerBound (Language.forPath p)
fromPath p = File (Path.toAbsRel p) (point (Pos 0 0)) (Language.forPath p)

View File

@ -12,19 +12,19 @@ import Analysis.File
import Data.Text (Text)
import qualified Data.Text as T
import Source.Language
import System.FilePath.Posix
import qualified System.Path as Path
-- | A 'Project' contains all the information that semantic needs
-- to execute an analysis, diffing, or graphing pass.
data Project = Project
{ projectRootDir :: FilePath
{ projectRootDir :: Path.AbsRelDir
, projectBlobs :: [Blob]
, projectLanguage :: Language
, projectExcludeDirs :: [FilePath]
, projectExcludeDirs :: [Path.AbsRelDir]
} deriving (Eq, Show)
projectName :: Project -> Text
projectName = T.pack . dropExtensions . takeFileName . projectRootDir
projectName = T.pack . maybe "" Path.toString . Path.takeDirName . projectRootDir
projectExtensions :: Project -> [String]
projectExtensions = extensionsForLanguage . projectLanguage

View File

@ -55,13 +55,13 @@ library
build-depends: base ^>= 4.13
, aeson ^>= 1.4.2.0
, aeson-pretty ^>= 0.8.8
, bytestring ^>= 0.10.9.2
, bytestring ^>= 0.10.9
, containers >= 0.6.0.1
, directory ^>= 1.3.3.2
, filepath ^>= 1.4.1
, fused-effects ^>= 1.0
, tree-sitter ^>= 0.9.0.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, template-haskell ^>= 2.15
, bytestring ^>= 0.10.8.2
, optparse-applicative >= 0.14.3 && < 0.16

View File

@ -10,6 +10,7 @@ module AST.Test
import Control.Applicative
import Control.Monad
import Data.Attoparsec.ByteString.Char8
import Data.Attoparsec.ByteString.Char8 as Attoparsec
import Data.ByteString (ByteString, readFile)
import Data.ByteString.Char8 (pack, unpack)
import Data.Either
@ -76,7 +77,7 @@ exampleParser = do
code <- manyTill anyChar outputSepParser
_out <- manyTill anyChar (choice [endOfInput, char '=' $> ()])
pure (CorpusExample name (pack code))
where outputSepParser = choice [string "\n---\n", string "\r\n---\r\n"]
where outputSepParser = (Attoparsec.take 3) *> (Attoparsec.char '-') *> endOfLine
exampleNameParser :: Parser String
exampleNameParser = do

21
semantic-codeql/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 GitHub
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,3 @@
# Semantic support for CodeQL
This package implements `semantic` support for CodeQL using the `semantic-core` intermediate language.

2
semantic-codeql/Setup.hs Normal file
View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,74 @@
cabal-version: 2.4
name: semantic-codeql
version: 0.0.0.0
synopsis: Semantic support for CodeQL.
description: Semantic support for CodeQL using the semantic-core intermediate language.
homepage: https://github.com/github/semantic/tree/master/semantic-codeql#readme
bug-reports: https://github.com/github/semantic/issues
license: MIT
license-file: LICENSE
author: The Semantic authors
maintainer: opensource+semantic@github.com
copyright: (c) 2019 GitHub, Inc.
category: Language
build-type: Simple
stability: alpha
extra-source-files: README.md
tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
build-depends: base ^>= 4.13
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.9
, tree-sitter-ql ^>= 0.1.0.2
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies
library
import: haskell
exposed-modules:
Language.CodeQL
Language.CodeQL.AST
Language.CodeQL.Grammar
Language.CodeQL.Tags
hs-source-dirs: src
test-suite test
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: PreciseTest.hs
build-depends: base
, bytestring ^>= 0.10.8.2
, hedgehog >= 0.6 && <2
, pathtype ^>= 0.8.1
, semantic-ast
, semantic-codeql
, tasty
, tasty-hedgehog
, tasty-hunit
, text

View File

@ -0,0 +1,24 @@
-- | Semantic functionality for CodeQL programs.
module Language.CodeQL
( Term(..)
, TreeSitter.QL.tree_sitter_ql
) where
import qualified AST.Unmarshal as TS
import Data.Proxy
import qualified Language.CodeQL.AST as CodeQL
import qualified Language.CodeQL.Tags as CodeQLTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.QL (tree_sitter_ql)
newtype Term a = Term { getTerm :: CodeQL.Ql a }
instance TS.SymbolMatching Term where
matchedSymbols _ = TS.matchedSymbols (Proxy :: Proxy CodeQL.Ql)
showFailure _ = TS.showFailure (Proxy :: Proxy CodeQL.Ql)
instance TS.Unmarshal Term where
matchers = fmap (fmap (TS.hoist Term)) TS.matchers
instance Tags.ToTags Term where
tags src = Tags.runTagging src . CodeQLTags.tags . getTerm

View File

@ -0,0 +1,24 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.CodeQL.AST
( module Language.CodeQL.AST
, CodeQL.getTestCorpusDir
) where
import AST.GenerateSyntax
import AST.Token()
import Language.Haskell.TH.Syntax (runIO)
import Prelude hiding (Bool, Eq, Float, Integer, String)
import qualified TreeSitter.QL as CodeQL (getNodeTypesPath, getTestCorpusDir, tree_sitter_ql)
runIO CodeQL.getNodeTypesPath >>= astDeclarationsForLanguage CodeQL.tree_sitter_ql

View File

@ -0,0 +1,12 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.CodeQL.Grammar
( tree_sitter_ql
, Grammar(..)
) where
import AST.Grammar.TH
import Language.Haskell.TH
import TreeSitter.QL (tree_sitter_ql)
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_ql

View File

@ -0,0 +1,253 @@
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE DisambiguateRecordFields #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Language.CodeQL.Tags
( ToTags (..),
)
where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable (for_)
import Data.Text (Text)
import qualified Language.CodeQL.AST as CodeQL
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
class ToTags t where
tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m
) =>
t Loc ->
m ()
default tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
tags = gtags
instance ToTags (Token sym n) where tags _ = pure ()
instance (ToTags l, ToTags r) => ToTags (l :+: r) where
tags (L1 l) = tags l
tags (R1 r) = tags r
gtags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
gtags = traverse1_ @ToTags (const (pure ())) tags
yieldTag :: (Has (Reader Source) sig m, Has (Writer Tags.Tags) sig m) => Text -> Kind -> Loc -> Range -> m ()
yieldTag name kind loc srcLineRange = do
src <- ask @Source
Tags.yield (Tag name kind loc (Tags.firstLine src srcLineRange) Nothing)
instance ToTags CodeQL.Module where
tags
t@CodeQL.Module
{ ann = Loc {byteRange},
name = CodeQL.ModuleName {extraChildren = CodeQL.SimpleId {text, ann}}
} = yieldTag text Module ann byteRange >> gtags t
instance ToTags CodeQL.ClasslessPredicate where
tags
t@CodeQL.ClasslessPredicate
{ ann = Loc {byteRange},
name = CodeQL.PredicateName {text, ann}
} = yieldTag text Function ann byteRange >> gtags t
instance ToTags CodeQL.AritylessPredicateExpr where
tags
t@CodeQL.AritylessPredicateExpr
{ ann = Loc {byteRange},
name = CodeQL.LiteralId {text, ann}
} = yieldTag text Call ann byteRange >> gtags t
instance ToTags CodeQL.Dataclass where
tags
t@CodeQL.Dataclass
{ ann = Loc {byteRange},
name = CodeQL.ClassName {text, ann}
} = yieldTag text Class ann byteRange >> gtags t
instance ToTags CodeQL.MemberPredicate where
tags
t@CodeQL.MemberPredicate
{ ann = Loc {byteRange},
name = CodeQL.PredicateName {text, ann}
} = yieldTag text Method ann byteRange >> gtags t
instance ToTags CodeQL.Datatype where
tags
t@CodeQL.Datatype
{ ann = Loc {byteRange},
name = CodeQL.ClassName {text, ann}
} = yieldTag text Class ann byteRange >> gtags t
instance ToTags CodeQL.DatatypeBranch where
tags
t@CodeQL.DatatypeBranch
{ ann = Loc {byteRange},
name = CodeQL.ClassName {text, ann}
} = yieldTag text Class ann byteRange >> gtags t
instance ToTags CodeQL.ClasslessPredicateCall where
tags
CodeQL.ClasslessPredicateCall
{ extraChildren
} = for_ extraChildren $ \x -> case x of
Prj t@CodeQL.AritylessPredicateExpr {} -> tags t
_ -> pure ()
instance ToTags CodeQL.QualifiedRhs where
tags
t@CodeQL.QualifiedRhs
{ ann = Loc {byteRange},
name = expr
} = case expr of
Just (Prj CodeQL.PredicateName {text, ann}) -> yieldTag text Call ann byteRange >> gtags t
_ -> gtags t
instance ToTags CodeQL.TypeExpr where
tags
t@CodeQL.TypeExpr
{ ann = Loc {byteRange},
name = expr
} = case expr of
Just (Prj CodeQL.ClassName {text, ann}) -> yieldTag text Type ann byteRange >> gtags t
_ -> gtags t
instance ToTags CodeQL.AddExpr
instance ToTags CodeQL.Addop
instance ToTags CodeQL.AggId
instance ToTags CodeQL.Aggregate
instance ToTags CodeQL.AnnotArg
instance ToTags CodeQL.Annotation
instance ToTags CodeQL.AnnotName
instance ToTags CodeQL.Any
instance ToTags CodeQL.As
instance ToTags CodeQL.Asc
instance ToTags CodeQL.AsExpr
instance ToTags CodeQL.AsExprs
instance ToTags CodeQL.Avg
instance ToTags CodeQL.Body
instance ToTags CodeQL.Bool
instance ToTags CodeQL.Boolean
instance ToTags CodeQL.Charpred
instance ToTags CodeQL.ClassMember
instance ToTags CodeQL.ClassName
instance ToTags CodeQL.Closure
instance ToTags CodeQL.Compop
instance ToTags CodeQL.CompTerm
instance ToTags CodeQL.Concat
instance ToTags CodeQL.Conjunction
instance ToTags CodeQL.Count
instance ToTags CodeQL.Class
instance ToTags CodeQL.DatatypeBranches
instance ToTags CodeQL.Date
instance ToTags CodeQL.Dbtype
instance ToTags CodeQL.Desc
instance ToTags CodeQL.Direction
instance ToTags CodeQL.Disjunction
instance ToTags CodeQL.Empty
instance ToTags CodeQL.Eq
instance ToTags CodeQL.Exists
instance ToTags CodeQL.ExprAggregateBody
instance ToTags CodeQL.Extends
instance ToTags CodeQL.False
instance ToTags CodeQL.Field
instance ToTags CodeQL.Float
instance ToTags CodeQL.Forall
instance ToTags CodeQL.Forex
instance ToTags CodeQL.FullAggregateBody
instance ToTags CodeQL.Ge
instance ToTags CodeQL.Gt
instance ToTags CodeQL.HigherOrderTerm
instance ToTags CodeQL.IfTerm
instance ToTags CodeQL.Implication
instance ToTags CodeQL.Import
instance ToTags CodeQL.ImportModuleExpr
instance ToTags CodeQL.Imprt
instance ToTags CodeQL.In
instance ToTags CodeQL.InExpr
instance ToTags CodeQL.InstanceOf
instance ToTags CodeQL.Instanceof
instance ToTags CodeQL.Integer
instance ToTags CodeQL.Le
instance ToTags CodeQL.Literal
instance ToTags CodeQL.LiteralId
instance ToTags CodeQL.Lt
instance ToTags CodeQL.Max
instance ToTags CodeQL.Mod
instance ToTags CodeQL.ModuleAliasBody
instance ToTags CodeQL.ModuleExpr
instance ToTags CodeQL.ModuleMember
instance ToTags CodeQL.ModuleName
instance ToTags CodeQL.Min
instance ToTags CodeQL.Minus
instance ToTags CodeQL.MulExpr
instance ToTags CodeQL.Mulop
instance ToTags CodeQL.Ne
instance ToTags CodeQL.Negation
instance ToTags CodeQL.Newtype
instance ToTags CodeQL.None
instance ToTags CodeQL.Not
instance ToTags CodeQL.OrderBy
instance ToTags CodeQL.OrderBys
instance ToTags CodeQL.ParExpr
instance ToTags CodeQL.Plus
instance ToTags CodeQL.Predicate
instance ToTags CodeQL.PredicateAliasBody
instance ToTags CodeQL.PredicateExpr
instance ToTags CodeQL.PredicateName
instance ToTags CodeQL.PrefixCast
instance ToTags CodeQL.Ql
instance ToTags CodeQL.Qldoc
instance ToTags CodeQL.QualifiedExpr
instance ToTags CodeQL.QualModuleExpr
instance ToTags CodeQL.Quantified
instance ToTags CodeQL.Quantifier
instance ToTags CodeQL.Range
instance ToTags CodeQL.Rank
instance ToTags CodeQL.Result
instance ToTags CodeQL.ReturnType
instance ToTags CodeQL.Select
instance ToTags CodeQL.SimpleId
instance ToTags CodeQL.Slash
instance ToTags CodeQL.SpecialCall
instance ToTags CodeQL.SpecialId
instance ToTags CodeQL.Star
instance ToTags CodeQL.Strictconcat
instance ToTags CodeQL.Strictcount
instance ToTags CodeQL.Strictsum
instance ToTags CodeQL.String
instance ToTags CodeQL.Sum
instance ToTags CodeQL.Super
instance ToTags CodeQL.SuperRef
instance ToTags CodeQL.This
instance ToTags CodeQL.True
instance ToTags CodeQL.TypeAliasBody
instance ToTags CodeQL.TypeLiteral
instance ToTags CodeQL.UnaryExpr
instance ToTags CodeQL.Underscore
instance ToTags CodeQL.Unop
instance ToTags CodeQL.VarDecl
instance ToTags CodeQL.Variable
instance ToTags CodeQL.VarName

View File

@ -0,0 +1,20 @@
{-# LANGUAGE DisambiguateRecordFields, OverloadedStrings, TypeApplications #-}
module Main (main) where
import qualified System.Path as Path
import Test.Tasty
import qualified Language.CodeQL.AST as CodeQL
import Language.CodeQL.Grammar
import AST.Test
import AST.Unmarshal
main :: IO ()
main
= Path.absDir <$> CodeQL.getTestCorpusDir
>>= readCorpusFiles'
>>= traverse (testCorpus parse)
>>= defaultMain . tests
where parse = parseByteString @CodeQL.Ql @() tree_sitter_ql
tests :: [TestTree] -> TestTree
tests = testGroup "tree-sitter-ql corpus tests"

View File

@ -55,7 +55,7 @@ library
, prettyprinter >= 1.2.1 && < 2
, prettyprinter-ansi-terminal ^>= 1.1.1
, semantic-analysis ^>= 0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, text ^>= 1.2.3.1
, trifecta >= 2 && < 2.2
, unordered-containers ^>= 0.2.10

View File

@ -26,7 +26,7 @@ common haskell
, parsers ^>= 0.12.10
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
@ -61,7 +61,7 @@ test-suite test
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
main-is: PreciseTest.hs
build-depends: base
, bytestring ^>= 0.10.8.2
, hedgehog >= 0.6 && <2

View File

@ -1,66 +1,70 @@
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE DisambiguateRecordFields #-}
{-# LANGUAGE DefaultSignatures #-}{-# LANGUAGE DisambiguateRecordFields #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Language.Go.Tags
( ToTags(..)
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Text as Text
module Language.Go.Tags
( ToTags (..),
)
where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Text as Text
import qualified Language.Go.AST as Go
import Source.Loc
import Source.Source as Source
import Tags.Tag
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
class ToTags t where
tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
)
=> t Loc
-> m ()
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m
) =>
t Loc ->
m ()
default tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
tags = gtags
instance ToTags Go.FunctionDeclaration where
tags t@Go.FunctionDeclaration
{ ann = loc@Loc { byteRange }
, name = Go.Identifier { text }
} = yieldTag text Function loc byteRange >> gtags t
tags
t@Go.FunctionDeclaration
{ ann = Loc {byteRange},
name = Go.Identifier {text, ann}
} = yieldTag text Function ann byteRange >> gtags t
instance ToTags Go.MethodDeclaration where
tags t@Go.MethodDeclaration
{ ann = loc@Loc { byteRange }
, name = Go.FieldIdentifier { text }
} = yieldTag text Method loc byteRange >> gtags t
tags
t@Go.MethodDeclaration
{ ann = Loc {byteRange},
name = Go.FieldIdentifier {text, ann}
} = yieldTag text Method ann byteRange >> gtags t
instance ToTags Go.CallExpression where
tags t@Go.CallExpression
{ ann = loc@Loc { byteRange }
, function = Go.Expression expr
} = match expr
tags
t@Go.CallExpression
{ ann = Loc {byteRange},
function = Go.Expression expr
} = match expr
where
match expr = case expr of
Prj Go.SelectorExpression { field = Go.FieldIdentifier { text }} -> yield text
Prj Go.Identifier { text } -> yield text
Prj Go.CallExpression { function = Go.Expression e } -> match e
Prj Go.ParenthesizedExpression { extraChildren = Go.Expression e } -> match e
_ -> gtags t
yield name = yieldTag name Call loc byteRange >> gtags t
Prj Go.SelectorExpression {field = Go.FieldIdentifier {text, ann}} -> yield text ann
Prj Go.Identifier {text, ann} -> yield text ann
Prj Go.CallExpression {function = Go.Expression e} -> match e
Prj Go.ParenthesizedExpression {extraChildren = Go.Expression e} -> match e
_ -> gtags t
yield name loc = yieldTag name Call loc byteRange >> gtags t
instance (ToTags l, ToTags r) => ToTags (l :+: r) where
tags (L1 l) = tags l
@ -68,19 +72,19 @@ instance (ToTags l, ToTags r) => ToTags (l :+: r) where
instance ToTags (Token sym n) where tags _ = pure ()
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
gtags = traverse1_ @ToTags (const (pure ())) tags
yieldTag :: (Has (Reader Source) sig m, Has (Writer Tags.Tags) sig m) => Text -> Kind -> Loc -> Range -> m ()
yieldTag name kind loc range = do
yieldTag name kind loc srcLineRange = do
src <- ask @Source
Tags.yield (Tag name kind loc (Tags.firstLine src range) Nothing)
Tags.yield (Tag name kind loc (Tags.firstLine src srcLineRange) Nothing)
instance ToTags Go.ArgumentList

View File

@ -26,7 +26,7 @@ common haskell
, parsers ^>= 0.12.10
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
@ -61,7 +61,7 @@ test-suite test
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
main-is: PreciseTest.hs
build-depends: base
, bytestring ^>= 0.10.8.2
, pathtype ^>= 0.8.1

View File

@ -4,36 +4,39 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Language.Java.Tags
( ToTags(..)
) where
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import GHC.Generics ((:+:)(..))
module Language.Java.Tags
( ToTags (..),
)
where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable
import qualified Language.Java.AST as Java
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
class ToTags t where
tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
)
=> t Loc
-> m ()
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m
) =>
t Loc ->
m ()
default tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
tags = gtags
instance (ToTags l, ToTags r) => ToTags (l :+: r) where
@ -43,47 +46,74 @@ instance (ToTags l, ToTags r) => ToTags (l :+: r) where
instance ToTags (Token sym n) where tags _ = pure ()
instance ToTags Java.MethodDeclaration where
tags t@Java.MethodDeclaration
{ ann = loc@Loc { byteRange = range }
, name = Java.Identifier { text = name }
, body
} = do
tags
t@Java.MethodDeclaration
{ ann = Loc {byteRange = range},
name = Java.Identifier {text, ann},
body
} = do
src <- ask @Source
let line = Tags.firstLine src range
{ end = case body of
Just Java.Block { ann = Loc Range { end } _ } -> end
Nothing -> end range
}
Tags.yield (Tag name Method loc line Nothing)
let line =
Tags.firstLine
src
range
{ end = case body of
Just Java.Block {ann = Loc Range {end} _} -> end
Nothing -> end range
}
Tags.yield (Tag text Method ann line Nothing)
gtags t
-- TODO: we can coalesce a lot of these instances given proper use of HasField
-- to do the equivalent of type-generic pattern-matching.
instance ToTags Java.ClassDeclaration where
tags t@Java.ClassDeclaration
{ ann = loc@Loc { byteRange = Range { start } }
, name = Java.Identifier { text = name }
, body = Java.ClassBody { ann = Loc Range { start = end } _ }
} = do
tags
t@Java.ClassDeclaration
{ ann = Loc {byteRange = Range {start}},
name = Java.Identifier {text, ann},
body = Java.ClassBody {ann = Loc Range {start = end} _}
} = do
src <- ask @Source
Tags.yield (Tag name Class loc (Tags.firstLine src (Range start end)) Nothing)
Tags.yield (Tag text Class ann (Tags.firstLine src (Range start end)) Nothing)
gtags t
instance ToTags Java.MethodInvocation where
tags t@Java.MethodInvocation
{ ann = loc@Loc { byteRange = range }
, name = Java.Identifier { text = name }
} = do
tags
t@Java.MethodInvocation
{ ann = Loc {byteRange = range},
name = Java.Identifier {text, ann}
} = do
src <- ask @Source
Tags.yield (Tag name Call loc (Tags.firstLine src range) Nothing)
Tags.yield (Tag text Call ann (Tags.firstLine src range) Nothing)
gtags t
instance ToTags Java.InterfaceDeclaration where
tags
t@Java.InterfaceDeclaration
{ ann = Loc {byteRange},
name = Java.Identifier {text, ann}
} = do
src <- ask @Source
Tags.yield (Tag text Interface ann (Tags.firstLine src byteRange) Nothing)
gtags t
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
instance ToTags Java.InterfaceTypeList where
tags t@Java.InterfaceTypeList {extraChildren = interfaces} = do
src <- ask @Source
for_ interfaces $ \x -> case x of
Java.Type (Prj (Java.UnannotatedType (Prj (Java.SimpleType (Prj Java.TypeIdentifier {ann = loc@Loc {byteRange = range}, text = name}))))) ->
Tags.yield (Tag name Implementation loc (Tags.firstLine src range) Nothing)
_ -> pure ()
gtags t
gtags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
gtags = traverse1_ @ToTags (const (pure ())) tags
instance ToTags Java.AnnotatedType
@ -153,8 +183,8 @@ instance ToTags Java.InferredParameters
instance ToTags Java.InstanceofExpression
instance ToTags Java.IntegralType
instance ToTags Java.InterfaceBody
instance ToTags Java.InterfaceDeclaration
instance ToTags Java.InterfaceTypeList
--instance ToTags Java.InterfaceDeclaration
-- instance ToTags Java.InterfaceTypeList
instance ToTags Java.LabeledStatement
instance ToTags Java.LambdaExpression
instance ToTags Java.Literal

View File

@ -26,7 +26,7 @@ common haskell
, parsers ^>= 0.12.10
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
@ -61,7 +61,7 @@ test-suite test
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
main-is: PreciseTest.hs
build-depends: base
, bytestring ^>= 0.10.8.2
, hedgehog >= 0.6 && <2

View File

@ -43,7 +43,7 @@ executable semantic-parse
build-depends: base
, semantic-ast
, tree-sitter ^>= 0.9.0.0
, semantic-source
, semantic-source ^>= 0.1.0
, tree-sitter-python ^>= 0.9.0.1
, bytestring
, optparse-applicative

21
semantic-php/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 GitHub
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -26,7 +26,7 @@ common haskell
, parsers ^>= 0.12.10
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3

View File

@ -5,34 +5,38 @@
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Language.PHP.Tags (tags) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Text (Text)
module Language.PHP.Tags
( tags,
)
where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Text (Text)
import qualified Language.PHP.AST as PHP
import Source.Loc
import Source.Source as Source
import Tags.Tag
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
class ToTags t where
tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
)
=> t Loc
-> m ()
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m
) =>
t Loc ->
m ()
default tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
tags = gtags
instance ToTags (Token sym n) where tags _ = pure ()
@ -41,55 +45,56 @@ instance (ToTags l, ToTags r) => ToTags (l :+: r) where
tags (L1 l) = tags l
tags (R1 r) = tags r
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
gtags = traverse1_ @ToTags (const (pure ())) tags
yieldTag :: (Has (Reader Source) sig m, Has (Writer Tags.Tags) sig m) => Text -> Kind -> Loc -> Range -> m ()
yieldTag name kind loc range = do
yieldTag name kind loc srcLineRange = do
src <- ask @Source
Tags.yield (Tag name kind loc (Tags.firstLine src range) Nothing)
Tags.yield (Tag name kind loc (Tags.firstLine src srcLineRange) Nothing)
instance ToTags PHP.FunctionDefinition where
tags t@PHP.FunctionDefinition
{ PHP.ann = loc@Loc { byteRange }
, PHP.name = PHP.Name { text }
} = yieldTag text Method loc byteRange >> gtags t
tags
t@PHP.FunctionDefinition
{ PHP.ann = Loc {byteRange},
PHP.name = PHP.Name {text, ann}
} = yieldTag text Method ann byteRange >> gtags t
instance ToTags PHP.MethodDeclaration where
tags t@PHP.MethodDeclaration
{ PHP.ann = loc@Loc { byteRange }
, PHP.name = PHP.Name { text }
} = yieldTag text Function loc byteRange >> gtags t
tags
t@PHP.MethodDeclaration
{ PHP.ann = Loc {byteRange},
PHP.name = PHP.Name {text, ann}
} = yieldTag text Function ann byteRange >> gtags t
instance ToTags PHP.FunctionCallExpression where
tags t@PHP.FunctionCallExpression
{ PHP.ann = loc@Loc { byteRange }
, PHP.function = func
} = match func
tags
t@PHP.FunctionCallExpression
{ PHP.ann = Loc {byteRange},
PHP.function = func
} = match func
where
yield name = yieldTag name Call loc byteRange >> gtags t
yield name loc = yieldTag name Call loc byteRange >> gtags t
match expr = case expr of
Prj (PHP.VariableName { extraChildren = PHP.Name { text } })
-> yield text *> gtags t
Prj (PHP.QualifiedName { extraChildren = [Prj (PHP.Name { text })] })
-> yield text *> gtags t
_
-> gtags t
Prj PHP.VariableName {extraChildren = PHP.Name {text, ann}} -> yield text ann *> gtags t
Prj PHP.QualifiedName {extraChildren = [Prj PHP.Name {text, ann}]} -> yield text ann *> gtags t
_ -> gtags t
instance ToTags PHP.MemberCallExpression where
tags t@PHP.MemberCallExpression
{ PHP.ann = loc@Loc { byteRange }
, PHP.name = item
} = case item of
Prj (PHP.Name { text }) -> yieldTag text Call loc byteRange >> gtags t
_ -> gtags t
tags
t@PHP.MemberCallExpression
{ PHP.ann = Loc {byteRange},
PHP.name = Prj PHP.Name {text, ann}
} = yieldTag text Call ann byteRange >> gtags t
tags t = gtags t
instance ToTags PHP.AnonymousFunctionCreationExpression

View File

@ -27,7 +27,7 @@ common haskell
, semantic-analysis ^>= 0
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, semantic-scope-graph ^>= 0.0
, semilattices ^>= 0
@ -70,10 +70,10 @@ test-suite compiling
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
main-is: CoreTest.hs
ghc-options: -threaded
build-depends: semantic-python == 0.0.0.0
build-depends: semantic-python
, aeson ^>= 1.4.4
, aeson-pretty ^>= 0.8.7
, bytestring ^>= 0.10.8.2

View File

@ -33,7 +33,6 @@ import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe
import Data.Monoid
import qualified Data.ScopeGraph as ScopeGraph
import Data.Semilattice.Lower
import Data.Traversable
import GHC.Records
import GHC.TypeLits
@ -42,7 +41,7 @@ import Language.Python.Patterns
import Scope.Graph.Convert (Result (..), complete, todo)
import Scope.Types
import Source.Loc (Loc)
import Source.Span (Span, span_)
import Source.Span (Span, Pos (..), span_, point)
-- This typeclass is internal-only, though it shares the same interface
-- as the one defined in semantic-scope-graph. The somewhat-unconventional
@ -197,7 +196,7 @@ instance ToScopeGraph Py.FunctionDefinition where
{ Props.kind = ScopeGraph.Parameter
, Props.relation = ScopeGraph.Default
, Props.associatedScope = Nothing
, Props.span = lowerBound
, Props.span = point (Pos 0 0)
}
let param (Py.Parameter (Prj (Py.Identifier pann pname))) = Just (pann, Name.name pname)
param _ = Nothing

View File

@ -5,40 +5,42 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Language.Python.Tags
( ToTags(..)
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (listToMaybe)
import Data.Text as Text
module Language.Python.Tags
( ToTags (..),
)
where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (listToMaybe)
import Data.Text as Text
import qualified Language.Python.AST as Py
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
class ToTags t where
tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
)
=> t Loc
-> m ()
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m
) =>
t Loc ->
m ()
default tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
tags = gtags
instance (ToTags l, ToTags r) => ToTags (l :+: r) where
@ -47,98 +49,105 @@ instance (ToTags l, ToTags r) => ToTags (l :+: r) where
instance ToTags (Token sym n) where tags _ = pure ()
keywordFunctionCall
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc -> Loc -> Range -> Text -> m ()
keywordFunctionCall ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
Loc ->
Range ->
Text ->
m ()
keywordFunctionCall t loc range name = yieldTag name Function loc range Nothing >> gtags t
instance ToTags Py.String where
tags Py.String { extraChildren } = for_ extraChildren $ \ x -> case x of
Prj t@Py.Interpolation { } -> tags t
_ -> pure ()
tags Py.String {extraChildren} = for_ extraChildren $ \x -> case x of
Prj t@Py.Interpolation {} -> tags t
_ -> pure ()
instance ToTags Py.Interpolation where
tags Py.Interpolation { extraChildren } = for_ extraChildren $ \ x -> case x of
tags Py.Interpolation {extraChildren} = for_ extraChildren $ \x -> case x of
Prj (Py.Expression expr) -> tags expr
_ -> pure ()
_ -> pure ()
instance ToTags Py.AssertStatement where
tags t@Py.AssertStatement { ann = loc@Loc { byteRange } } = keywordFunctionCall t loc byteRange "assert"
tags t@Py.AssertStatement {ann = loc@Loc {byteRange}} = keywordFunctionCall t loc byteRange "assert"
instance ToTags Py.Await where
tags t@Py.Await { ann = loc@Loc { byteRange } } = keywordFunctionCall t loc byteRange "await"
tags t@Py.Await {ann = loc@Loc {byteRange}} = keywordFunctionCall t loc byteRange "await"
instance ToTags Py.DeleteStatement where
tags t@Py.DeleteStatement { ann = loc@Loc { byteRange } } = keywordFunctionCall t loc byteRange "del"
tags t@Py.DeleteStatement {ann = loc@Loc {byteRange}} = keywordFunctionCall t loc byteRange "del"
instance ToTags Py.ExecStatement where
tags t@Py.ExecStatement { ann = loc@Loc { byteRange } } = keywordFunctionCall t loc byteRange "exec"
tags t@Py.ExecStatement {ann = loc@Loc {byteRange}} = keywordFunctionCall t loc byteRange "exec"
instance ToTags Py.GlobalStatement where
tags t@Py.GlobalStatement { ann = loc@Loc { byteRange } } = keywordFunctionCall t loc byteRange "global"
tags t@Py.GlobalStatement {ann = loc@Loc {byteRange}} = keywordFunctionCall t loc byteRange "global"
instance ToTags Py.NonlocalStatement where
tags t@Py.NonlocalStatement { ann = loc@Loc { byteRange } } = keywordFunctionCall t loc byteRange "nonlocal"
tags t@Py.NonlocalStatement {ann = loc@Loc {byteRange}} = keywordFunctionCall t loc byteRange "nonlocal"
instance ToTags Py.PrintStatement where
tags t@Py.PrintStatement { ann = loc@Loc { byteRange } } = keywordFunctionCall t loc byteRange "print"
tags t@Py.PrintStatement {ann = loc@Loc {byteRange}} = keywordFunctionCall t loc byteRange "print"
instance ToTags Py.FunctionDefinition where
tags t@Py.FunctionDefinition
{ ann = loc@Loc { byteRange = Range { start } }
, name = Py.Identifier { text = name }
, body = Py.Block { ann = Loc Range { start = end } _, extraChildren }
} = do
tags
t@Py.FunctionDefinition
{ ann = Loc {byteRange = Range {start}},
name = Py.Identifier {text, ann},
body = Py.Block {ann = Loc Range {start = end} _, extraChildren}
} = do
src <- ask @Source
let docs = listToMaybe extraChildren >>= docComment src
yieldTag name Function loc (Range start end) docs >> gtags t
yieldTag text Function ann (Range start end) docs >> gtags t
instance ToTags Py.ClassDefinition where
tags t@Py.ClassDefinition
{ ann = loc@Loc { byteRange = Range { start } }
, name = Py.Identifier { text = name }
, body = Py.Block { ann = Loc Range { start = end } _, extraChildren }
} = do
tags
t@Py.ClassDefinition
{ ann = Loc {byteRange = Range {start}},
name = Py.Identifier {text, ann},
body = Py.Block {ann = Loc Range {start = end} _, extraChildren}
} = do
src <- ask @Source
let docs = listToMaybe extraChildren >>= docComment src
yieldTag name Class loc (Range start end) docs >> gtags t
yieldTag text Class ann (Range start end) docs >> gtags t
instance ToTags Py.Call where
tags t@Py.Call
{ ann = loc@Loc { byteRange = range }
, function = Py.PrimaryExpression expr
} = match expr
where
match expr = case expr of
(Prj Py.Attribute { attribute = Py.Identifier _ name }) -> yield name
(Prj (Py.Identifier _ name)) -> yield name
(Prj Py.Call { function = Py.PrimaryExpression expr' }) -> match expr' -- Nested call expression like this in Python represent creating an instance of a class and calling it: e.g. AClass()()
(Prj (Py.ParenthesizedExpression _ (Prj (Py.Expression (Prj (Py.PrimaryExpression expr')))))) -> match expr' -- Parenthesized expressions
_ -> gtags t
yield name = yieldTag name Call loc range Nothing >> gtags t
tags
t@Py.Call
{ ann = Loc {byteRange},
function = Py.PrimaryExpression expr
} = match expr
where
match expr = case expr of
Prj Py.Attribute {attribute = Py.Identifier {text, ann}} -> yield text ann
Prj Py.Identifier {text, ann} -> yield text ann
Prj Py.Call {function = Py.PrimaryExpression expr'} -> match expr' -- Nested call expression like this in Python represent creating an instance of a class and calling it: e.g. AClass()()
Prj (Py.ParenthesizedExpression _ (Prj (Py.Expression (Prj (Py.PrimaryExpression expr'))))) -> match expr' -- Parenthesized expressions
_ -> gtags t
yield name loc = yieldTag name Call loc byteRange Nothing >> gtags t
yieldTag :: (Has (Reader Source) sig m, Has (Writer Tags.Tags) sig m) => Text -> Kind -> Loc -> Range -> Maybe Text -> m ()
yieldTag name kind loc range docs = do
yieldTag name kind loc srcLineRange docs = do
src <- ask @Source
Tags.yield (Tag name kind loc (Tags.firstLine src range) docs)
Tags.yield (Tag name kind loc (Tags.firstLine src srcLineRange) docs)
docComment :: Source -> (Py.CompoundStatement :+: Py.SimpleStatement) Loc -> Maybe Text
docComment src (R1 (Py.SimpleStatement (Prj Py.ExpressionStatement { extraChildren = L1 (Prj (Py.Expression (Prj (Py.PrimaryExpression (Prj Py.String { ann }))))) :|_ }))) = Just (toText (slice src (byteRange ann)))
docComment src (R1 (Py.SimpleStatement (Prj Py.ExpressionStatement {extraChildren = L1 (Prj (Py.Expression (Prj (Py.PrimaryExpression (Prj Py.String {ann}))))) :| _}))) = Just (toText (slice src (byteRange ann)))
docComment _ _ = Nothing
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
gtags = traverse1_ @ToTags (const (pure ())) tags
instance ToTags Py.AliasedImport
instance ToTags Py.ArgumentList
-- instance ToTags Py.AssertStatement

View File

@ -58,7 +58,7 @@ The graph should be
runScopeGraph :: ToScopeGraph t => Path.AbsRelFile -> Source.Source -> t Loc -> (ScopeGraph.ScopeGraph Name, Result)
runScopeGraph p _src item = run . runSketch info $ scopeGraph item
where
info = ModuleInfo (Path.toString p) "Python" mempty
info = ModuleInfo p "Python" mempty
sampleGraphThing :: ScopeGraphEff sig m => m Result
sampleGraphThing = do
@ -66,24 +66,24 @@ sampleGraphThing = do
declare "goodbye" (Props.Declaration ScopeGraph.Assignment ScopeGraph.Default Nothing (Span (Pos 3 0) (Pos 3 12)))
pure Complete
graphFile :: FilePath -> IO (ScopeGraph.ScopeGraph Name, Result)
graphFile :: Path.AbsRelFile -> IO (ScopeGraph.ScopeGraph Name, Result)
graphFile fp = do
file <- ByteString.readFile fp
file <- ByteString.readFile $ Path.toString fp
tree <- TS.parseByteString @Py.Term @Loc TSP.tree_sitter_python file
pyModule <- either die pure tree
pure $ runScopeGraph (Path.absRel fp) (Source.fromUTF8 file) pyModule
pure $ runScopeGraph fp (Source.fromUTF8 file) pyModule
assertSimpleAssignment :: HUnit.Assertion
assertSimpleAssignment = do
let path = "semantic-python/test/fixtures/1-04-toplevel-assignment.py"
let path = Path.absRel "semantic-python/test/fixtures/1-04-toplevel-assignment.py"
(result, Complete) <- graphFile path
(expecto, Complete) <- runM $ runSketch (ModuleInfo path "Python" mempty) sampleGraphThing
HUnit.assertEqual "Should work for simple case" expecto result
assertSimpleReference :: HUnit.Assertion
assertSimpleReference = do
let path = "semantic-python/test/fixtures/5-01-simple-reference.py"
let path = Path.absRel "semantic-python/test/fixtures/5-01-simple-reference.py"
(result, Complete) <- graphFile path
(expecto, Complete) <- runM $ runSketch (ModuleInfo path "Python" mempty) expectedReference
@ -115,7 +115,7 @@ expectedImportHole = do
assertLexicalScope :: HUnit.Assertion
assertLexicalScope = do
let path = "semantic-python/test/fixtures/5-02-simple-function.py"
let path = Path.absRel "semantic-python/test/fixtures/5-02-simple-function.py"
let info = ModuleInfo path "Python" mempty
(graph, _) <- graphFile path
case run (runSketch info expectedLexicalScope) of
@ -132,7 +132,7 @@ expectedLexicalScope = do
assertFunctionArg :: HUnit.Assertion
assertFunctionArg = do
let path = "semantic-python/test/fixtures/5-03-function-argument.py"
let path = Path.absRel "semantic-python/test/fixtures/5-03-function-argument.py"
(graph, _) <- graphFile path
let info = ModuleInfo path "Python" mempty
case run (runSketch info expectedFunctionArg) of
@ -154,7 +154,7 @@ expectedFunctionArg = do
assertImportHole :: HUnit.Assertion
assertImportHole = do
let path = "semantic-python/test/fixtures/cheese/6-01-imports.py"
let path = Path.absRel "semantic-python/test/fixtures/cheese/6-01-imports.py"
(graph, _) <- graphFile path
let info = ModuleInfo path "Python" mempty
case run (runSketch info expectedImportHole) of
@ -163,7 +163,7 @@ assertImportHole = do
assertQualifiedImport :: HUnit.Assertion
assertQualifiedImport = do
let path = "semantic-python/test/fixtures/cheese/6-01-qualified-imports.py"
let path = Path.absRel "semantic-python/test/fixtures/cheese/6-01-qualified-imports.py"
(graph, _) <- graphFile path
let info = ModuleInfo path "Python" mempty
case run (runSketch info expectedQualifiedImport) of

View File

@ -26,7 +26,7 @@ common haskell
, parsers ^>= 0.12.10
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
@ -61,7 +61,7 @@ test-suite test
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
main-is: PreciseTest.hs
build-depends: base
, bytestring ^>= 0.10.8.2
, hedgehog >= 0.6 && <2

View File

@ -5,45 +5,48 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# HLINT ignore "Reduce duplication" #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
module Language.Ruby.Tags
( ToTags(..)
) where
import AST.Element
import AST.Token
import AST.Traversable1
{-# HLINT ignore "Reduce duplication" #-}
module Language.Ruby.Tags
( ToTags (..),
)
where
import AST.Element
import AST.Token
import AST.Traversable1
import qualified AST.Unmarshal as TS
import Control.Effect.Reader
import Control.Effect.State
import Control.Effect.Writer
import Control.Monad
import Data.Foldable
import Data.Text as Text
import Control.Effect.Reader
import Control.Effect.State
import Control.Effect.Writer
import Control.Monad
import Data.Foldable
import Data.Text as Text
import qualified Language.Ruby.AST as Rb
import Source.Loc
import Source.Range as Range
import Source.Source as Source
import Tags.Tag
import Source.Loc
import Source.Range as Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
class ToTags t where
tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Has (State [Text]) sig m
)
=> t Loc
-> m ()
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Has (State [Text]) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Has (State [Text]) sig m
) =>
t Loc ->
m ()
default tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Has (State [Text]) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
tags = gtags
instance ToTags (Token sym n) where tags _ = pure ()
@ -58,94 +61,101 @@ instance (ToTags l, ToTags r) => ToTags (l :+: r) where
-- current tags output.
nameBlacklist :: [Text]
nameBlacklist =
[ "alias"
, "load"
, "require_relative"
, "require"
, "super"
, "undef"
, "__FILE__"
, "__LINE__"
, "lambda"
[ "alias",
"load",
"require_relative",
"require",
"super",
"undef",
"__FILE__",
"__LINE__",
"lambda"
]
yieldTag :: (Has (Reader Source) sig m, Has (Writer Tags.Tags) sig m) => Text -> Kind -> Loc -> Range -> m ()
yieldTag name Call _ _ | name `elem` nameBlacklist = pure ()
yieldTag name kind loc range = do
yieldTag name kind loc srcLineRange = do
src <- ask @Source
Tags.yield (Tag name kind loc (Tags.firstLine src range) Nothing)
Tags.yield (Tag name kind loc (Tags.firstLine src srcLineRange) Nothing)
instance ToTags Rb.Class where
tags t@Rb.Class
{ ann = loc@Loc { byteRange = Range { start } }
, name = expr
, extraChildren
} = enterScope True $ case expr of
Prj Rb.Constant { text } -> yield text
Prj Rb.ScopeResolution { name = Prj Rb.Constant { text } } -> yield text
Prj Rb.ScopeResolution { name = Prj Rb.Identifier { text } } -> yield text
_ -> gtags t
where
range' = case extraChildren of
Prj Rb.Superclass { ann = Loc { byteRange = Range { end }}} : _ -> Range start end
_ -> Range start (getEnd expr)
getEnd = Range.end . byteRange . TS.gann
yield name = yieldTag name Class loc range' >> gtags t
tags
t@Rb.Class
{ ann = Loc {byteRange = Range {start}},
name = expr,
extraChildren
} = enterScope True $ case expr of
Prj Rb.Constant {text, ann} -> yield text ann
Prj Rb.ScopeResolution {name = Prj Rb.Constant {text, ann}} -> yield text ann
Prj Rb.ScopeResolution {name = Prj Rb.Identifier {text, ann}} -> yield text ann
_ -> gtags t
where
range' = case extraChildren of
Prj Rb.Superclass {ann = Loc {byteRange = Range {end}}} : _ -> Range start end
_ -> Range start (getEnd expr)
getEnd = Range.end . byteRange . TS.gann
yield name loc = yieldTag name Class loc range' >> gtags t
instance ToTags Rb.SingletonClass where
tags t@Rb.SingletonClass
{ ann = loc@Loc { byteRange = range@Range { start } }
, value = Rb.Arg expr
, extraChildren
} = enterScope True $ case expr of
Prj (Rb.Primary (Prj (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Constant { text })))))) -> yield text
Prj (Rb.Primary (Prj (Rb.Lhs (Prj Rb.ScopeResolution { name = Prj Rb.Constant { text } })))) -> yield text
Prj (Rb.Primary (Prj (Rb.Lhs (Prj Rb.ScopeResolution { name = Prj Rb.Identifier { text } })))) -> yield text
_ -> gtags t
where
range' = case extraChildren of
x : _ -> Range start (getStart x)
_ -> range
getStart = Range.start . byteRange . TS.gann
yield name = yieldTag name Class loc range' >> gtags t
tags
t@Rb.SingletonClass
{ ann = Loc {byteRange = range@Range {start}},
value = Rb.Arg expr,
extraChildren
} = enterScope True $ case expr of
Prj (Rb.Primary (Prj (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Constant {text, ann})))))) -> yield text ann
Prj (Rb.Primary (Prj (Rb.Lhs (Prj Rb.ScopeResolution {name = Prj Rb.Constant {text, ann}})))) -> yield text ann
Prj (Rb.Primary (Prj (Rb.Lhs (Prj Rb.ScopeResolution {name = Prj Rb.Identifier {text, ann}})))) -> yield text ann
_ -> gtags t
where
range' = case extraChildren of
x : _ -> Range start (getStart x)
_ -> range
getStart = Range.start . byteRange . TS.gann
yield name loc = yieldTag name Class loc range' >> gtags t
instance ToTags Rb.Module where
tags t@Rb.Module
{ ann = loc@Loc { byteRange = Range { start } }
, name = expr
, extraChildren
} = enterScope True $ case expr of
Prj Rb.Constant { text = name } -> yield name
Prj Rb.ScopeResolution { name = Prj Rb.Constant { text = name } } -> yield name
Prj Rb.ScopeResolution { name = Prj Rb.Identifier { text = name } } -> yield name
_ -> gtags t
where
range' = case extraChildren of
x : _ -> Range start (getStart x)
_ -> Range start (getEnd expr)
getEnd = Range.end . byteRange . TS.gann
getStart = Range.start . byteRange . TS.gann
yield name = yieldTag name Module loc range' >> gtags t
tags
t@Rb.Module
{ ann = Loc {byteRange = Range {start}},
name = expr,
extraChildren
} = enterScope True $ case expr of
Prj Rb.Constant {text, ann} -> yield text ann
Prj Rb.ScopeResolution {name = Prj Rb.Constant {text, ann}} -> yield text ann
Prj Rb.ScopeResolution {name = Prj Rb.Identifier {text, ann}} -> yield text ann
_ -> gtags t
where
range' = case extraChildren of
x : _ -> Range start (getStart x)
_ -> Range start (getEnd expr)
getEnd = Range.end . byteRange . TS.gann
getStart = Range.start . byteRange . TS.gann
yield name loc = yieldTag name Module loc range' >> gtags t
yieldMethodNameTag
:: ( Has (State [Text]) sig m
, Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
) => t Loc -> Loc -> Range -> Rb.MethodName Loc -> m ()
yieldMethodNameTag t loc range (Rb.MethodName expr) = enterScope True $ case expr of
Prj Rb.Identifier { text = name } -> yield name
Prj Rb.Constant { text = name } -> yield name
yieldMethodNameTag ::
( Has (State [Text]) sig m,
Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
Range ->
Rb.MethodName Loc ->
m ()
yieldMethodNameTag t range (Rb.MethodName expr) = enterScope True $ case expr of
Prj Rb.Identifier {text, ann} -> yield text ann
Prj Rb.Constant {text, ann} -> yield text ann
-- Prj Rb.ClassVariable { text = name } -> yield name
Prj Rb.Operator { text = name } -> yield name
Prj Rb.Operator {text, ann} -> yield text ann
-- Prj Rb.GlobalVariable { text = name } -> yield name
-- Prj Rb.InstanceVariable { text = name } -> yield name
Prj Rb.Setter { extraChildren = Rb.Identifier { text = name } } -> yield (name <> "=") -- NB: Matches existing tags output, TODO: Remove this.
-- TODO: Should we report symbol method names as tags?
-- Prj Rb.Symbol { extraChildren = [Prj Rb.EscapeSequence { text = name }] } -> yield name
_ -> gtags t
Prj Rb.Setter {extraChildren = Rb.Identifier {text, ann}} -> yield (text <> "=") ann-- NB: Matches existing tags output, TODO: Remove this.
-- TODO: Should we report symbol method names as tags?
-- Prj Rb.Symbol { extraChildren = [Prj Rb.EscapeSequence { text = name }] } -> yield name
_ -> gtags t
where
yield name = yieldTag name Method loc range >> gtags t
yield name loc = yieldTag name Method loc range >> gtags t
enterScope :: (Has (State [Text]) sig m) => Bool -> m () -> m ()
enterScope createNew m = do
@ -155,28 +165,30 @@ enterScope createNew m = do
put locals
instance ToTags Rb.Method where
tags t@Rb.Method
{ ann = loc@Loc { byteRange = Range { start } }
, name
, parameters
} = yieldMethodNameTag t loc range' name
where
range' = case parameters of
Just Rb.MethodParameters { ann = Loc { byteRange = Range { end } }} -> Range start end
_ -> Range start (getEnd name)
getEnd = Range.end . byteRange . TS.gann
tags
t@Rb.Method
{ ann = Loc {byteRange = Range {start}},
name,
parameters
} = yieldMethodNameTag t range' name
where
range' = case parameters of
Just Rb.MethodParameters {ann = Loc {byteRange = Range {end}}} -> Range start end
_ -> Range start (getEnd name)
getEnd = Range.end . byteRange . TS.gann
instance ToTags Rb.SingletonMethod where
tags t@Rb.SingletonMethod
{ ann = loc@Loc { byteRange = Range { start } }
, name
, parameters
} = yieldMethodNameTag t loc range' name
where
range' = case parameters of
Just Rb.MethodParameters { ann = Loc { byteRange = Range { end } }} -> Range start end
_ -> Range start (getEnd name)
getEnd = Range.end . byteRange . TS.gann
tags
t@Rb.SingletonMethod
{ ann = Loc {byteRange = Range {start}},
name,
parameters
} = yieldMethodNameTag t range' name
where
range' = case parameters of
Just Rb.MethodParameters {ann = Loc {byteRange = Range {end}}} -> Range start end
_ -> Range start (getEnd name)
getEnd = Range.end . byteRange . TS.gann
instance ToTags Rb.Block where
tags = enterScope False . gtags
@ -185,54 +197,54 @@ instance ToTags Rb.DoBlock where
tags = enterScope False . gtags
instance ToTags Rb.Lambda where
tags Rb.Lambda { body, parameters } = enterScope False $ do
tags Rb.Lambda {body, parameters} = enterScope False $ do
maybe (pure ()) tags parameters
tags body
instance ToTags Rb.If where
tags Rb.If { condition, consequence, alternative } = do
tags Rb.If {condition, consequence, alternative} = do
tags condition
maybe (pure ()) tags consequence
maybe (pure ()) tags alternative
instance ToTags Rb.Elsif where
tags Rb.Elsif { condition, consequence, alternative } = do
tags Rb.Elsif {condition, consequence, alternative} = do
tags condition
maybe (pure ()) tags consequence
maybe (pure ()) tags alternative
instance ToTags Rb.Unless where
tags Rb.Unless { condition, consequence, alternative } = do
tags Rb.Unless {condition, consequence, alternative} = do
tags condition
maybe (pure ()) tags consequence
maybe (pure ()) tags alternative
instance ToTags Rb.While where
tags Rb.While { condition, body } = tags condition >> tags body
tags Rb.While {condition, body} = tags condition >> tags body
instance ToTags Rb.Until where
tags Rb.Until { condition, body } = tags condition >> tags body
tags Rb.Until {condition, body} = tags condition >> tags body
instance ToTags Rb.Regex where
tags Rb.Regex { } = pure ()
tags Rb.Regex {} = pure ()
instance ToTags Rb.Subshell where
tags Rb.Subshell { } = pure ()
tags Rb.Subshell {} = pure ()
-- TODO: Line of source produced here could be better.
instance ToTags Rb.Lhs where
tags t@(Rb.Lhs expr) = case expr of
-- NOTE: Calls do not look for locals
Prj Rb.Call { ann = loc@Loc { byteRange }, method } -> case method of
Prj Rb.Identifier { text } -> yieldCall text loc byteRange
Prj Rb.Constant { text } -> yieldCall text loc byteRange
Prj Rb.Operator { text } -> yieldCall text loc byteRange
_ -> gtags t
Prj Rb.Call {ann = Loc {byteRange}, method} -> case method of
Prj Rb.Identifier {text, ann} -> yieldCall text ann byteRange
Prj Rb.Constant {text, ann} -> yieldCall text ann byteRange
Prj Rb.Operator {text, ann} -> yieldCall text ann byteRange
_ -> gtags t
-- These do check for locals before yielding a call tag
Prj (Rb.Variable (Prj Rb.Identifier { ann = loc@Loc { byteRange }, text })) -> yield text Call loc byteRange
Prj Rb.ScopeResolution { ann = loc@Loc { byteRange }, name = Prj Rb.Identifier { text } } -> yield text Call loc byteRange
-- TODO: These would be great to track, but doesn't match current a la carte tags output
-- Prj (Rb.Variable (Prj Rb.Constant { ann = loc@Loc { byteRange }, text })) -> yield text Constant loc byteRange
-- Prj Rb.ScopeResolution { ann = loc@Loc { byteRange }, name = Prj Rb.Constant { text } } -> yield text Constant loc byteRange
Prj (Rb.Variable (Prj Rb.Identifier {ann = loc@Loc {byteRange}, text})) -> yield text Call loc byteRange
Prj Rb.ScopeResolution {ann = loc@Loc {byteRange}, name = Prj Rb.Identifier {text}} -> yield text Call loc byteRange
Prj (Rb.Variable (Prj Rb.Constant { ann = loc@Loc { byteRange }, text })) -> yield text Call loc byteRange -- TODO: Should yield Constant
Prj Rb.ScopeResolution { ann = loc@Loc { byteRange }, name = Prj Rb.Constant { text } } -> yield text Call loc byteRange -- TODO: Should yield Constant
_ -> gtags t
where
yieldCall name loc range = yieldTag name Call loc range >> gtags t
@ -241,105 +253,114 @@ instance ToTags Rb.Lhs where
unless (name `elem` locals) $ yieldTag name kind loc range
gtags t
-- TODO: Line of source produced here could be better.
instance ToTags Rb.MethodCall where
tags t@Rb.MethodCall
{ ann = loc@Loc { byteRange = byteRange@Range {} }
, method = expr
} = case expr of
Prj (Rb.Variable (Prj Rb.Identifier { text = name })) -> yield name Call
Prj (Rb.Variable (Prj Rb.Constant { text = name })) -> yield name Call -- TODO: Should yield Constant
Prj Rb.ScopeResolution { name = Prj Rb.Identifier { text } } -> yield text Call
Prj Rb.ScopeResolution { name = Prj Rb.Constant { text } } -> yield text Call -- TODO: Should yield Constant
Prj Rb.Call { method } -> case method of
Prj Rb.Identifier { text } -> yield text Call
Prj Rb.Constant { text } -> yield text Call
Prj Rb.Operator { text } -> yield text Call
_ -> gtags t
tags
t@Rb.MethodCall
{ ann = Loc {byteRange = byteRange@Range {}},
method = expr
} = case expr of
Prj (Rb.Variable (Prj Rb.Identifier {text, ann})) -> yield text Call ann
Prj (Rb.Variable (Prj Rb.Constant {text, ann})) -> yield text Call ann -- TODO: Should yield Constant
Prj Rb.ScopeResolution {name = Prj Rb.Identifier {text, ann}} -> yield text Call ann
Prj Rb.ScopeResolution {name = Prj Rb.Constant {text, ann}} -> yield text Call ann -- TODO: Should yield Constant
Prj Rb.Call {method} -> case method of
Prj Rb.Identifier {text, ann} -> yield text Call ann
Prj Rb.Constant {text, ann} -> yield text Call ann
Prj Rb.Operator {text, ann} -> yield text Call ann
_ -> gtags t
_ -> gtags t
where
yield name kind = yieldTag name kind loc byteRange >> gtags t
where
yield name kind loc = yieldTag name kind loc byteRange >> gtags t
instance ToTags Rb.Alias where
tags t@Rb.Alias
{ alias = Rb.MethodName aliasExpr
, name = Rb.MethodName nameExpr
} = do
tags
t@Rb.Alias
{ alias = Rb.MethodName aliasExpr,
name = Rb.MethodName nameExpr,
ann = Loc {byteRange}
} = do
case aliasExpr of
Prj Rb.Identifier { ann = loc@Loc { byteRange}, text } -> yieldTag text Function loc byteRange
_ -> tags aliasExpr
Prj Rb.Identifier {ann, text} -> yieldTag text Function ann byteRange
_ -> tags aliasExpr
case nameExpr of
Prj Rb.Identifier { ann = loc@Loc { byteRange}, text } -> yieldTag text Call loc byteRange
_ -> tags nameExpr
Prj Rb.Identifier {ann, text} -> yieldTag text Call ann byteRange
_ -> tags nameExpr
gtags t
instance ToTags Rb.Undef where
tags t@Rb.Undef
{ extraChildren
} = for_ extraChildren $ \(Rb.MethodName expr) -> do
tags
t@Rb.Undef
{ extraChildren,
ann = Loc {byteRange}
} = for_ extraChildren $ \(Rb.MethodName expr) -> do
case expr of
Prj Rb.Identifier { ann = loc@Loc { byteRange }, text } -> yieldTag text Call loc byteRange
_ -> tags expr
Prj Rb.Identifier {ann, text} -> yieldTag text Call ann byteRange
_ -> tags expr
gtags t
introduceLocals
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Has (State [Text]) sig m
)
=> [((Rb.BlockParameter :+: Rb.DestructuredParameter :+: Rb.HashSplatParameter) :+:
((Rb.Identifier :+: Rb.KeywordParameter) :+: (Rb.OptionalParameter :+: Rb.SplatParameter)))
Loc ]
-> m ()
introduceLocals ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Has (State [Text]) sig m
) =>
[ ( (Rb.BlockParameter :+: Rb.DestructuredParameter :+: Rb.HashSplatParameter)
:+: ((Rb.Identifier :+: Rb.KeywordParameter) :+: (Rb.OptionalParameter :+: Rb.SplatParameter))
)
Loc
] ->
m ()
introduceLocals params = for_ params $ \param -> case param of
Prj Rb.BlockParameter { name = Rb.Identifier { text = lvar } } -> modify (lvar :)
Prj Rb.DestructuredParameter { extraChildren } -> introduceLocals extraChildren
Prj Rb.HashSplatParameter { name = Just Rb.Identifier { text = lvar } } -> modify (lvar :)
Prj Rb.Identifier { text = lvar } -> modify (lvar :)
Prj Rb.KeywordParameter { name = Rb.Identifier { text = lvar }} -> modify (lvar :)
Prj Rb.OptionalParameter { name = Rb.Identifier { text = lvar }} -> modify (lvar :)
Prj Rb.SplatParameter { name = Just Rb.Identifier { text = lvar } } -> modify (lvar :)
_ -> pure ()
Prj Rb.BlockParameter {name = Rb.Identifier {text = lvar}} -> modify (lvar :)
Prj Rb.DestructuredParameter {extraChildren} -> introduceLocals extraChildren
Prj Rb.HashSplatParameter {name = Just Rb.Identifier {text = lvar}} -> modify (lvar :)
Prj Rb.Identifier {text = lvar} -> modify (lvar :)
Prj Rb.KeywordParameter {name = Rb.Identifier {text = lvar}} -> modify (lvar :)
Prj Rb.OptionalParameter {name = Rb.Identifier {text = lvar}} -> modify (lvar :)
Prj Rb.SplatParameter {name = Just Rb.Identifier {text = lvar}} -> modify (lvar :)
_ -> pure ()
instance ToTags Rb.MethodParameters where
tags t@Rb.MethodParameters{ extraChildren } = introduceLocals extraChildren >> gtags t
tags t@Rb.MethodParameters {extraChildren} = introduceLocals extraChildren >> gtags t
instance ToTags Rb.LambdaParameters where
tags t@Rb.LambdaParameters{ extraChildren } = introduceLocals extraChildren >> gtags t
tags t@Rb.LambdaParameters {extraChildren} = introduceLocals extraChildren >> gtags t
instance ToTags Rb.BlockParameters where
tags t@Rb.BlockParameters{ extraChildren } = introduceLocals extraChildren >> gtags t
tags t@Rb.BlockParameters {extraChildren} = introduceLocals extraChildren >> gtags t
instance ToTags Rb.Assignment where
tags t@Rb.Assignment{ left } = do
tags t@Rb.Assignment {left} = do
case left of
Prj (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Identifier { text })))) -> modify (text :)
Prj Rb.LeftAssignmentList { extraChildren } -> introduceLhsLocals extraChildren
_ -> pure ()
Prj (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Identifier {text})))) -> modify (text :)
Prj Rb.LeftAssignmentList {extraChildren} -> introduceLhsLocals extraChildren
_ -> pure ()
gtags t
where
introduceLhsLocals xs = for_ xs $ \x -> case x of
Prj (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Identifier { text })))) -> modify (text :)
Prj Rb.DestructuredLeftAssignment { extraChildren } -> introduceLhsLocals extraChildren
Prj Rb.RestAssignment { extraChildren = Just (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Identifier { text })))) } -> modify (text :)
Prj (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Identifier {text})))) -> modify (text :)
Prj Rb.DestructuredLeftAssignment {extraChildren} -> introduceLhsLocals extraChildren
Prj Rb.RestAssignment {extraChildren = Just (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Identifier {text}))))} -> modify (text :)
_ -> pure ()
instance ToTags Rb.OperatorAssignment where
tags t@Rb.OperatorAssignment{ left } = do
tags t@Rb.OperatorAssignment {left} = do
case left of
Prj (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Identifier { text })))) -> modify (text :)
_ -> pure ()
Prj (Rb.Lhs (Prj (Rb.Variable (Prj Rb.Identifier {text})))) -> modify (text :)
_ -> pure ()
gtags t
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Has (State [Text]) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Has (State [Text]) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
gtags = traverse1_ @ToTags (const (pure ())) tags
-- instance ToTags Rb.Alias
instance ToTags Rb.Arg
instance ToTags Rb.ArgumentList

21
semantic-rust/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2020 GitHub
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

18
semantic-rust/README.md Normal file
View File

@ -0,0 +1,18 @@
# Semantic support for Rust
This package implements `semantic` support for [Rust](https://www.rust-lang.org/) using the `semantic-core` intermediate language.
## Generating AST
```
cd semantic-rust
cabal v2-repl
λ> :seti -XOverloadedStrings
λ> :seti -XTypeApplications
λ> import Source.Span
λ> import Source.Range
λ> import AST.Unmarshal
λ> TS.parseByteString @Language.Rust.AST.SourceFile @(Source.Span.Span, Source.Range.Range) Language.Rust.Grammar.tree_sitter_rust "let x = 1;"
Right (SourceFile {ann = (Span {start = Pos {line = 0, column = 0}, end = Pos {line = 0, column = 10}},Range {start = 0, end = 10}), extraChildren = [L1 (DeclarationStatement {getDeclarationStatement = R1 (L1 (L1 (R1 (LetDeclaration {ann = (Span {start = Pos {line = 0, column = 0}, end = Pos {line = 0, column = 10}},Range {start = 0, end = 10}), pattern = Pattern {getPattern = L1 (R1 (L1 (L1 (Identifier {ann = (Span {start = Pos {line = 0, column = 4}, end = Pos {line = 0, column = 5}},Range
{start = 4, end = 5}), text = "x"}))))}, value = Just (Expression {getExpression = L1 (L1 (L1 (L1 (L1 (Literal {getLiteral = R1 (L1 (IntegerLiteral {ann = (Span {start = Pos {line = 0, column = 8}, end = Pos {line = 0, column = 9}},Range {start = 8, end = 9}), text = "1"}))})))))}), type' = Nothing, extraChildren = Nothing}))))})]})
```

2
semantic-rust/Setup.hs Normal file
View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,87 @@
cabal-version: 2.4
name: semantic-rust
version: 0.0.0.0
synopsis: Semantic support for Rust
description: Semantic support for Rust.
homepage: https://github.com/github/semantic/tree/master/semantic-rust#readme
bug-reports: https://github.com/github/semantic/issues
license: MIT
license-file: LICENSE
author: The Semantic authors, Alexei Pastuchov
maintainer: opensource+semantic@github.com
copyright: (c) 2020 GitHub, Inc.
category: Language
build-type: Simple
stability: alpha
extra-source-files: README.md
tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
build-depends: base ^>= 4.13
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.9
, tree-sitter-rust ^>= 0.1.0.0
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies
library
import: haskell
exposed-modules:
Language.Rust
Language.Rust.AST
Language.Rust.Grammar
Language.Rust.Tags
hs-source-dirs: src
test-suite test
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
build-depends: base
, bytestring ^>= 0.10.8.2
, pathtype ^>= 0.8.1
, semantic-ast
, semantic-rust
, tasty
, tasty-hunit
, text
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies

View File

@ -0,0 +1,24 @@
-- | Semantic functionality for Rust programs.
module Language.Rust
( Term(..)
, Language.Rust.Grammar.tree_sitter_rust
) where
import Data.Proxy
import qualified Language.Rust.AST as Rust
import qualified Language.Rust.Tags as RustTags
import qualified Tags.Tagging.Precise as Tags
import qualified Language.Rust.Grammar (tree_sitter_rust)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: Rust.SourceFile a }
instance TS.SymbolMatching Term where
matchedSymbols _ = TS.matchedSymbols (Proxy :: Proxy Rust.SourceFile)
showFailure _ = TS.showFailure (Proxy :: Proxy Rust.SourceFile)
instance TS.Unmarshal Term where
matchers = fmap (fmap (TS.hoist Term)) TS.matchers
instance Tags.ToTags Term where
tags src = Tags.runTagging src . RustTags.tags . getTerm

View File

@ -0,0 +1,23 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Rust.AST
( module Language.Rust.AST
, Rust.getTestCorpusDir
) where
import AST.GenerateSyntax
import AST.Token
import Language.Haskell.TH.Syntax (runIO)
import qualified TreeSitter.Rust as Rust (getNodeTypesPath, getTestCorpusDir, tree_sitter_rust)
runIO Rust.getNodeTypesPath >>= astDeclarationsForLanguage Rust.tree_sitter_rust

View File

@ -0,0 +1,12 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Rust.Grammar
( tree_sitter_rust
, Grammar(..)
) where
import AST.Grammar.TH
import Language.Haskell.TH
import TreeSitter.Rust (tree_sitter_rust)
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_rust

View File

@ -0,0 +1,208 @@
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE DisambiguateRecordFields #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Language.Rust.Tags
( ToTags(..)
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import qualified Language.Rust.AST as Rust
import Source.Loc
import Source.Source as Source
import Tags.Tag()
import qualified Tags.Tagging.Precise as Tags
class ToTags t where
tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
)
=> t Loc
-> m ()
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
tags = gtags
instance (ToTags l, ToTags r) => ToTags (l :+: r) where
tags (L1 l) = tags l
tags (R1 r) = tags r
instance ToTags (Token sym n) where tags _ = pure ()
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = traverse1_ @ToTags (const (pure ())) tags
instance ToTags Rust.AbstractType
instance ToTags Rust.Arguments
instance ToTags Rust.ArrayExpression
instance ToTags Rust.ArrayType
instance ToTags Rust.AssignmentExpression
instance ToTags Rust.AssociatedType
instance ToTags Rust.AsyncBlock
instance ToTags Rust.AttributeItem
instance ToTags Rust.AwaitExpression
instance ToTags Rust.BaseFieldInitializer
instance ToTags Rust.BinaryExpression
instance ToTags Rust.Block
instance ToTags Rust.BlockComment
instance ToTags Rust.BooleanLiteral
instance ToTags Rust.BoundedType
instance ToTags Rust.BracketedType
instance ToTags Rust.BreakExpression
instance ToTags Rust.CallExpression
instance ToTags Rust.CapturedPattern
instance ToTags Rust.CharLiteral
instance ToTags Rust.ClosureExpression
instance ToTags Rust.ClosureParameters
instance ToTags Rust.CompoundAssignmentExpr
instance ToTags Rust.ConstItem
instance ToTags Rust.ConstParameter
instance ToTags Rust.ConstrainedTypeParameter
instance ToTags Rust.ContinueExpression
instance ToTags Rust.Crate
instance ToTags Rust.DeclarationList
instance ToTags Rust.DeclarationStatement
instance ToTags Rust.DynamicType
instance ToTags Rust.EmptyStatement
instance ToTags Rust.EmptyType
instance ToTags Rust.EnumItem
instance ToTags Rust.EnumVariant
instance ToTags Rust.EnumVariantList
instance ToTags Rust.EscapeSequence
instance ToTags Rust.Expression
instance ToTags Rust.ExternCrateDeclaration
instance ToTags Rust.ExternModifier
instance ToTags Rust.FieldDeclaration
instance ToTags Rust.FieldDeclarationList
instance ToTags Rust.FieldExpression
instance ToTags Rust.FieldIdentifier
instance ToTags Rust.FieldInitializer
instance ToTags Rust.FieldInitializerList
instance ToTags Rust.FieldPattern
instance ToTags Rust.FloatLiteral
instance ToTags Rust.ForExpression
instance ToTags Rust.ForLifetimes
instance ToTags Rust.ForeignModItem
instance ToTags Rust.FragmentSpecifier
instance ToTags Rust.FunctionItem
instance ToTags Rust.FunctionModifiers
instance ToTags Rust.FunctionSignatureItem
instance ToTags Rust.FunctionType
instance ToTags Rust.GenericFunction
instance ToTags Rust.GenericType
instance ToTags Rust.GenericTypeWithTurbofish
instance ToTags Rust.HigherRankedTraitBound
instance ToTags Rust.Identifier
instance ToTags Rust.IfExpression
instance ToTags Rust.IfLetExpression
instance ToTags Rust.ImplItem
instance ToTags Rust.IndexExpression
instance ToTags Rust.InnerAttributeItem
instance ToTags Rust.IntegerLiteral
instance ToTags Rust.LetDeclaration
instance ToTags Rust.Lifetime
instance ToTags Rust.LineComment
instance ToTags Rust.Literal
instance ToTags Rust.LiteralPattern
instance ToTags Rust.LoopExpression
instance ToTags Rust.LoopLabel
instance ToTags Rust.MacroDefinition
instance ToTags Rust.MacroInvocation
instance ToTags Rust.MacroRule
instance ToTags Rust.MatchArm
instance ToTags Rust.MatchBlock
instance ToTags Rust.MatchExpression
instance ToTags Rust.MatchPattern
instance ToTags Rust.MetaArguments
instance ToTags Rust.MetaItem
instance ToTags Rust.Metavariable
instance ToTags Rust.ModItem
instance ToTags Rust.MutPattern
instance ToTags Rust.MutableSpecifier
instance ToTags Rust.NegativeLiteral
instance ToTags Rust.OptionalTypeParameter
instance ToTags Rust.OrderedFieldDeclarationList
instance ToTags Rust.Parameter
instance ToTags Rust.Parameters
instance ToTags Rust.ParenthesizedExpression
instance ToTags Rust.Pattern
instance ToTags Rust.PointerType
instance ToTags Rust.PrimitiveType
instance ToTags Rust.QualifiedType
instance ToTags Rust.RangeExpression
instance ToTags Rust.RangePattern
instance ToTags Rust.RawStringLiteral
instance ToTags Rust.RefPattern
instance ToTags Rust.ReferenceExpression
instance ToTags Rust.ReferencePattern
instance ToTags Rust.ReferenceType
instance ToTags Rust.RemainingFieldPattern
instance ToTags Rust.RemovedTraitBound
instance ToTags Rust.ReturnExpression
instance ToTags Rust.ScopedIdentifier
instance ToTags Rust.ScopedTypeIdentifier
instance ToTags Rust.ScopedUseList
instance ToTags Rust.Self
instance ToTags Rust.SelfParameter
instance ToTags Rust.ShorthandFieldIdentifier
instance ToTags Rust.ShorthandFieldInitializer
instance ToTags Rust.SlicePattern
instance ToTags Rust.SourceFile
instance ToTags Rust.StaticItem
instance ToTags Rust.StringLiteral
instance ToTags Rust.StructExpression
instance ToTags Rust.StructItem
instance ToTags Rust.StructPattern
instance ToTags Rust.Super
instance ToTags Rust.TokenBindingPattern
instance ToTags Rust.TokenRepetition
instance ToTags Rust.TokenRepetitionPattern
instance ToTags Rust.TokenTree
instance ToTags Rust.TokenTreePattern
instance ToTags Rust.TraitBounds
instance ToTags Rust.TraitItem
instance ToTags Rust.TryExpression
instance ToTags Rust.TupleExpression
instance ToTags Rust.TuplePattern
instance ToTags Rust.TupleStructPattern
instance ToTags Rust.TupleType
instance ToTags Rust.Type
instance ToTags Rust.TypeArguments
instance ToTags Rust.TypeBinding
instance ToTags Rust.TypeCastExpression
instance ToTags Rust.TypeIdentifier
instance ToTags Rust.TypeItem
instance ToTags Rust.TypeParameters
instance ToTags Rust.UnaryExpression
instance ToTags Rust.UnionItem
instance ToTags Rust.UnitExpression
instance ToTags Rust.UnitType
instance ToTags Rust.UnsafeBlock
instance ToTags Rust.UseAsClause
instance ToTags Rust.UseDeclaration
instance ToTags Rust.UseList
instance ToTags Rust.UseWildcard
instance ToTags Rust.VariadicParameter
instance ToTags Rust.VisibilityModifier
instance ToTags Rust.WhereClause
instance ToTags Rust.WherePredicate
instance ToTags Rust.WhileExpression
instance ToTags Rust.WhileLetExpression

View File

@ -0,0 +1,25 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
module Main (main) where
import AST.Test
import AST.Unmarshal (parseByteString)
import qualified Language.Rust.AST as Rust
import Language.Rust.Grammar
import qualified System.Path as Path
import Test.Tasty
import Control.Monad (liftM)
main :: IO ()
main
= Path.absDir <$> Rust.getTestCorpusDir
>>= excludeMacrosCorpus . readCorpusFiles'
>>= traverse (testCorpus parse)
>>= defaultMain . tests
where
parse = parseByteString @Rust.SourceFile @() tree_sitter_rust
excludeMacrosCorpus l = liftM (filter (f "expressions") ) l
where f p bn = p /= (Path.toString . Path.takeBaseName) bn
tests :: [TestTree] -> TestTree
tests = testGroup "tree-sitter-rust corpus tests"

View File

@ -47,7 +47,7 @@ library
, lens
, pathtype
, semantic-analysis
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semilattices
, text ^>= 1.2.3.1
hs-source-dirs: src

View File

@ -14,6 +14,7 @@ import Data.Maybe
import Data.Semilattice.Lower
import Data.Text (Text)
import GHC.Stack
import qualified System.Path as Path
data Module body = Module { moduleInfo :: ModuleInfo, moduleBody :: body }
deriving (Eq, Foldable, Functor, Ord, Traversable)
@ -22,20 +23,20 @@ instance Show body => Show (Module body) where
showsPrec d Module{..} = showsBinaryWith showsPrec showsPrec "Module" d (modulePath moduleInfo) moduleBody
type ModulePath = FilePath
type ModulePath = Path.AbsRelFile
data ModuleInfo = ModuleInfo { modulePath :: ModulePath, moduleLanguage :: Text, moduleOid :: Text }
deriving (Eq, Ord)
instance Lower ModuleInfo where
lowerBound = ModuleInfo mempty "Unknown" mempty
lowerBound = ModuleInfo (Path.toAbsRel Path.emptyFile) "Unknown" mempty
instance Show ModuleInfo where
showsPrec d = showsUnaryWith showsPrec "ModuleInfo" d . modulePath
moduleInfoFromSrcLoc :: SrcLoc -> ModuleInfo
moduleInfoFromSrcLoc loc = ModuleInfo (srcLocModule loc) "Unknown" mempty
moduleInfoFromSrcLoc loc = ModuleInfo (Path.absRel $ srcLocModule loc) "Unknown" mempty
-- | Produce 'ModuleInfo' from the top location on the Haskell call stack (i.e. the file where the call to 'moduleInfoFromCallStack' was made).
moduleInfoFromCallStack :: HasCallStack => ModuleInfo
moduleInfoFromCallStack = maybe (ModuleInfo "?" "Unknown" mempty) (moduleInfoFromSrcLoc . snd) (listToMaybe (getCallStack callStack))
moduleInfoFromCallStack = maybe (ModuleInfo (Path.absRel "?") "Unknown" mempty) (moduleInfoFromSrcLoc . snd) (listToMaybe (getCallStack callStack))

View File

@ -37,7 +37,7 @@ instance HasSpan (Info scopeAddress) where
{-# INLINE span_ #-}
instance Lower (Info scopeAddress) where
lowerBound = Info lowerBound lowerBound lowerBound Public lowerBound lowerBound Nothing
lowerBound = Info lowerBound lowerBound lowerBound Public (point (Pos 0 0)) lowerBound Nothing
instance AbstractHole (Info address) where
hole = lowerBound

View File

@ -1,3 +1,9 @@
# 0.1.0.0
- Adds `CodeQL` language constructor.
- Bumps `lingo-haskell` to 0.3.2.
- Removes Span and Pos lower bound instances. This makes callers responsible for defining whether Span / Pos are 0 or 1 indexed.
# 0.0.2.0
- Adds `Source.Language`.

View File

@ -55,7 +55,7 @@ library
, containers ^>= 0.6.2
, generic-monoid ^>= 0.1.0.0
, hashable >= 1.2.7 && < 1.4
, lingo ^>= 0.3
, lingo ^>= 0.3.2.0
, pathtype ^>= 0.8.1
, semilattices ^>= 0.0.0.3
, text ^>= 1.2.3.1

View File

@ -33,11 +33,12 @@ data Language
| JSON
| JSX
| Markdown
| PHP
| Python
| Ruby
| TypeScript
| PHP
| TSX
| CodeQL
deriving (Eq, Generic, Ord, Read, Show, Bounded, Hashable, ToJSON, Enum)
-- | Reifies a proxied type-level 'Language' to a value.
@ -47,6 +48,9 @@ class SLanguage (lang :: Language) where
instance SLanguage 'Unknown where
reflect _ = Unknown
instance SLanguage 'CodeQL where
reflect _ = CodeQL
instance SLanguage 'Go where
reflect _ = Go
@ -68,6 +72,9 @@ instance SLanguage 'JSX where
instance SLanguage 'Markdown where
reflect _ = Markdown
instance SLanguage 'PHP where
reflect _ = PHP
instance SLanguage 'Python where
reflect _ = Python
@ -77,9 +84,6 @@ instance SLanguage 'Ruby where
instance SLanguage 'TypeScript where
reflect _ = TypeScript
instance SLanguage 'PHP where
reflect _ = PHP
instance FromJSON Language where
parseJSON = withText "Language" $ \l ->
pure $ textToLanguage l
@ -106,6 +110,7 @@ forPath path =
languageToText :: Language -> T.Text
languageToText = \case
Unknown -> "Unknown"
CodeQL -> "CodeQL"
Go -> "Go"
Haskell -> "Haskell"
Java -> "Java"
@ -113,14 +118,15 @@ languageToText = \case
JSON -> "JSON"
JSX -> "JSX"
Markdown -> "Markdown"
PHP -> "PHP"
Python -> "Python"
Ruby -> "Ruby"
TypeScript -> "TypeScript"
TSX -> "TSX"
PHP -> "PHP"
textToLanguage :: T.Text -> Language
textToLanguage = \case
"CodeQL" -> CodeQL
"Go" -> Go
"Haskell" -> Haskell
"Java" -> Java
@ -128,9 +134,9 @@ textToLanguage = \case
"JSON" -> JSON
"JSX" -> JSX
"Markdown" -> Markdown
"PHP" -> PHP
"Python" -> Python
"Ruby" -> Ruby
"TypeScript" -> TypeScript
"TSX" -> TSX
"PHP" -> PHP
_ -> Unknown

View File

@ -25,7 +25,7 @@ library
build-depends:
base >= 4.13 && < 5
, fused-effects ^>= 1.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, text ^>= 1.2.3.1
hs-source-dirs: src
default-language: Haskell2010

View File

@ -23,5 +23,11 @@ data Kind
| Module
-- References
| Call
| Type
-- Just as Call is to Class and Function, Implementation is to Interface.
-- This suggests that perhaps we should have an Instantiation kind that
-- we use for Class.
| Interface
| Implementation
-- Constant -- TODO: New kind for constant references
deriving (Bounded, Enum, Eq, Show)

View File

@ -26,7 +26,7 @@ common haskell
, parsers ^>= 0.12.10
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
@ -61,7 +61,7 @@ test-suite test
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
main-is: PreciseTest.hs
build-depends: base
, bytestring ^>= 0.10.8.2
, hedgehog >= 0.6 && <2

View File

@ -5,155 +5,145 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Language.TSX.Tags
( ToTags(..)
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable
import Data.Text as Text
module Language.TSX.Tags
( ToTags (..),
)
where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable
import Data.Text as Text
import qualified Language.TSX.AST as Tsx
import Source.Loc
import Source.Source as Source
import Tags.Tag
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
class ToTags t where
tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
)
=> t Loc
-> m ()
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m
) =>
t Loc ->
m ()
default tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
tags = gtags
instance ToTags Tsx.Function where
tags t@Tsx.Function
{ ann = loc@Loc { byteRange }
, name = Just Tsx.Identifier { text }
} = yieldTag text Function loc byteRange >> gtags t
tags t@Tsx.Function {ann = Loc {byteRange}, name = Just Tsx.Identifier {text, ann}} =
yieldTag text Function ann byteRange >> gtags t
tags t = gtags t
instance ToTags Tsx.FunctionSignature where
tags t@Tsx.FunctionSignature
{ ann = loc@Loc { byteRange }
, name = Tsx.Identifier { text }
} = yieldTag text Function loc byteRange >> gtags t
tags t@Tsx.FunctionSignature {ann = Loc {byteRange}, name = Tsx.Identifier {text, ann}} =
yieldTag text Function ann byteRange >> gtags t
instance ToTags Tsx.FunctionDeclaration where
tags t@Tsx.FunctionDeclaration
{ ann = loc@Loc { byteRange }
, name = Tsx.Identifier { text }
} = yieldTag text Function loc byteRange >> gtags t
tags t@Tsx.FunctionDeclaration {ann = Loc {byteRange}, name = Tsx.Identifier {text, ann}} =
yieldTag text Function ann byteRange >> gtags t
instance ToTags Tsx.MethodDefinition where
tags t@Tsx.MethodDefinition
{ ann = loc@Loc { byteRange }
, name
} = case name of
Prj Tsx.PropertyIdentifier { text } -> yield text
-- TODO: There are more here
_ -> gtags t
where
yield name = yieldTag name Method loc byteRange >> gtags t
tags t@Tsx.MethodDefinition {ann = Loc {byteRange}, name} = case name of
Prj Tsx.PropertyIdentifier {text, ann} -> yieldTag text Method ann byteRange >> gtags t
_ -> gtags t
instance ToTags Tsx.Pair where
tags t@Tsx.Pair {ann = Loc {byteRange}, key, value = Tsx.Expression expr} = case (key, expr) of
(Prj Tsx.PropertyIdentifier {text, ann}, Prj Tsx.Function {}) -> yield text ann
(Prj Tsx.PropertyIdentifier {text, ann}, Prj Tsx.ArrowFunction {}) -> yield text ann
_ -> gtags t
where
yield text loc = yieldTag text Function loc byteRange >> gtags t
instance ToTags Tsx.ClassDeclaration where
tags t@Tsx.ClassDeclaration
{ ann = loc@Loc { byteRange }
, name = Tsx.TypeIdentifier { text }
} = yieldTag text Class loc byteRange >> gtags t
tags t@Tsx.ClassDeclaration {ann = Loc {byteRange}, name = Tsx.TypeIdentifier {text, ann}} =
yieldTag text Class ann byteRange >> gtags t
instance ToTags Tsx.CallExpression where
tags t@Tsx.CallExpression
{ ann = loc@Loc { byteRange }
, function = Tsx.Expression expr
} = match expr
tags t@Tsx.CallExpression {ann = Loc {byteRange}, function = Tsx.Expression expr} = match expr
where
match expr = case expr of
Prj Tsx.Identifier { text } -> yield text
Prj Tsx.NewExpression { constructor = Prj Tsx.Identifier { text } } -> yield text
Prj Tsx.CallExpression { function = Tsx.Expression expr } -> match expr
Prj Tsx.MemberExpression { property = Tsx.PropertyIdentifier { text } } -> yield text
Prj Tsx.Function { name = Just Tsx.Identifier { text }} -> yield text
Prj Tsx.ParenthesizedExpression { extraChildren } -> for_ extraChildren $ \ x -> case x of
Prj Tsx.Identifier {text, ann} -> yield text ann
Prj Tsx.NewExpression {constructor = Prj Tsx.Identifier {text, ann}} -> yield text ann
Prj Tsx.CallExpression {function = Tsx.Expression expr} -> match expr
Prj Tsx.MemberExpression {property = Tsx.PropertyIdentifier {text, ann}} -> yield text ann
Prj Tsx.Function {name = Just Tsx.Identifier {text, ann}} -> yield text ann
Prj Tsx.ParenthesizedExpression {extraChildren} -> for_ extraChildren $ \x -> case x of
Prj (Tsx.Expression expr) -> match expr
_ -> tags x
_ -> tags x
_ -> gtags t
yield name = yieldTag name Call loc byteRange >> gtags t
yield name loc = yieldTag name Call loc byteRange >> gtags t
instance ToTags Tsx.Class where
tags t@Tsx.Class
{ ann = loc@Loc { byteRange }
, name = Just Tsx.TypeIdentifier { text }
} = yieldTag text Class loc byteRange >> gtags t
tags t@Tsx.Class {ann = Loc {byteRange}, name = Just Tsx.TypeIdentifier {text, ann}} =
yieldTag text Class ann byteRange >> gtags t
tags t = gtags t
instance ToTags Tsx.Module where
tags t@Tsx.Module
{ ann = loc@Loc { byteRange }
, name
} = match name
where
match expr = case expr of
Prj Tsx.Identifier { text } -> yield text
-- TODO: Handle NestedIdentifiers and Strings
-- Prj Tsx.NestedIdentifier { extraChildren } -> match
_ -> gtags t
yield text = yieldTag text Module loc byteRange >> gtags t
tags t@Tsx.Module {ann = Loc {byteRange}, name} = case name of
Prj Tsx.Identifier {text, ann} -> yieldTag text Module ann byteRange >> gtags t
_ -> gtags t
instance ToTags Tsx.VariableDeclarator where
tags t@Tsx.VariableDeclarator
{ ann = loc@Loc { byteRange }
, name
, value = Just (Tsx.Expression expr)
} = case (expr, name) of
(Prj Tsx.Function{}, Prj Tsx.Identifier { text }) -> yield text
(Prj Tsx.ArrowFunction{}, Prj Tsx.Identifier { text }) -> yield text
_ -> gtags t
tags t@Tsx.VariableDeclarator {ann = Loc {byteRange}, name, value = Just (Tsx.Expression expr)} =
case (expr, name) of
(Prj Tsx.Function {}, Prj Tsx.Identifier {text, ann}) -> yield text ann
(Prj Tsx.ArrowFunction {}, Prj Tsx.Identifier {text, ann}) -> yield text ann
_ -> gtags t
where
yield text = yieldTag text Function loc byteRange >> gtags t
yield text loc = yieldTag text Function loc byteRange >> gtags t
tags t = gtags t
instance ToTags Tsx.AssignmentExpression where
tags t@Tsx.AssignmentExpression {ann = Loc {byteRange}, left, right = (Tsx.Expression expr)} =
case (left, expr) of
(Prj Tsx.Identifier {text, ann}, Prj Tsx.Function {}) -> yield text ann
(Prj Tsx.Identifier {text, ann}, Prj Tsx.ArrowFunction {}) -> yield text ann
(Prj Tsx.MemberExpression {property = Tsx.PropertyIdentifier {text, ann}}, Prj Tsx.Function {}) -> yield text ann
(Prj Tsx.MemberExpression {property = Tsx.PropertyIdentifier {text, ann}}, Prj Tsx.ArrowFunction {}) -> yield text ann
_ -> gtags t
where
yield text loc = yieldTag text Function loc byteRange >> gtags t
instance (ToTags l, ToTags r) => ToTags (l :+: r) where
tags (L1 l) = tags l
tags (R1 r) = tags r
instance ToTags (Token sym n) where tags _ = pure ()
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
gtags = traverse1_ @ToTags (const (pure ())) tags
-- These are all valid, but point to built-in functions (e.g. require) that a la
-- carte doesn't display and since we have nothing to link to yet (can't
-- jump-to-def), we hide them from the current tags output.
nameBlacklist :: [Text]
nameBlacklist =
[ "require"
]
nameBlacklist = ["require"]
yieldTag :: (Has (Reader Source) sig m, Has (Writer Tags.Tags) sig m) => Text -> Kind -> Loc -> Range -> m ()
yieldTag name Call _ _ | name `elem` nameBlacklist = pure ()
yieldTag name kind loc range = do
yieldTag name kind loc srcLineRange = do
src <- ask @Source
Tags.yield (Tag name kind loc (Tags.firstLine src range) Nothing)
Tags.yield (Tag name kind loc (Tags.firstLine src srcLineRange) Nothing)
{- ORMOLU_DISABLE -}
instance ToTags Tsx.AbstractClassDeclaration
instance ToTags Tsx.AbstractMethodSignature
instance ToTags Tsx.AccessibilityModifier
@ -164,7 +154,7 @@ instance ToTags Tsx.ArrayPattern
instance ToTags Tsx.ArrayType
instance ToTags Tsx.ArrowFunction
instance ToTags Tsx.AsExpression
instance ToTags Tsx.AssignmentExpression
-- instance ToTags Tsx.AssignmentExpression
instance ToTags Tsx.AssignmentPattern
instance ToTags Tsx.AugmentedAssignmentExpression
instance ToTags Tsx.AwaitExpression
@ -259,7 +249,7 @@ instance ToTags Tsx.Object
instance ToTags Tsx.ObjectPattern
instance ToTags Tsx.ObjectType
instance ToTags Tsx.OptionalParameter
instance ToTags Tsx.Pair
-- instance ToTags Tsx.Pair
instance ToTags Tsx.ParenthesizedExpression
instance ToTags Tsx.ParenthesizedType
instance ToTags Tsx.PredefinedType
@ -312,3 +302,4 @@ instance ToTags Tsx.VariableDeclaration
instance ToTags Tsx.WhileStatement
instance ToTags Tsx.WithStatement
instance ToTags Tsx.YieldExpression
{- ORMOLU_ENABLE -}

View File

@ -26,7 +26,7 @@ common haskell
, parsers ^>= 0.12.10
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
@ -61,7 +61,7 @@ test-suite test
import: haskell
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
main-is: PreciseTest.hs
build-depends: base
, bytestring ^>= 0.10.8.2
, hedgehog >= 0.6 && <2

View File

@ -5,148 +5,145 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Language.TypeScript.Tags
( ToTags(..)
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable
import Data.Text as Text
module Language.TypeScript.Tags
( ToTags (..),
)
where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Foldable
import Data.Text as Text
import qualified Language.TypeScript.AST as Ts
import Source.Loc
import Source.Source as Source
import Tags.Tag
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
class ToTags t where
tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
)
=> t Loc
-> m ()
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m
) =>
t Loc ->
m ()
default tags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
tags = gtags
instance ToTags Ts.Function where
tags t@Ts.Function
{ ann = loc@Loc { byteRange }
, name = Just Ts.Identifier { text }
} = yieldTag text Function loc byteRange >> gtags t
tags t@Ts.Function {ann = Loc {byteRange}, name = Just Ts.Identifier {text, ann}} =
yieldTag text Function ann byteRange >> gtags t
tags t = gtags t
instance ToTags Ts.FunctionSignature where
tags t@Ts.FunctionSignature
{ ann = loc@Loc { byteRange }
, name = Ts.Identifier { text }
} = yieldTag text Function loc byteRange >> gtags t
tags t@Ts.FunctionSignature {ann = Loc {byteRange}, name = Ts.Identifier {text, ann}} =
yieldTag text Function ann byteRange >> gtags t
instance ToTags Ts.FunctionDeclaration where
tags t@Ts.FunctionDeclaration
{ ann = loc@Loc { byteRange }
, name = Ts.Identifier { text }
} = yieldTag text Function loc byteRange >> gtags t
tags t@Ts.FunctionDeclaration {ann = Loc {byteRange}, name = Ts.Identifier {text, ann}} =
yieldTag text Function ann byteRange >> gtags t
instance ToTags Ts.MethodDefinition where
tags t@Ts.MethodDefinition
{ ann = loc@Loc { byteRange }
, name
} = case name of
Prj Ts.PropertyIdentifier { text } -> yield text
-- TODO: There are more here
_ -> gtags t
where
yield name = yieldTag name Method loc byteRange >> gtags t
tags t@Ts.MethodDefinition {ann = Loc {byteRange}, name} = case name of
Prj Ts.PropertyIdentifier {text, ann} -> yieldTag text Method ann byteRange >> gtags t
_ -> gtags t
instance ToTags Ts.Pair where
tags t@Ts.Pair {ann = Loc {byteRange}, key, value = Ts.Expression expr} = case (key, expr) of
(Prj Ts.PropertyIdentifier {text, ann}, Prj Ts.Function {}) -> yield text ann
(Prj Ts.PropertyIdentifier {text, ann}, Prj Ts.ArrowFunction {}) -> yield text ann
_ -> gtags t
where
yield text loc = yieldTag text Function loc byteRange >> gtags t
instance ToTags Ts.ClassDeclaration where
tags t@Ts.ClassDeclaration
{ ann = loc@Loc { byteRange }
, name = Ts.TypeIdentifier { text }
} = yieldTag text Class loc byteRange >> gtags t
tags t@Ts.ClassDeclaration {ann = Loc {byteRange}, name = Ts.TypeIdentifier {text, ann}} =
yieldTag text Class ann byteRange >> gtags t
instance ToTags Ts.CallExpression where
tags t@Ts.CallExpression
{ ann = loc@Loc { byteRange }
, function = Ts.Expression expr
} = match expr
tags t@Ts.CallExpression {ann = Loc {byteRange}, function = Ts.Expression expr} = match expr
where
match expr = case expr of
Prj Ts.Identifier { text } -> yield text
Prj Ts.NewExpression { constructor = Prj Ts.Identifier { text } } -> yield text
Prj Ts.CallExpression { function = Ts.Expression expr } -> match expr
Prj Ts.MemberExpression { property = Ts.PropertyIdentifier { text } } -> yield text
Prj Ts.Function { name = Just Ts.Identifier { text }} -> yield text
Prj Ts.ParenthesizedExpression { extraChildren } -> for_ extraChildren $ \ x -> case x of
Prj Ts.Identifier {text, ann} -> yield text ann
Prj Ts.NewExpression {constructor = Prj Ts.Identifier {text, ann}} -> yield text ann
Prj Ts.CallExpression {function = Ts.Expression expr} -> match expr
Prj Ts.MemberExpression {property = Ts.PropertyIdentifier {text, ann}} -> yield text ann
Prj Ts.Function {name = Just Ts.Identifier {text, ann}} -> yield text ann
Prj Ts.ParenthesizedExpression {extraChildren} -> for_ extraChildren $ \x -> case x of
Prj (Ts.Expression expr) -> match expr
_ -> tags x
_ -> tags x
_ -> gtags t
yield name = yieldTag name Call loc byteRange >> gtags t
yield name loc = yieldTag name Call loc byteRange >> gtags t
instance ToTags Ts.Class where
tags t@Ts.Class {ann = Loc {byteRange}, name = Just Ts.TypeIdentifier {text, ann}} =
yieldTag text Class ann byteRange >> gtags t
tags t = gtags t
instance ToTags Ts.Module where
tags t@Ts.Module
{ ann = loc@Loc { byteRange }
, name
} = match name
where
match expr = case expr of
Prj Ts.Identifier { text } -> yield text
-- TODO: Handle NestedIdentifiers and Strings
-- Prj Tsx.NestedIdentifier { extraChildren } -> match
_ -> gtags t
yield text = yieldTag text Module loc byteRange >> gtags t
tags t@Ts.Module {ann = Loc {byteRange}, name} = case name of
Prj Ts.Identifier {text, ann} -> yieldTag text Module ann byteRange >> gtags t
_ -> gtags t
instance ToTags Ts.VariableDeclarator where
tags t@Ts.VariableDeclarator
{ ann = loc@Loc { byteRange }
, name
, value = Just (Ts.Expression expr)
} = case (expr, name) of
(Prj Ts.Function{}, Prj Ts.Identifier { text }) -> yield text
(Prj Ts.ArrowFunction{}, Prj Ts.Identifier { text }) -> yield text
_ -> gtags t
tags t@Ts.VariableDeclarator {ann = Loc {byteRange}, name, value = Just (Ts.Expression expr)} =
case (expr, name) of
(Prj Ts.Function {}, Prj Ts.Identifier {text, ann}) -> yield text ann
(Prj Ts.ArrowFunction {}, Prj Ts.Identifier {text, ann}) -> yield text ann
_ -> gtags t
where
yield text = yieldTag text Function loc byteRange >> gtags t
yield text loc = yieldTag text Function loc byteRange >> gtags t
tags t = gtags t
instance ToTags Ts.AssignmentExpression where
tags t@Ts.AssignmentExpression {ann = Loc {byteRange}, left, right = (Ts.Expression expr)} =
case (left, expr) of
(Prj Ts.Identifier {text, ann}, Prj Ts.Function {}) -> yield text ann
(Prj Ts.Identifier {text, ann}, Prj Ts.ArrowFunction {}) -> yield text ann
(Prj Ts.MemberExpression {property = Ts.PropertyIdentifier {text, ann}}, Prj Ts.Function {}) -> yield text ann
(Prj Ts.MemberExpression {property = Ts.PropertyIdentifier {text, ann}}, Prj Ts.ArrowFunction {}) -> yield text ann
_ -> gtags t
where
yield text loc = yieldTag text Function loc byteRange >> gtags t
instance (ToTags l, ToTags r) => ToTags (l :+: r) where
tags (L1 l) = tags l
tags (R1 r) = tags r
instance ToTags (Token sym n) where tags _ = pure ()
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags ::
( Has (Reader Source) sig m,
Has (Writer Tags.Tags) sig m,
Traversable1 ToTags t
) =>
t Loc ->
m ()
gtags = traverse1_ @ToTags (const (pure ())) tags
-- These are all valid, but point to built-in functions (e.g. require) that a la
-- carte doesn't display and since we have nothing to link to yet (can't
-- jump-to-def), we hide them from the current tags output.
nameBlacklist :: [Text]
nameBlacklist =
[ "require"
]
nameBlacklist = ["require"]
yieldTag :: (Has (Reader Source) sig m, Has (Writer Tags.Tags) sig m) => Text -> Kind -> Loc -> Range -> m ()
yieldTag name Call _ _ | name `elem` nameBlacklist = pure ()
yieldTag name kind loc range = do
yieldTag name kind loc srcLineRange = do
src <- ask @Source
Tags.yield (Tag name kind loc (Tags.firstLine src range) Nothing)
Tags.yield (Tag name kind loc (Tags.firstLine src srcLineRange) Nothing)
{- ORMOLU_DISABLE -}
instance ToTags Ts.AbstractClassDeclaration
instance ToTags Ts.AbstractMethodSignature
instance ToTags Ts.AccessibilityModifier
@ -157,7 +154,7 @@ instance ToTags Ts.ArrayPattern
instance ToTags Ts.ArrayType
instance ToTags Ts.ArrowFunction
instance ToTags Ts.AsExpression
instance ToTags Ts.AssignmentExpression
-- instance ToTags Ts.AssignmentExpression
instance ToTags Ts.AssignmentPattern
instance ToTags Ts.AugmentedAssignmentExpression
instance ToTags Ts.AwaitExpression
@ -166,7 +163,7 @@ instance ToTags Ts.BreakStatement
-- instance ToTags Ts.CallExpression
instance ToTags Ts.CallSignature
instance ToTags Ts.CatchClause
instance ToTags Ts.Class
-- instance ToTags Ts.Class
instance ToTags Ts.ClassBody
-- instance ToTags Ts.ClassDeclaration
instance ToTags Ts.ClassHeritage
@ -252,7 +249,7 @@ instance ToTags Ts.Object
instance ToTags Ts.ObjectPattern
instance ToTags Ts.ObjectType
instance ToTags Ts.OptionalParameter
instance ToTags Ts.Pair
-- instance ToTags Ts.Pair
instance ToTags Ts.ParenthesizedExpression
instance ToTags Ts.ParenthesizedType
instance ToTags Ts.PredefinedType
@ -306,3 +303,4 @@ instance ToTags Ts.VariableDeclaration
instance ToTags Ts.WhileStatement
instance ToTags Ts.WithStatement
instance ToTags Ts.YieldExpression
{- ORMOLU_ENABLE -}

View File

@ -1,7 +1,7 @@
cabal-version: 2.4
name: semantic
version: 0.10.0.0
version: 0.11.0.0
synopsis: Framework and executable for analyzing and diffing untrusted code.
description: Semantic is a library for parsing, analyzing, and comparing source code across many languages.
homepage: http://github.com/github/semantic#readme
@ -59,7 +59,7 @@ common dependencies
, fused-effects-exceptions ^>= 1
, fused-effects-resumable ^>= 0.1
, hashable >= 1.2.7 && < 1.4
, tree-sitter ^>= 0.9.0.0
, tree-sitter ^>= 0.9.0.1
, mtl ^>= 2.2.2
, network ^>= 2.8.0.0
, pathtype ^>= 0.8.1
@ -69,12 +69,12 @@ common dependencies
, safe-exceptions ^>= 0.1.7.0
, semantic-analysis ^>= 0
, semantic-ast
, semantic-source ^>= 0.0.2
, semantic-source ^>= 0.1.0
, semilattices ^>= 0.0.0.3
, streaming ^>= 0.2.2.0
, text ^>= 1.2.3.1
, unix ^>= 2.7.2.2
, lingo ^>= 0.3.0.0
, lingo ^>= 0.3.2.0
common executable-flags
ghc-options: -threaded -rtsopts "-with-rtsopts=-N -A4m -n2m"
@ -92,7 +92,6 @@ library
, Analysis.Abstract.Tracing
, Analysis.ConstructorName
, Analysis.CyclomaticComplexity
, Analysis.TOCSummary
, Analysis.Decorator
, Analysis.HasTextElement
, Analysis.PackageDef
@ -212,7 +211,6 @@ library
-- Rendering formats
, Rendering.Graph
, Rendering.JSON
, Rendering.TOC
-- High-level flow & operational functionality (logging, stats, etc.)
, Semantic.Analysis
-- API
@ -226,7 +224,6 @@ library
, Semantic.Api.StackGraph
, Semantic.Api.Symbols
, Semantic.Api.Terms
, Semantic.Api.TOCSummaries
, Semantic.CLI
, Semantic.Config
, Semantic.Distribute
@ -285,6 +282,7 @@ library
, semantic-json ^>= 0
, semantic-php ^>= 0
, semantic-python ^>= 0
, semantic-codeql ^>= 0
, semantic-ruby ^>= 0
, semantic-scope-graph ^>= 0
, semantic-tags ^>= 0
@ -304,7 +302,9 @@ library
, tree-sitter-json ^>= 0.7.0.0
, tree-sitter-php ^>= 0.4.0.0
, tree-sitter-python ^>= 0.9.0.1
, tree-sitter-ql ^>= 0.1.0.1
, tree-sitter-ruby ^>= 0.5.0.0
, tree-sitter-rust ^>= 0.1.0.0
, tree-sitter-typescript ^>= 0.5.0.0
, tree-sitter-tsx ^>= 0.5.0.0
@ -345,7 +345,6 @@ test-suite test
, Integration.Spec
, Numeric.Spec
, Parsing.Spec
, Rendering.TOC.Spec
, Semantic.Spec
, Semantic.CLI.Spec
, Semantic.IO.Spec

View File

@ -132,9 +132,8 @@ graphingModules recur m = do
where
-- NB: path is null for Languages like Ruby that have module imports that require concrete value semantics.
includeModule path
= let path' = if Prelude.null path then "unknown, concrete semantics required" else path
info = moduleInfo m
in moduleInclusion (moduleVertex (ModuleInfo path' (moduleLanguage info) (moduleOid info)))
= let info = moduleInfo m
in moduleInclusion (moduleVertex (ModuleInfo path (moduleLanguage info) (moduleOid info)))
-- | Add vertices to the graph for imported modules.
graphingModuleInfo :: ( Has (Reader ModuleInfo) sig m

View File

@ -1,159 +0,0 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ViewPatterns #-}
module Analysis.TOCSummary
( Declaration(..)
, formatIdentifier
, Kind(..)
, formatKind
, HasDeclaration
, declarationAlgebra
) where
import Data.Algebra
import Data.Blob
import qualified Data.Error as Error
import Data.Flag
import Data.Foldable (toList)
import Data.Language as Language
import Data.Sum
import qualified Data.Syntax as Syntax
import qualified Data.Syntax.Declaration as Declaration
import Data.Term
import Data.Text (Text)
import qualified Data.Text as T
import Source.Loc as Loc
import Source.Range
import Source.Source as Source
-- | A declarations identifier and type.
data Declaration = Declaration
{ kind :: Kind
, identifier :: Text
, span :: Span
, language :: Language
}
deriving (Eq, Show)
formatIdentifier :: Declaration -> Text
formatIdentifier (Declaration kind identifier _ lang) = case kind of
Method (Just receiver)
| Language.Go <- lang -> "(" <> receiver <> ") " <> identifier
| otherwise -> receiver <> "." <> identifier
_ -> identifier
data Kind
= Method (Maybe Text)
| Function
| Heading Int
| Error
deriving (Eq, Ord, Show)
formatKind :: Kind -> T.Text
formatKind = \case
Function -> "Function"
Method _ -> "Method"
Heading l -> "Heading " <> T.pack (show l)
Error -> "ParseError"
-- | An r-algebra producing 'Just' a 'Declaration' for syntax nodes corresponding to high-level declarations, or 'Nothing' otherwise.
--
-- Customizing this for a given syntax type involves two steps:
--
-- 1. Defining a @'HasDeclarationBy' ''Custom'@ instance for the type.
-- 2. Adding the type to the 'DeclarationStrategy' type family.
--
-- If youre getting errors about missing a @'HasDeclarationBy' ''Custom'@ instance for your syntax type, you probably forgot step 1.
--
-- If youre getting 'Nothing' for your syntax node at runtime, you probably forgot step 2.
declarationAlgebra :: (Foldable (Syntax term), HasDeclaration (Syntax term), IsTerm term)
=> Blob -> RAlgebra (TermF (Syntax term) Loc) (term Loc) (Maybe Declaration)
declarationAlgebra blob (In ann syntax) = toDeclaration blob ann syntax
-- | Types for which we can produce a 'Declaration' in 'Maybe'. There is exactly one instance of this typeclass; adding customized 'Declaration's for a new type is done by defining an instance of @'HasDeclarationBy' ''Custom'@ instead.
--
-- This typeclass employs the Advanced Overlap techniques designed by Oleg Kiselyov & Simon Peyton Jones: https://wiki.haskell.org/GHC/AdvancedOverlap.
class HasDeclaration syntax where
-- | Compute a 'Declaration' for a syntax type using its @'HasDeclarationBy' ''Custom'@ instance, if any, or else falling back to the default definition (which simply returns 'Nothing').
toDeclaration :: (Foldable (Syntax term), IsTerm term) => Blob -> Loc -> syntax (term Loc, Maybe Declaration) -> Maybe Declaration
-- | Define 'toDeclaration' using the @'HasDeclarationBy' ''Custom'@ instance for a type if there is one or else use the default definition.
--
-- This instance determines whether or not there is an instance for @syntax@ by looking it up in the 'DeclarationStrategy' type family. Thus producing a 'Declaration' for a node requires both defining a @'HasDeclarationBy' ''Custom'@ instance _and_ adding a definition for the type to the 'DeclarationStrategy' type family to return 'Custom'.
--
-- Note that since 'DeclarationStrategy' has a fallback case for its final entry, this instance will hold for all types of kind @* -> *@. Thus, this must be the only instance of 'HasDeclaration', as any other instance would be indistinguishable.
instance (DeclarationStrategy syntax ~ strategy, HasDeclarationBy strategy syntax) => HasDeclaration syntax where
toDeclaration = toDeclarationBy @strategy
-- | Produce a 'Declaration' for a syntax node using either the 'Default' or 'Custom' strategy.
class HasDeclarationBy (strategy :: Strategy) syntax where
toDeclarationBy :: (Foldable (Syntax term), IsTerm term) => Blob -> Loc -> syntax (term Loc, Maybe Declaration) -> Maybe Declaration
-- | The 'Default' strategy produces 'Nothing'.
instance HasDeclarationBy 'Default syntax where
toDeclarationBy _ _ _ = Nothing
-- | Produce an 'Error' for 'Syntax.Error' nodes.
instance HasDeclarationBy 'Custom Syntax.Error where
toDeclarationBy blob@Blob{..} ann err@Syntax.Error{}
= Just $ Declaration Error (T.pack (formatTOCError (Syntax.unError (Loc.span ann) err))) (Loc.span ann) (blobLanguage blob)
where formatTOCError e = Error.showExpectation (flag Error.Colourize False) (Error.errorExpected e) (Error.errorActual e) ""
-- | Produce a 'Function' for 'Declaration.Function' nodes so long as their identifier is non-empty (defined as having a non-empty 'Range').
instance HasDeclarationBy 'Custom Declaration.Function where
toDeclarationBy blob@Blob{..} ann (Declaration.Function _ (termAnnotation -> identifierAnn, _) _ _)
-- Do not summarize anonymous functions
| isEmpty identifierAnn = Nothing
-- Named functions
| otherwise = Just $ Declaration Function (getSource blobSource identifierAnn) (Loc.span ann) (blobLanguage blob)
where isEmpty = (== 0) . rangeLength . byteRange
-- | Produce a 'Method' for 'Declaration.Method' nodes. If the methods receiver is non-empty (defined as having a non-empty 'Range'), the 'identifier' will be formatted as 'receiver.method_name'; otherwise it will be simply 'method_name'.
instance HasDeclarationBy 'Custom Declaration.Method where
toDeclarationBy blob@Blob{..} ann (Declaration.Method _ (toTermF -> In receiverAnn receiverF, _) (termAnnotation -> identifierAnn, _) _ _ _)
-- Methods without a receiver
| isEmpty receiverAnn = Just $ Declaration (Method Nothing) (getSource blobSource identifierAnn) (Loc.span ann) (blobLanguage blob)
-- Methods with a receiver type and an identifier (e.g. (a *Type) in Go).
| blobLanguage blob == Go
, [ _, termAnnotation -> receiverType ] <- toList receiverF = Just $ Declaration (Method (Just (getSource blobSource receiverType))) (getSource blobSource identifierAnn) (Loc.span ann) (blobLanguage blob)
-- Methods with a receiver (class methods) are formatted like `receiver.method_name`
| otherwise = Just $ Declaration (Method (Just (getSource blobSource receiverAnn))) (getSource blobSource identifierAnn) (Loc.span ann) (blobLanguage blob)
where
isEmpty = (== 0) . rangeLength . byteRange
getSource :: Source -> Loc -> Text
getSource blobSource = toText . Source.slice blobSource . byteRange
-- | Produce a 'Declaration' for 'Sum's using the 'HasDeclaration' instance & therefore using a @'HasDeclarationBy' ''Custom'@ instance when one exists & the type is listed in 'DeclarationStrategy'.
instance Apply HasDeclaration fs => HasDeclarationBy 'Custom (Sum fs) where
toDeclarationBy blob ann = apply @HasDeclaration (toDeclaration blob ann)
-- | A strategy for defining a 'HasDeclaration' instance. Intended to be promoted to the kind level using @-XDataKinds@.
data Strategy = Default | Custom
-- | A predicate on syntax types selecting either the 'Custom' or 'Default' strategy.
--
-- Only entries for which we want to use the 'Custom' strategy should be listed, with the exception of the final entry which maps all other types onto the 'Default' strategy.
--
-- If youre seeing errors about missing a @'HasDeclarationBy' ''Custom'@ instance for a given type, youve probably listed it in here but not defined a @'HasDeclarationBy' ''Custom'@ instance for it, or else youve listed the wrong type in here. Conversely, if your @'HasDeclarationBy' ''Custom'@ method is never being called, you may have forgotten to list the type in here.
type family DeclarationStrategy syntax where
DeclarationStrategy Declaration.Function = 'Custom
DeclarationStrategy Declaration.Method = 'Custom
DeclarationStrategy Syntax.Error = 'Custom
DeclarationStrategy (Sum _) = 'Custom
DeclarationStrategy _ = 'Default

View File

@ -77,7 +77,7 @@ import Data.Semilattice.Lower
import Data.Set (Set)
import GHC.Generics (Generic1)
import GHC.Stack
import Source.Span (Span)
import Source.Span (Pos (..), Span, point)
-- | Evaluates an action locally the scope and frame of the given frame address.
@ -191,7 +191,7 @@ define :: ( HasCallStack
-> Evaluator term address value m ()
define declaration rel accessControl def = withCurrentCallStack callStack $ do
-- TODO: This span is still wrong.
declare declaration rel accessControl lowerBound Unknown Nothing
declare declaration rel accessControl (point (Pos 1 1)) Unknown Nothing
slot <- lookupSlot declaration
value <- def
assign slot value

View File

@ -40,12 +40,11 @@ import Control.Monad.IO.Class
import Data.Foldable
import Data.Functor.Classes
import Data.Maybe.Exts
import Data.Semilattice.Lower
import Data.Set (Set)
import qualified Data.Set as Set
import GHC.Generics (Generic1)
import Source.Span
import System.FilePath.Posix (takeDirectory)
import qualified System.Path as Path
import Control.Abstract.Evaluator
import Data.Abstract.BaseError
@ -63,10 +62,10 @@ lookupModule :: Has (Modules address value) sig m => ModulePath -> Evaluator ter
lookupModule = sendModules . flip Lookup pure
-- | Resolve a list of module paths to a possible module table entry.
resolve :: Has (Modules address value) sig m => [FilePath] -> Evaluator term address value m (Maybe ModulePath)
resolve :: Has (Modules address value) sig m => [Path.AbsRelFile] -> Evaluator term address value m (Maybe ModulePath)
resolve = sendModules . flip Resolve pure
listModulesInDir :: Has (Modules address value) sig m => FilePath -> Evaluator term address value m [ModulePath]
listModulesInDir :: Has (Modules address value) sig m => Path.AbsRelDir -> Evaluator term address value m [ModulePath]
listModulesInDir = sendModules . flip List pure
@ -86,8 +85,8 @@ load path = sendModules (Load path pure)
data Modules address value (m :: * -> *) k
= Load ModulePath (ModuleResult address value -> m k)
| Lookup ModulePath (Maybe (ModuleResult address value) -> m k)
| Resolve [FilePath] (Maybe ModulePath -> m k)
| List FilePath ([ModulePath] -> m k)
| Resolve [Path.AbsRelFile] (Maybe ModulePath -> m k)
| List Path.AbsRelDir ([ModulePath] -> m k)
deriving (Functor, Generic1)
instance HFunctor (Modules address value)
@ -117,7 +116,7 @@ instance ( Has (Reader (ModuleTable (Module (ModuleResult address value)))) sig
Load name k -> askModuleTable >>= maybeM (throwLoadError (ModuleNotFoundError name)) . fmap moduleBody . ModuleTable.lookup name >>= k
Lookup path k -> askModuleTable >>= k . fmap moduleBody . ModuleTable.lookup path
Resolve names k -> k (find (`Set.member` paths) names)
List dir k -> k (filter ((dir ==) . takeDirectory) (toList paths))
List dir k -> k (filter ((dir ==) . Path.takeDirectory) (toList paths))
alg (R other) = ModulesC (alg (R (handleCoercible other)))
askModuleTable :: Has (Reader (ModuleTable (Module (ModuleResult address value)))) sig m => m (ModuleTable (Module (ModuleResult address value)))
@ -147,18 +146,20 @@ runLoadErrorWith f = raiseHandler $ With.runResumable (runEvaluator . f)
throwLoadError :: Has (Resumable (BaseError (LoadError address value))) sig m
=> LoadError address value resume
-> m resume
throwLoadError err@(ModuleNotFoundError name) = throwResumable $ BaseError (ModuleInfo name "Unknown" mempty) lowerBound err
throwLoadError err@(ModuleNotFoundError name) = throwResumable $ BaseError (ModuleInfo name "Unknown" mempty) (point (Pos 1 1)) err
-- TODO: Might be able to get rest of ModuleInfo from the env ^.
-- | An error thrown when we can't resolve a module from a qualified name.
data ResolutionError resume where
NotFoundError :: String -- The path that was not found.
-> [String] -- List of paths searched that shows where semantic looked for this module.
NotFoundError :: Path.AbsRelFileDir -- The path that was not found.
-> [Path.AbsRelFile] -- List of paths searched that shows where semantic looked for this module.
-> Language -- Language.
-> ResolutionError ModulePath
GoImportError :: FilePath -> ResolutionError [ModulePath]
-- Go Lang may have its package import path as an uri like https://github.com/packagename rather than an file path
-- TODO: A typed path can be used here to represent the uri
GoImportError :: String -> ResolutionError [ModulePath]
deriving instance Eq (ResolutionError b)
deriving instance Show (ResolutionError b)

View File

@ -18,9 +18,9 @@ import Data.Abstract.BaseError
import qualified Data.Abstract.ScopeGraph as ScopeGraph
import Data.Map.Strict as Map
import Data.Maybe
import Data.Semilattice.Lower
import Data.Traversable
import GHC.Stack
import Source.Span (Pos (..), point)
defineBuiltIn :: ( HasCallStack
, Has (Deref value) sig m
@ -47,11 +47,11 @@ defineBuiltIn declaration rel accessControl value = withCurrentCallStack callSta
let lexicalEdges = Map.singleton Lexical [ currentScope' ]
associatedScope <- newPreludeScope lexicalEdges
-- TODO: This span is still wrong.
declare declaration rel accessControl lowerBound ScopeGraph.Unknown (Just associatedScope)
declare declaration rel accessControl (point (Pos 1 1)) ScopeGraph.Unknown (Just associatedScope)
withScope associatedScope $ do
param <- gensym
declare (Declaration param) ScopeGraph.Gensym accessControl lowerBound ScopeGraph.Unknown Nothing
declare (Declaration param) ScopeGraph.Gensym accessControl (point (Pos 1 1)) ScopeGraph.Unknown Nothing
slot <- lookupSlot declaration
value <- builtIn associatedScope value

View File

@ -71,10 +71,14 @@ runParser blob@Blob{..} parser = case parser of
config <- asks config
executeParserAction (parseToAST (configTreeSitterParseTimeout config) language blob)
UnmarshalParser language ->
time "parse.tree_sitter_precise_ast_parse" languageTag $ do
UnmarshalParser language -> do
(time "parse.tree_sitter_precise_ast_parse" languageTag $ do
config <- asks config
executeParserAction (parseToPreciseAST (configTreeSitterParseTimeout config) (configTreeSitterUnmarshalTimeout config) language blob)
executeParserAction (parseToPreciseAST (configTreeSitterParseTimeout config) (configTreeSitterUnmarshalTimeout config) language blob))
`catchError` (\(SomeException e) -> do
writeStat (increment "parse.precise_ast_parse_failures" languageTag)
writeLog Error "precise parsing failed" (("task", "parse") : ("exception", "\"" <> displayException e <> "\"") : languageTag)
throwError (SomeException e))
AssignmentParser parser assignment -> runAssignment Assignment.assign parser blob assignment
@ -112,7 +116,7 @@ runAssignment assign parser blob@Blob{..} assignment = do
let requestID' = ("github_request_id", requestID taskSession)
let isPublic' = ("github_is_public", show (isPublic taskSession))
let logPrintFlag = configLogPrintSource . config $ taskSession
let blobFields = ("path", if isPublic taskSession || Flag.toBool LogPrintSource logPrintFlag then blobPath blob else "<filtered>")
let blobFields = ("path", if isPublic taskSession || Flag.toBool LogPrintSource logPrintFlag then blobFilePath blob else "<filtered>")
let logFields = requestID' : isPublic' : blobFields : languageTag
let shouldFailForTesting = configFailParsingForTesting $ config taskSession
let shouldFailOnParsing = optionsFailOnParseError . configOptions $ config taskSession

View File

@ -12,13 +12,15 @@ import Control.Abstract.Evaluator
import qualified Data.Abstract.Module as M
import Data.Functor.Classes
import qualified Source.Span as S
import qualified System.Path as Path
data BaseError (exc :: * -> *) resume = BaseError { baseErrorModuleInfo :: ModuleInfo, baseErrorSpan :: Span, baseErrorException :: exc resume }
instance (Show (exc resume)) => Show (BaseError exc resume) where
showsPrec _ BaseError{..} = shows baseErrorException <> showString " " <> showString errorLocation
where errorLocation | startErrorLine == endErrorLine = M.modulePath baseErrorModuleInfo <> " " <> startErrorLine <> ":" <> startErrorCol <> "-" <> endErrorCol
| otherwise = M.modulePath baseErrorModuleInfo <> " " <> startErrorLine <> ":" <> startErrorCol <> "-" <> endErrorLine <> ":" <> endErrorCol
where errorLocation | startErrorLine == endErrorLine = baseModuleFilePath <> " " <> startErrorLine <> ":" <> startErrorCol <> "-" <> endErrorCol
| otherwise = baseModuleFilePath <> " " <> startErrorLine <> ":" <> startErrorCol <> "-" <> endErrorLine <> ":" <> endErrorCol
baseModuleFilePath = Path.toString $ M.modulePath baseErrorModuleInfo
startErrorLine = show $ S.line (S.start baseErrorSpan)
endErrorLine = show $ S.line (S.end baseErrorSpan)
startErrorCol = show $ S.column (S.start baseErrorSpan)

View File

@ -35,11 +35,10 @@ import Data.Functor.Classes
import Data.List.NonEmpty (nonEmpty)
import Data.Scientific (Scientific)
import Data.Semigroup.Foldable
import Data.Semilattice.Lower
import Data.Sum
import Data.Text
import GHC.Stack
import Source.Span (HasSpan (..))
import Source.Span (HasSpan (..), Pos (..), point)
import Analysis.Name as X
import Control.Abstract hiding (Load, String)
@ -229,7 +228,7 @@ defineSelf :: ( Has (State (ScopeGraph address)) sig m
=> Evaluator term address value m ()
defineSelf = do
let self = Declaration __self
declare self ScopeGraph.Prelude Public lowerBound ScopeGraph.Unknown Nothing
declare self ScopeGraph.Prelude Public (point (Pos 1 1)) ScopeGraph.Unknown Nothing
slot <- lookupSlot self
assign slot =<< object =<< currentFrame

View File

@ -20,7 +20,7 @@ import qualified Data.Map as Map
import Data.Semilattice.Lower
import Data.Set (Set)
import Prelude hiding (lookup)
import System.FilePath.Posix
import qualified System.Path as Path
newtype ModuleTable a = ModuleTable { unModuleTable :: Map.Map ModulePath a }
deriving (Eq, Foldable, Functor, Lower, Monoid, Ord, Semigroup, Traversable)
@ -32,7 +32,7 @@ modulePaths :: ModuleTable a -> Set ModulePath
modulePaths = Map.keysSet . unModuleTable
modulePathsInDir :: FilePath -> ModuleTable a -> [ModulePath]
modulePathsInDir k = filter (\e -> k == takeDirectory e) . Map.keys . unModuleTable
modulePathsInDir k = filter (\e -> Path.absRel k == Path.takeDirectory e) . Map.keys . unModuleTable
lookup :: ModulePath -> ModuleTable a -> Maybe a
lookup k = Map.lookup k . unModuleTable

View File

@ -6,7 +6,8 @@ module Data.Abstract.Path
import Data.Text (Text)
import qualified Data.Text as T
import System.FilePath.Posix
import qualified System.Path as Path
import System.Path.PartClass (FileDir(..))
-- | Join two paths a and b. Handles walking up relative directories in b. e.g.
--
@ -14,13 +15,31 @@ import System.FilePath.Posix
-- joinPaths "a/b" "./c" == "a/b/c"
--
-- Walking beyond the beginning of a just stops when you get to the root of a.
joinPaths :: FilePath -> FilePath -> FilePath
joinPaths a b = let bs = splitPath (normalise b)
n = length (filter (== "../") bs)
in normalise $ walkup n a </> joinPath (drop n bs)
joinPaths :: FileDir fd => Path.AbsRelDir -> Path.Rel fd -> Path.AbsRel fd
joinPaths = runJP $ switchFileDir (JP joinFilePaths) (JP joinDirPaths) (JP joinFDPaths)
newtype JP fd = JP {runJP :: Path.AbsRelDir -> Path.Rel fd -> Path.AbsRel fd }
joinDirPaths :: Path.AbsRelDir -> Path.RelDir -> Path.AbsRelDir
joinDirPaths x y = result isAbs
where
walkup 0 str = str
walkup n str = walkup (pred n) (takeDirectory str)
(isAbs, rels, _) = Path.splitPath (Path.normalise $ x Path.</> y)
(_, fRel) = foldr go (0, Path.currentDir) rels
go :: Path.RelDir -> (Integer, Path.RelDir) -> (Integer, Path.RelDir)
go rel (i, r)
| rel == Path.rel ".." = (i + 1, r)
| i == 0 = (0, rel Path.</> r)
| otherwise = (i - 1, r)
result True = Path.toAbsRel $ Path.rootDir Path.</> fRel
result False = Path.toAbsRel $ fRel
joinFilePaths :: Path.AbsRelDir -> Path.RelFile -> Path.AbsRelFile
joinFilePaths x y = let (d, f) = Path.splitFileName y in joinDirPaths x d Path.</> f
joinFDPaths :: Path.AbsRelDir -> Path.RelFileDir -> Path.AbsRelFileDir
joinFDPaths x = Path.toFileDir . joinDirPaths x . Path.dirFromFileDir
stripQuotes :: Text -> Text
stripQuotes = T.dropAround (`elem` ("\'\"" :: String))

View File

@ -32,12 +32,12 @@ import Data.Bifunctor
import qualified Data.ByteString.Lazy as BL
import Data.Edit
import Data.JSON.Fields
import Data.Maybe
import Data.Maybe.Exts
import Data.Module
import Data.List (stripPrefix)
import GHC.Generics (Generic)
import Source.Language as Language
import qualified System.FilePath as FP
import qualified System.Path as Path
newtype Blobs a = Blobs { blobs :: [a] }
@ -47,10 +47,10 @@ decodeBlobs :: BL.ByteString -> Either String [Blob]
decodeBlobs = fmap blobs <$> eitherDecode
-- | An exception indicating that weve tried to diff or parse a blob of unknown language.
newtype NoLanguageForBlob = NoLanguageForBlob FilePath
newtype NoLanguageForBlob = NoLanguageForBlob Path.AbsRelFile
deriving (Eq, Exception, Ord, Show)
noLanguageForBlob :: Has (Error SomeException) sig m => FilePath -> m a
noLanguageForBlob :: Has (Error SomeException) sig m => Path.AbsRelFile -> m a
noLanguageForBlob blobPath = throwError (SomeException (NoLanguageForBlob blobPath))
-- | Construct a 'Module' for a 'Blob' and @term@, relative to some root 'FilePath'.
@ -59,8 +59,16 @@ moduleForBlob :: Maybe FilePath -- ^ The root directory relative to which the mo
-> term -- ^ The @term@ representing the body of the module.
-> Module term -- ^ A 'Module' named appropriate for the 'Blob', holding the @term@, and constructed relative to the root 'FilePath', if any.
moduleForBlob rootDir b = Module info
where root = fromMaybe (FP.takeDirectory (blobPath b)) rootDir
info = ModuleInfo (FP.makeRelative root (blobPath b)) (languageToText (blobLanguage b)) mempty
where root = maybe (Path.takeDirectory $ blobPath b) Path.absRel rootDir
info = ModuleInfo (dropRelative root (blobPath b)) (languageToText (blobLanguage b)) mempty
dropRelative :: Path.AbsRelDir -> Path.AbsRelFile -> Path.AbsRelFile
dropRelative a' b' = case as `stripPrefix` bs of
Just rs | ra == rb -> Path.toAbsRel $ (foldl (Path.</>) Path.currentDir rs) Path.</> bf
_ -> b'
where (ra, as, _) = Path.splitPath $ Path.normalise a'
(rb, bs, _) = Path.splitPath $ Path.normalise $ Path.takeDirectory b'
bf = Path.takeFileName b'
-- | Represents a blobs suitable for diffing which can be either a blob to
-- delete, a blob to insert, or a pair of blobs to diff.
@ -80,7 +88,7 @@ languageForBlobPair = mergeEdit combine . bimap blobLanguage blobLanguage where
| a == Unknown || b == Unknown = Unknown
| otherwise = b
pathForBlobPair :: BlobPair -> FilePath
pathForBlobPair :: BlobPair -> Path.AbsRelFile
pathForBlobPair = blobPath . mergeEdit (const id)
languageTagForBlobPair :: BlobPair -> [(String, String)]
@ -88,12 +96,12 @@ languageTagForBlobPair pair = showLanguage (languageForBlobPair pair)
where showLanguage = pure . (,) "language" . show
pathKeyForBlobPair :: BlobPair -> FilePath
pathKeyForBlobPair = mergeEdit combine . bimap blobPath blobPath where
pathKeyForBlobPair = mergeEdit combine . bimap blobFilePath blobFilePath where
combine before after | before == after = after
| otherwise = before <> " -> " <> after
instance ToJSONFields Blob where
toJSONFields p = [ "path" .= blobPath p, "language" .= blobLanguage p]
toJSONFields p = [ "path" .= blobFilePath p, "language" .= blobLanguage p]
decodeBlobPairs :: BL.ByteString -> Either String [BlobPair]
decodeBlobPairs = fmap blobs <$> eitherDecode

View File

@ -5,7 +5,6 @@ module Data.Blob.IO
( readBlobFromFile
, readBlobFromFile'
, readBlobFromPath
, readBlobsFromDir
, readFilePair
, readProjectFromPaths
) where
@ -13,15 +12,14 @@ module Data.Blob.IO
import Analysis.Blob
import Analysis.File as File
import Analysis.Project
import qualified Control.Concurrent.Async as Async
import Control.Monad.IO.Class
import Data.Blob
import qualified Data.ByteString as B
import Data.Language
import Data.Maybe.Exts
import Data.Semilattice.Lower
import Semantic.IO
import qualified Source.Source as Source
import Source.Span
import qualified System.Path as Path
-- | Deprecated: this has very weird semantics.
@ -44,9 +42,9 @@ readProjectFromPaths maybeRoot path lang excludeDirs = do
paths <- liftIO $ findFilesInDir rootDir exts excludeDirs
blobs <- liftIO $ traverse (readBlobFromFile' . toFile) paths
pure $ Project (Path.toString rootDir) blobs lang (fmap Path.toString excludeDirs)
pure $ Project rootDir blobs lang excludeDirs
where
toFile path = File path lowerBound lang
toFile path = File path (point (Pos 1 1)) lang
exts = extensionsForLanguage lang
@ -68,11 +66,6 @@ readBlobFromFile' file = do
readBlobFromPath :: (MonadFail m, MonadIO m) => Path.AbsRelFile -> m Blob
readBlobFromPath = readBlobFromFile' . File.fromPath
-- | Read all blobs in the directory with Language.supportedExts.
readBlobsFromDir :: MonadIO m => Path.AbsRelDir -> m [Blob]
readBlobsFromDir path = liftIO . fmap catMaybes $
findFilesInDir path supportedExts mempty >>= Async.mapConcurrently (readBlobFromFile . File.fromPath)
readFilePair :: MonadIO m => File Language -> File Language -> m BlobPair
readFilePair a b = do
before <- readBlobFromFile a

View File

@ -59,7 +59,7 @@ formatError includeSource colourize blob@Blob{..} Error{..}
. (if Flag.toBool LogPrintSource includeSource then showExcerpt colourize errorSpan blob else id)
. showCallStack colourize callStack . showChar '\n'
where
path = Just $ if Flag.toBool LogPrintSource includeSource then blobPath blob else "<filtered>"
path = Just $ if Flag.toBool LogPrintSource includeSource then blobFilePath blob else "<filtered>"
showExcerpt :: Flag Colourize -> Span -> Blob -> ShowS
showExcerpt colourize Span{..} Blob{..}

View File

@ -45,6 +45,7 @@ import GHC.Generics (V1)
import Prelude hiding (span)
import qualified Source.Loc as Loc
import Source.Span
import qualified System.Path as Path
-- | A vertex of representing some node in a control flow graph.
data ControlFlowVertex
@ -60,19 +61,19 @@ packageVertex :: PackageInfo -> ControlFlowVertex
packageVertex (PackageInfo name _) = Package (formatName name)
moduleVertex :: ModuleInfo -> ControlFlowVertex
moduleVertex = Module . T.pack . modulePath
moduleVertex = Module . T.pack . Path.toString . modulePath
unknownModuleVertex :: ModuleInfo -> ControlFlowVertex
unknownModuleVertex = UnknownModule . T.pack . modulePath
unknownModuleVertex = UnknownModule . T.pack . Path.toString . modulePath
variableVertex :: Text -> ModuleInfo -> Span -> ControlFlowVertex
variableVertex name ModuleInfo{..} = Variable name (T.pack modulePath)
variableVertex name ModuleInfo{..} = Variable name (T.pack $ Path.toString modulePath)
methodVertex :: Text -> ModuleInfo -> Span -> ControlFlowVertex
methodVertex name ModuleInfo{..} = Method name (T.pack modulePath)
methodVertex name ModuleInfo{..} = Method name (T.pack $ Path.toString modulePath)
functionVertex :: Text -> ModuleInfo -> Span -> ControlFlowVertex
functionVertex name ModuleInfo{..} = Function name (T.pack modulePath)
functionVertex name ModuleInfo{..} = Function name (T.pack $ Path.toString modulePath)
vertexIdentifier :: ControlFlowVertex -> Text
vertexIdentifier v = case v of

View File

@ -5,26 +5,9 @@ module Data.Language
, defaultLanguageModes
, preciseLanguageModes
, aLaCarteLanguageModes
, codeNavLanguages
, supportedExts
) where
import qualified Data.Languages as Lingo
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import Source.Language
codeNavLanguages :: [Language]
codeNavLanguages = [Go, Java, Ruby, Python, JavaScript, TypeScript, PHP]
supportedExts :: [String]
supportedExts = foldr append mempty supportedLanguages
where
append (Just l) b = fmap T.unpack (Lingo.languageExtensions l) <> b
append Nothing b = b
supportedLanguages = fmap lookup (languageToText <$> codeNavLanguages)
lookup k = Map.lookup k Lingo.languages
import Source.Language
data PerLanguageModes = PerLanguageModes
{ pythonMode :: LanguageMode

View File

@ -18,7 +18,6 @@ import Data.Functor.Classes.Generic
import Data.Hashable.Lifted
import qualified Data.Map.Strict as Map
import Data.Maybe.Exts
import Data.Semilattice.Lower
import qualified Data.Set as Set
import Data.Traversable
import GHC.Generics (Generic1)
@ -106,7 +105,7 @@ instance Evaluatable Method where
params <- withScope associatedScope $ do
-- TODO: Should we give `self` a special Relation?
declare (Declaration __self) ScopeGraph.Prelude ScopeGraph.Public lowerBound ScopeGraph.Unknown Nothing
declare (Declaration __self) ScopeGraph.Prelude ScopeGraph.Public (point (Pos 1 1)) ScopeGraph.Unknown Nothing
for methodParameters $ \paramNode -> declareMaybeName (declaredName paramNode) Default ScopeGraph.Public (paramNode^.span_) ScopeGraph.Parameter Nothing
addr <- lookupSlot (Declaration name)

View File

@ -15,6 +15,7 @@ import qualified Data.Text as T
import Diffing.Algorithm
import GHC.Generics (Generic1)
import Source.Span
import qualified System.Path as Path
-- A file directive like the Ruby constant `__FILE__`.
data File a = File
@ -25,7 +26,7 @@ instance Ord1 File where liftCompare = genericLiftCompare
instance Show1 File where liftShowsPrec = genericLiftShowsPrec
instance Evaluatable File where
eval _ _ File = currentModule >>= string . T.pack . modulePath
eval _ _ File = currentModule >>= string . T.pack . Path.toString . modulePath
-- A line directive like the Ruby constant `__LINE__`.

Some files were not shown because too many files have changed in this diff Show More