1
1
mirror of https://github.com/github/semantic.git synced 2024-11-27 12:57:49 +03:00

Merge remote-tracking branch 'origin/master' into precise-parsing-by-default

This commit is contained in:
Patrick Thomson 2020-02-12 19:17:09 -05:00
commit 764e172b98
252 changed files with 5321 additions and 2257 deletions

View File

@ -37,7 +37,7 @@ jobs:
name: Cache ~/.cabal/store
with:
path: ~/.cabal/store
key: ${{ runner.os }}-${{ matrix.ghc }}-v3-cabal-store
key: ${{ runner.os }}-${{ matrix.ghc }}-v9-cabal-store
- uses: actions/cache@v1
name: Cache dist-newstyle
@ -52,7 +52,7 @@ jobs:
- name: Install dependencies
run: |
cabal v2-update
script/bootstrap
cabal v2-configure --project-file=cabal.project.ci --disable-optimization --enable-benchmarks --enable-tests --write-ghc-environment-files=always -j2
cabal v2-build --project-file=cabal.project.ci all --only-dependencies

21
.gitmodules vendored
View File

@ -0,0 +1,21 @@
[submodule "semantic-json/vendor/tree-sitter-json"]
path = semantic-json/vendor/tree-sitter-json
url = https://github.com/tree-sitter/tree-sitter-json.git
[submodule "semantic-python/vendor/tree-sitter-python"]
path = semantic-python/vendor/tree-sitter-python
url = https://github.com/tree-sitter/tree-sitter-python.git
[submodule "semantic-java/vendor/tree-sitter-java"]
path = semantic-java/vendor/tree-sitter-java
url = https://github.com/tree-sitter/tree-sitter-java.git
[submodule "semantic-go/vendor/tree-sitter-go"]
path = semantic-go/vendor/tree-sitter-go
url = https://github.com/tree-sitter/tree-sitter-go.git
[submodule "semantic-ruby/vendor/tree-sitter-ruby"]
path = semantic-ruby/vendor/tree-sitter-ruby
url = https://github.com/tree-sitter/tree-sitter-ruby.git
[submodule "semantic-typescript/vendor/tree-sitter-typescript"]
path = semantic-typescript/vendor/tree-sitter-typescript
url = https://github.com/tree-sitter/tree-sitter-typescript.git
[submodule "semantic-tsx/vendor/tree-sitter-typescript"]
path = semantic-tsx/vendor/tree-sitter-typescript
url = https://github.com/tree-sitter/tree-sitter-typescript.git

29
HACKING.md Normal file
View File

@ -0,0 +1,29 @@
# Effective `semantic` Hacking for Fun and Profit
The Semantic repository is a large one, containing dozens of subprojects. This means that GHC has to do a lot of work when compiling. For this reason, it's important to keep in mind the principles that will let you avoid recompiling the whole world as soon as you change a single .cabal file.
## The Landscape
We officially recommend [Visual Studio Code](https://code.visualstudio.com) with the [`ghcide`](https://marketplace.visualstudio.com/items?itemName=DigitalAssetHoldingsLLC.ghcide) extension. Though our tooling scripts may work with other editor integration solutions, we can't guarantee that they'll do so indefinitely.
## Things to Do
1. *Use `script/repl`.* The REPL script is much more powerful than `cabal repl`; it ensures that all packages can be loaded (including tests), so you should be able to `:load` any on-disk package that you want—and you shouldn't have to restart the REPL every time you add a new file, as GHCi will optimistically read from any `import` statements it encounters. Keep in mind that `:load` accepts both file paths and module identifiers.
2. *Use the editor integration.* There is no substitute for a workflow that allows you to fix errors without switching applications. If you're using tooling other than VS Code and `ghcide`, we recommend you configure its GHCi process to be `script/repl`.
3. *Run tests in the REPL.* Unlike `cabal repl`, all the testing packages are loaded into the REPL, so you can `:load` a path to a test file and invoke the relevant test with `main`. This will enable the fastest fix/build/test cycle possible. It may take some time to get used to avoiding `cabal test`. If all you're wanting to see is if the `semantic` CLI tool builds correctly, `:load src/Semantic/CLI.hs`.
4. *If you have to build, be sure to disable optimizations and parallelize aggressively.* `cabal` builds with `-O1` on by default; this entails a significant hit to compile speed. If you find yourself building some product repeatedly, add `optimizations: False`.
5. *Turn on stylish-haskell integration.* Most editors are capable of running Haskell code through `stylish-haskell` on save; enabling this does wonders towards keeping your code in compliance with our style guide, frees you from having to fret over the minor details of how something should be formatted, and saves us time in the review process. The VSCode extension for `stylish-haskell` can be found here.
## Things to Avoid
1. *Don't `cabal clean`*. `cabal clean` doesn't take any arguments that determine what to clean; as such, running it will clean everything, including the language ASTs, which take some time to recompile.
2. *Don't `cabal configure` if humanly possible*. It nukes all your build caches. Should you need to modify a global build setting, edit `cabal.project.local` manually.
3. *Write small modules with minimal dependencies.* Keep the code that deals with language ASTs well-isolated.
4. *Avoid fancy type tricks if possible.* Techniques like [advanced overlap](https://wiki.haskell.org/GHC/AdvancedOverlap) can save on boilerplate but may not be worth the pain it puts the type checker through. If the only downside to avoiding a fancy type trick is some boilerplate, consider that boilerplate is often preferable to slowing down everyone's build for the indefinite future.

View File

@ -1,16 +1,19 @@
{-# LANGUAGE DataKinds, FlexibleContexts, PackageImports, PartialTypeSignatures, TypeApplications, TypeFamilies #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
module Evaluation (benchmarks) where
import Analysis.Project
import Control.Carrier.Parse.Simple
import qualified Data.Duration as Duration
import Data.Abstract.Evaluatable
import Data.Blob
import Data.Blob.IO (readBlobFromFile')
import Data.Bifunctor
import "semantic" Data.Graph (topologicalSort)
import Data.Blob.IO (readBlobFromPath)
import qualified Data.Duration as Duration
import Data.Graph.Algebraic (topologicalSort)
import qualified Data.Language as Language
import Data.Project
import Data.Proxy
import Gauge.Main
import Parsing.Parser
@ -18,21 +21,23 @@ import Semantic.Config (defaultOptions)
import Semantic.Graph
import Semantic.Task (TaskSession (..), runTask, withOptions)
import Semantic.Util
import qualified System.Path as Path
import System.Path ((</>))
import qualified System.Path as Path
import qualified System.Path.PartClass as Path.PartClass
-- Duplicating this stuff from Util to shut off the logging
callGraphProject' :: ( Language.SLanguage lang
, HasPrelude lang
, Path.PartClass.AbsRel ar
)
=> TaskSession
-> Proxy lang
-> Path.RelFile
-> Path.File ar
-> IO (Either String ())
callGraphProject' session proxy path
| Just (SomeParser parser) <- parserForLanguage analysisParsers lang = fmap (bimap show (const ())) . runTask session $ do
blob <- readBlobFromFile' (fileForTypedPath path)
blob <- readBlobFromPath (Path.toAbsRel path)
package <- fmap snd <$> runParse (Duration.fromSeconds 10) (parsePackage parser (Project (Path.toString (Path.takeDirectory path)) [blob] lang []))
modules <- topologicalSort <$> runImportGraphToModules proxy package
runCallGraph proxy False modules package

View File

@ -1,6 +1,6 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeApplications #-}
module Tagging (benchmarks) where
@ -8,19 +8,19 @@ import Control.Carrier.Parse.Measured
import Control.Carrier.Reader
import Control.Exception (throwIO)
import Control.Monad
import Data.Blob
import Data.Foldable
import Data.Language (PerLanguageModes (..), aLaCarteLanguageModes, preciseLanguageModes)
import Gauge
import System.FilePath.Glob
import qualified System.Path as Path
import Data.Flag
import Proto.Semantic as P hiding (Blob, BlobPair)
import Semantic.Api.Symbols (parseSymbols)
import Semantic.Config as Config
import Semantic.Task
import Semantic.Task.Files
import qualified Analysis.File as File
import Data.Flag
import Proto.Semantic as P hiding (Blob, BlobPair)
import Semantic.Api.Symbols (parseSymbols)
import Semantic.Config as Config
import Semantic.Task
import Semantic.Task.Files
benchmarks :: Benchmark
benchmarks = bgroup "tagging"
@ -66,7 +66,7 @@ parseSymbolsFilePath ::
=> PerLanguageModes
-> Path.RelFile
-> m ParseTreeSymbolResponse
parseSymbolsFilePath languageModes path = readBlob (fileForTypedPath path) >>= runReader languageModes . parseSymbols . pure @[]
parseSymbolsFilePath languageModes path = readBlob (File.fromPath path) >>= runReader languageModes . parseSymbols . pure @[]
testOptions :: Config.Options
testOptions = defaultOptions

View File

@ -1,20 +1,21 @@
-- ATTENTION: care must be taken to keep this file in sync with cabal.project.ci. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of that file).
-- ATTENTION: care must be taken to keep this file in sync with cabal.project.ci and script/ghci-flags. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of that file).
-- Local packages
packages: .
semantic-analysis
semantic-ast
semantic-codegen
semantic-core
semantic-go
semantic-java
semantic-json
semantic-parse
semantic-python
semantic-ruby
semantic-scope-graph
semantic-tsx
semantic-typescript
semantic-tags
semantic-scope-graph
-- Packages brought in from other repos instead of hackage
-- ATTENTION: remember to update cabal.project.ci when bumping SHAs here!

View File

@ -1,13 +1,15 @@
-- ATTENTION: care must be taken to keep this file in sync with cabal.project. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of this file).
-- ATTENTION: care must be taken to keep this file in sync with cabal.project and script/ghci-flags. If you add a package here, add it there (and add a package stanza with ghc-options to enable errors in CI at the bottom of this file).
-- Local packages
packages: .
semantic-analysis
semantic-ast
semantic-codegen
semantic-core
semantic-go
semantic-java
semantic-json
semantic-parse
semantic-python
semantic-ruby
semantic-scope-graph
@ -43,6 +45,9 @@ package semantic-analysis
package semantic-ast
ghc-options: -Werror
package semantic-codegen
ghc-options: -Werror
package semantic-core
ghc-options: -Werror
@ -55,6 +60,9 @@ package semantic-java
package semantic-json
ghc-options: -Werror
package semantic-parse
ghc-options: -Werror
package semantic-python
ghc-options: -Werror

View File

@ -9,7 +9,7 @@ Please note that this list of steps reflects the state of Semantic as is, not wh
1. **Find or write a [tree-sitter](https://tree-sitter.github.io) parser for your language.** The tree-sitter [organization page](https://github.com/tree-sitter) has a number of parsers beyond those we currently support in Semantic; look there first to make sure you're not duplicating work. The tree-sitter [documentation on creating parsers](http://tree-sitter.github.io/tree-sitter/creating-parsers) provides an exhaustive look at the process of developing and debugging tree-sitter parsers. Though we do not support grammars written with other toolkits such as [ANTLR](https://www.antlr.org), translating an ANTLR or other BNF-style grammar into a tree-sitter grammar is usually straightforward.
2. **Create a Haskell library providing an interface to that C source.** The [`haskell-tree-sitter`](https://github.com/tree-sitter/haskell-tree-sitter) repository provides a Cabal package for each supported language. You can find an example of a pull request to add such a package here. Each package needs to provide two API surfaces:
* a bridged (via the FFI) reference to the toplevel parser in the generated file ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/internal/TreeSitter/JSON/Internal.hs))
* symbol datatypes for each syntax node in the parser, generated with the `mkSymbolDatatype` Template Haskell splice ([example](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter-json/TreeSitter/JSON.hs))
* symbol datatypes for each syntax node in the parser, generated with the `mkSymbolDatatype` Template Haskell splice ([example](https://github.com/github/semantic/blob/master/semantic-json/src/Language/JSON/Grammar.hs)). For more information, see [CodeGen docs](https://github.com/github/semantic/blob/master/semantic-codegen/README.md).
3. **Identify the new syntax nodes required to represent your language.** While we provide an extensive library of reusable AST nodes for [literals](https://github.com/github/semantic/blob/master/src/Data/Syntax/Literal.hs), [expressions](https://github.com/github/semantic/blob/master/src/Data/Syntax/Expression.hs), [statements](https://github.com/github/semantic/blob/master/src/Data/Syntax/Statement.hs), and [types](https://github.com/github/semantic/blob/master/src/Data/Syntax/Type.hs), most languages will require some syntax nodes not found in other languages. You'll need to create a new module providing those data types, and those data types must be written as an open union: [here](https://github.com/github/semantic/commits/master/src/Language/Ruby/Syntax.hs?author=charliesome) is an example for Ruby's syntactic details.
4. **Write an assignment step that translates tree-sitter trees into Haskell datatypes.** More information about this can be found in the [assignment documentation](assignment.md). This is currently the most time-consuming and error-prone part of the process (see [https://github.com/github/semantic/issues/77]).
5. **Implement `Evaluatable` instances and add new [`Value` effects](https://github.com/github/semantic/blob/master/src/Control/Abstract/Value.hs) as is needed to describe the control flow of your language.** While several features of Semantic (e.g. `semantic parse --symbols` and `semantic diff`) will become fully available given a working assignment step, further features based on concrete or abstract interpretation (such as `semantic graph`) require implementing the `Evaluatable` typeclass and providing value-style effects for each control flow feature provided by the language. This means that language support is a spectrum: Semantic can provide useful information without any knowledge of a language's semantics, but each successive addition to its interpretive capabilities enables more functionality.

View File

@ -13,8 +13,6 @@ Our CI systems ensure that all patches pass `hlint`'s muster. We have our own se
We strongly recommend adding Haddock documentation to any function/data type, unless its purpose is immediately apparent from its name.
Comments should describe the "why", type signatures should describe the "what", and the code should describe the "how".
The Haskell Prelude is too minimal for serious work. The `Prologue` module should be imported in most files, as it reexports most of what you need.
# Formatting
2 spaces everywhere. Tabs are forbidden. Haskell indentation can be unpredictable, so generally stick with what your editor suggests.
@ -50,14 +48,6 @@ data Pos = Pos
}
```
### Split up imports into logical groups.
We use the following convention, each section separated by a newline:
1. Prelude/Prologue import
2. Library/stdlib imports
3. Local in-project imports.
### Align typographical symbols.
`->` in `case` statements and signatures, `=` in functions, and `::` in records should be aligned. Your editor can help with this. In certain situations, aligning symbols may decrease readability, e.g. complicated `case` statements. Use your best judgment.
@ -66,7 +56,7 @@ We use the following convention, each section separated by a newline:
Locally bound variables (such as the arguments to functions, or helpers defined in a `where` clause) can have short names, such as `x` or `go`. Globally bound functions and variables should have descriptive names.
You'll often find yourself implementing functions that conflict with Prelude/Prologue definitions. If this is the case, avoid adding a prefix to these functions, and instead import them qualified.
You'll often find yourself implementing functions that conflict with Prelude definitions. If this is the case, avoid adding a prefix to these functions, and instead import them qualified.
``` haskell
-- Broke

View File

@ -10,7 +10,18 @@ ghc_version="$(ghc --numeric-version)"
# recent hie-bios requires us to output to the file at $HIE_BIOS_OUTPUT, but older builds & script/repl dont set that var, so we default it to stdout
output_file="${HIE_BIOS_OUTPUT:-/dev/stdout}"
build_products_dir="dist-newstyle/build/x86_64-osx/ghc-$ghc_version/build-repl"
build_dir="dist-newstyle/build/x86_64-osx/ghc-$ghc_version"
build_products_dir="$build_dir/build-repl"
function add_autogen_includes {
echo "-optP-include"
echo "-optP$1/cabal_macros.h"
# autogenerated files, .h and .hs
echo "-i$1"
echo "-I$1"
}
cores=$(sysctl -n machdep.cpu.core_count || echo 4)
function flags {
# disable optimizations for faster loading
@ -18,28 +29,33 @@ function flags {
# dont load .ghci files (for ghcide)
echo "-ignore-dot-ghci"
# use as many jobs as there are physical cores
echo "-j$cores"
# where to put build products
echo "-outputdir $build_products_dir"
echo "-odir $build_products_dir"
echo "-hidir $build_products_dir"
echo "-stubdir $build_products_dir"
# preprocessor options, for -XCPP
echo "-optP-include"
echo "-optP$build_products_dir/autogen/cabal_macros.h"
if [ -d "$build_dir/semantic-0.10.0.0/build/autogen" ]
then add_autogen_includes "$build_dir/semantic-0.10.0.0/build/autogen"
elif [ -d "$build_dir/semantic-0.10.0.0/noopt/build/autogen" ]
then add_autogen_includes "$build_dir/semantic-0.10.0.0/noopt/build/autogen"
fi
# autogenerated sources, both .hs and .h (e.g. Foo_paths.hs)
echo "-i$build_products_dir/autogen"
echo "-I$build_products_dir/autogen"
echo "-optP-Wno-macro-redefined"
# .hs source dirs
# TODO: would be nice to figure this out from cabal.project & the .cabal files
echo "-isemantic-analysis/src"
echo "-isemantic-ast/src"
echo "-isemantic-codegen/src"
echo "-isemantic-core/src"
echo "-isemantic-go/src"
echo "-isemantic-java/src"
echo "-isemantic-json/src"
echo "-isemantic-parse/src"
echo "-isemantic-python/src"
echo "-isemantic-python/test"
echo "-isemantic-ruby/src"

View File

@ -40,6 +40,7 @@ library
import: common
hs-source-dirs: src
exposed-modules:
Analysis.Blob
Analysis.Carrier.Env.Monovariant
Analysis.Carrier.Env.Precise
Analysis.Carrier.Heap.Monovariant
@ -50,23 +51,31 @@ library
Analysis.Effect.Heap
Analysis.File
Analysis.FlowInsensitive
Analysis.Functor.Named
Analysis.ImportGraph
Analysis.Intro
Analysis.Name
Analysis.Project
Analysis.Typecheck
Control.Carrier.Fail.WithLoc
build-depends:
algebraic-graphs ^>= 0.3
, aeson ^>= 1.4
, algebraic-graphs ^>= 0.3
, base >= 4.13 && < 5
, containers ^>= 0.6
, filepath
, fused-effects ^>= 1.0
, fused-effects-readline
, fused-syntax
, hashable
, haskeline ^>= 0.7.5
, hashable
, lingo ^>= 0.3
, pathtype ^>= 0.8.1
, prettyprinter >= 1.2 && < 2
, prettyprinter-ansi-terminal ^>= 1.1.1
, semantic-source ^>= 0.0.1
, semantic-source ^>= 0.0.2
, semilattices
, terminal-size ^>= 0.3
, text ^>= 1.2.3.1
, transformers ^>= 0.5

View File

@ -0,0 +1,45 @@
{-# LANGUAGE OverloadedStrings #-}
module Analysis.Blob
( Blob (..)
, fromSource
, blobLanguage
, blobPath
, nullBlob
) where
import Analysis.File
import Data.Aeson
import Source.Language as Language
import Source.Source as Source
import qualified System.Path as Path
import qualified System.Path.PartClass as Path.PartClass
-- | The source, path information, and language of a file read from disk.
data Blob = Blob
{ blobSource :: Source -- ^ The UTF-8 encoded source text of the blob.
, blobFile :: File Language -- ^ Path/language information for this blob.
} deriving (Show, Eq)
instance FromJSON Blob where
parseJSON = withObject "Blob" $ \b -> do
src <- b .: "content"
Right pth <- fmap Path.parse (b .: "path")
lang <- b .: "language"
let lang' = if knownLanguage lang then lang else Language.forPath pth
pure (fromSource (pth :: Path.AbsRelFile) lang' src)
-- | Create a Blob from a provided path, language, and UTF-8 source.
-- The resulting Blob's span is taken from the 'totalSpan' of the source.
fromSource :: Path.PartClass.AbsRel ar => Path.File ar -> Language -> Source -> Blob
fromSource filepath language source
= Blob source (Analysis.File.File (Path.toAbsRel filepath) (totalSpan source) language)
blobLanguage :: Blob -> Language
blobLanguage = Analysis.File.fileBody . blobFile
blobPath :: Blob -> FilePath
blobPath = Path.toString . Analysis.File.filePath . blobFile
nullBlob :: Blob -> Bool
nullBlob = Source.null . blobSource

View File

@ -29,7 +29,7 @@ import qualified Analysis.Carrier.Env.Precise as A
import qualified Analysis.Carrier.Heap.Precise as A
import qualified Analysis.Effect.Domain as A
import Analysis.File
import Analysis.Name
import Analysis.Functor.Named
import Control.Algebra
import Control.Carrier.Fail.WithLoc
import Control.Carrier.Fresh.Strict
@ -178,7 +178,7 @@ heapAddressGraph = heapGraph (\ addr v -> (Value v, addr)) (fmap G.vertex . (,)
addressStyle :: Heap (Concrete term) -> G.Style (EdgeType (Concrete term), Addr) Text
addressStyle heap = (G.defaultStyle vertex) { G.edgeAttributes }
where vertex (_, addr) = pack (show addr) <> " = " <> maybe "?" fromConcrete (IntMap.lookup addr heap)
edgeAttributes _ (Slot name, _) = ["label" G.:= unName name]
edgeAttributes _ (Slot name, _) = ["label" G.:= formatName name]
edgeAttributes _ (Edge Import, _) = ["color" G.:= "blue"]
edgeAttributes _ (Edge Lexical, _) = ["color" G.:= "green"]
edgeAttributes _ _ = []
@ -186,7 +186,7 @@ addressStyle heap = (G.defaultStyle vertex) { G.edgeAttributes }
Unit -> "()"
Bool b -> pack $ show b
String s -> pack $ show s
Closure p (Span s e) (Named n _) -> "\\\\ " <> unName n <> " [" <> pack (Path.toString p) <> ":" <> showPos s <> "-" <> showPos e <> "]"
Closure p (Span s e) (Named n _) -> "\\\\ " <> formatName n <> " [" <> pack (Path.toString p) <> ":" <> showPos s <> "-" <> showPos e <> "]"
Record _ -> "{}"
showPos (Pos l c) = pack (show l) <> ":" <> pack (show c)

View File

@ -24,7 +24,7 @@ module Analysis.Effect.Domain
, run
) where
import Analysis.Name
import Analysis.Functor.Named
import Control.Algebra
import Data.Text (Text)
import GHC.Generics (Generic1)

View File

@ -1,13 +1,18 @@
{-# LANGUAGE DeriveTraversable #-}
module Analysis.File
( File(..)
, fileLanguage
, fromBody
, fromPath
) where
import Data.Maybe (fromJust, listToMaybe)
import GHC.Stack
import Source.Span
import Data.Maybe (fromJust, listToMaybe)
import Data.Semilattice.Lower
import GHC.Stack
import Source.Language as Language
import Source.Span
import qualified System.Path as Path
import qualified System.Path.PartClass as Path.PartClass
data File a = File
{ filePath :: !Path.AbsRelFile
@ -19,3 +24,10 @@ data File a = File
fromBody :: HasCallStack => a -> File a
fromBody body = File (Path.absRel (srcLocFile srcLoc)) (spanFromSrcLoc srcLoc) body where
srcLoc = snd (fromJust (listToMaybe (getCallStack callStack)))
-- | The language of the provided file, as inferred by 'Language.forPath'.
fileLanguage :: File a -> Language
fileLanguage = Language.forPath . filePath
fromPath :: Path.PartClass.AbsRel ar => Path.File ar -> File Language
fromPath p = File (Path.toAbsRel p) lowerBound (Language.forPath p)

View File

@ -0,0 +1,37 @@
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveTraversable #-}
module Analysis.Functor.Named
( module Analysis.Name
, Named (..)
, named
, named'
, namedName
, namedValue
) where
import Analysis.Name
import Data.Function (on)
-- | Annotates an @a@ with a 'Name'-provided name, which is ignored for '==' and 'compare'.
data Named a = Named Name a
deriving (Foldable, Functor, Show, Traversable)
named :: Name -> a -> Named a
named = Named
named' :: Name -> Named Name
named' u = Named u u
namedName :: Named a -> Name
namedName (Named n _) = n
namedValue :: Named a -> a
namedValue (Named _ a) = a
instance Eq a => Eq (Named a) where
(==) = (==) `on` namedValue
instance Ord a => Ord (Named a) where
compare = compare `on` namedValue

View File

@ -21,7 +21,7 @@ import qualified Analysis.Carrier.Heap.Monovariant as A
import qualified Analysis.Effect.Domain as A
import Analysis.File
import Analysis.FlowInsensitive
import Analysis.Name
import Analysis.Functor.Named
import Control.Algebra
import Control.Applicative (Alternative (..))
import Control.Carrier.Fail.WithLoc

View File

@ -11,7 +11,7 @@ module Analysis.Intro
, record
) where
import Analysis.Name
import Analysis.Functor.Named
import Control.Algebra
import Data.Text (Text)
import GHC.Generics (Generic1)

View File

@ -1,40 +1,68 @@
{-# LANGUAGE DeriveTraversable, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Analysis.Name
( Name(..)
, Named(..)
, named
, named'
, namedName
, namedValue
( Name
-- * Constructors
, gensym
, name
, nameI
, formatName
, isGenerated
) where
import Data.Function (on)
import Data.String (IsString)
import Data.Text (Text)
import Control.Effect.Fresh
import Data.Aeson
import qualified Data.Char as Char
import Data.Hashable
import Data.String
import Data.Text (Text)
import qualified Data.Text as Text
-- | User-specified and -relevant names.
newtype Name = Name { unName :: Text }
deriving (Eq, IsString, Ord, Show)
-- | The type of variable names.
data Name
= Name Text
| I Int
deriving (Eq, Ord)
instance IsString Name where
fromString = Name . fromString
-- | Annotates an @a@ with a 'Name'-provided name, which is ignored for '==' and 'compare'.
data Named a = Named Name a
deriving (Foldable, Functor, Show, Traversable)
-- | Generate a fresh (unused) name for use in synthesized variables/closures/etc.
gensym :: Has Fresh sig m => m Name
gensym = I <$> fresh
named :: Name -> a -> Named a
named = Named
-- | Construct a 'Name' from a 'Text'.
name :: Text -> Name
name = Name
named' :: Name -> Named Name
named' u = Named u u
isGenerated :: Name -> Bool
isGenerated (I _) = True
isGenerated _ = False
namedName :: Named a -> Name
namedName (Named n _) = n
-- | Construct a 'Name' from an 'Int'. This is suitable for automatic generation, e.g. using a Fresh effect, but should not be used for human-generated names.
nameI :: Int -> Name
nameI = I
namedValue :: Named a -> a
namedValue (Named _ a) = a
-- | Extract a human-readable 'Text' from a 'Name'.
-- Sample outputs can be found in @Data.Abstract.Name.Spec@.
formatName :: Name -> Text
formatName (Name name) = name
formatName (I i) = Text.pack $ '_' : (alphabet !! a) : replicate n 'ʹ'
where alphabet = ['a'..'z']
(n, a) = i `divMod` length alphabet
instance Eq a => Eq (Named a) where
(==) = (==) `on` namedValue
instance Show Name where
showsPrec _ = prettyShowString . Text.unpack . formatName
where prettyShowString str = showChar '"' . foldr ((.) . prettyChar) id str . showChar '"'
prettyChar c
| c `elem` ['\\', '\"'] = Char.showLitChar c
| Char.isPrint c = showChar c
| otherwise = Char.showLitChar c
instance Ord a => Ord (Named a) where
compare = compare `on` namedValue
instance Hashable Name where
hashWithSalt salt (Name name) = hashWithSalt salt name
hashWithSalt salt (I i) = salt `hashWithSalt` (1 :: Int) `hashWithSalt` i
instance ToJSON Name where
toJSON = toJSON . formatName
toEncoding = toEncoding . formatName

View File

@ -0,0 +1,33 @@
module Analysis.Project
( Project (..)
, projectExtensions
, projectName
, projectFiles
) where
import Prelude hiding (readFile)
import Analysis.Blob
import Analysis.File
import Data.Text (Text)
import qualified Data.Text as T
import Source.Language
import System.FilePath.Posix
-- | A 'Project' contains all the information that semantic needs
-- to execute an analysis, diffing, or graphing pass.
data Project = Project
{ projectRootDir :: FilePath
, projectBlobs :: [Blob]
, projectLanguage :: Language
, projectExcludeDirs :: [FilePath]
} deriving (Eq, Show)
projectName :: Project -> Text
projectName = T.pack . dropExtensions . takeFileName . projectRootDir
projectExtensions :: Project -> [String]
projectExtensions = extensionsForLanguage . projectLanguage
projectFiles :: Project -> [File Language]
projectFiles = fmap blobFile . projectBlobs

View File

@ -26,8 +26,8 @@ import qualified Analysis.Carrier.Heap.Monovariant as A
import qualified Analysis.Effect.Domain as A
import Analysis.File
import Analysis.FlowInsensitive
import Analysis.Functor.Named
import qualified Analysis.Intro as Intro
import Analysis.Name
import Control.Algebra
import Control.Applicative (Alternative (..))
import Control.Carrier.Fail.WithLoc
@ -251,12 +251,12 @@ instance ( Alternative m
ret <- meta
unify t (Alg (arg :-> ret))
b <- concretize ret
k (Named (Name mempty) (lift b)) where
k (Named (name mempty) (lift b)) where
concretize = \case
Alg Unit -> pure Intro.unit
Alg Bool -> pure (Intro.bool True) <|> pure (Intro.bool False)
Alg String -> pure (Intro.string mempty)
Alg (_ :-> b) -> send . Intro.Lam . Named (Name mempty) . lift <$> concretize b
Alg (_ :-> b) -> send . Intro.Lam . Named (name mempty) . lift <$> concretize b
Alg (Record t) -> Intro.record <$> traverse (traverse concretize) (Map.toList t)
t -> fail $ "cant concretize " <> show t -- FIXME: concretize type variables by incrementally solving constraints
L (R (R (R (R (A.Record fields k))))) -> do

View File

@ -38,30 +38,25 @@ common haskell
library
import: haskell
exposed-modules:
AST.Element
AST.Traversable1
AST.Traversable1.Class
Marshal.JSON
-- other-modules:
-- other-extensions:
build-depends: base ^>= 4.13
, tree-sitter ^>= 0.8
, semantic-source ^>= 0.0.1
, tree-sitter-python ^>= 0.8.1
, bytestring ^>= 0.10.8.2
, aeson ^>= 1.4.2.0
, aeson-pretty ^>= 0.8.8
, bytestring ^>= 0.10.8.2
, tree-sitter ^>= 0.8
, semantic-source ^>= 0.0.2
, template-haskell ^>= 2.15
, bytestring ^>= 0.10.8.2
, optparse-applicative >= 0.14.3 && < 0.16
, pretty-simple ^>= 3.1.0.0
hs-source-dirs: src
default-language: Haskell2010
, pretty-simple ^>= 3.1.0.0
, text ^>= 1.2.3.1
executable semantic-ast
import: haskell
main-is: Main.hs
-- other-modules:
-- other-extensions:
build-depends: base
, semantic-ast
, tree-sitter
, semantic-source
, tree-sitter-python
, bytestring
, optparse-applicative
, pretty-simple
hs-source-dirs: src
default-language: Haskell2010

View File

@ -0,0 +1,75 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module AST.Traversable1
( module AST.Traversable1.Class
, for1
, traverse1_
, for1_
, foldMap1
, Generics(..)
) where
import AST.Traversable1.Class
import Data.Functor (void)
import Data.Functor.Const
import Data.Monoid (Ap (..))
import GHC.Generics
for1
:: forall c t f a b
. (Traversable1 c t, Applicative f)
=> t a
-> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> f (t b)
for1 t f g = traverse1 @c f g t
traverse1_
:: forall c t f a a' a''
. (Traversable1 c t, Applicative f)
=> (a -> f a')
-> (forall t' . c t' => t' a -> f a'')
-> t a
-> f ()
traverse1_ f g = getAp . foldMap1 @c (Ap . void . f) (Ap . void . g)
for1_
:: forall c t f a a' a''
. (Traversable1 c t, Applicative f)
=> t a
-> (a -> f a')
-> (forall t' . c t' => t' a -> f a'')
-> f ()
for1_ t f g = getAp $ foldMap1 @c (Ap . void . f) (Ap . void . g) t
foldMap1 :: forall c t b a . (Traversable1 c t, Monoid b) => (a -> b) -> (forall t' . c t' => t' a -> b) -> t a -> b
foldMap1 f g = getConst . traverse1 @c (Const . f) (Const . g)
-- | @'Generics' t@ has a 'Traversable1' instance when @'Rep1' t@ has a 'GTraversable1' instance, making this convenient for applying 'traverse1' to 'Generic1' types lacking 'Traversable1' instances:
--
-- @
-- 'getGenerics' '<$>' 'traverse1' f g ('Generics' t) = 'to1' '<$>' 'gtraverse1' f g ('from1' t)
-- @
--
-- It further defines its 'Foldable', 'Functor', and 'Traversable' instances using 'Traversable1', making it suitable for deriving with @-XDerivingVia@.
newtype Generics t a = Generics { getGenerics :: t a }
instance (Generic1 t, GTraversable1 Foldable (Rep1 t)) => Foldable (Generics t) where
foldMap = foldMapDefault1
instance (Generic1 t, GTraversable1 Functor (Rep1 t)) => Functor (Generics t) where
fmap = fmapDefault1
instance (Generic1 t, GTraversable1 Foldable (Rep1 t), GTraversable1 Functor (Rep1 t), GTraversable1 Traversable (Rep1 t)) => Traversable (Generics t) where
traverse = traverseDefault1
instance (Generic1 t, GTraversable1 c (Rep1 t)) => Traversable1 c (Generics t) where
traverse1 f g = fmap (Generics . to1) . gtraverse1 @c f g . from1 . getGenerics

View File

@ -0,0 +1,100 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
-- | This module defines the 'Traversable1' class and its generic derivation using 'GTraversable1'. Note that any changes to this file will require recompilation of all of the AST modules, which is quite expensive; thus, most additions should be made in "AST.Traversable1" instead, and that that module should not be imported by the AST modules.
module AST.Traversable1.Class
( Traversable1(..)
, foldMapDefault1
, fmapDefault1
, traverseDefault1
, GTraversable1(..)
) where
import Data.Functor.Const
import Data.Functor.Identity
import GHC.Generics
-- | Simultaneous traversal of subterms of kind @*@ and @* -> *@ in an 'Applicative' context.
--
-- 'Traversable1' can express any combination of first- and second-order mapping, folding, and traversal.
--
-- Note that the @1@ suffix is used in the manner of 'Data.Functor.Classes.Show1' or 'Generic1', rather than 'foldr1'; its a higher-order traversal which is simultaneously able to traverse (and alter) annotations.
class Traversable1 c t where
-- | Map annotations of kind @*@ and heterogeneously-typed subterms of kind @* -> *@ under some constraint @c@ into an 'Applicative' context. The constraint is necessary to operate on otherwise universally-quantified subterms, since otherwise there would be insufficient information to inspect them at all.
--
-- No proxy is provided for the constraint @c@; instead, @-XTypeApplications@ should be used. E.g. here we ignore the annotations and print all the @* -> *@ subterms using 'Show1':
--
-- @
-- 'traverse1' \@'Data.Functor.Classes.Show1' 'pure' (\ t -> t '<$' 'putStrLn' ('Data.Functor.Classes.showsPrec1' 0 t ""))
-- @
--
-- Note that this traversal is non-recursive: any recursion through subterms must be performed by the second function argument.
traverse1
:: Applicative f
=> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> t a
-> f (t b)
default traverse1
:: (Applicative f, Generic1 t, GTraversable1 c (Rep1 t))
=> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> t a
-> f (t b)
traverse1 f g = fmap to1 . gtraverse1 @c f g . from1
-- | This function may be used as a value for 'foldMap' in a 'Foldable' instance.
foldMapDefault1 :: (Traversable1 Foldable t, Monoid b) => (a -> b) -> t a -> b
foldMapDefault1 f = getConst . traverse1 @Foldable (Const . f) (Const . foldMap f)
-- | This function may be used as a value for 'fmap' in a 'Functor' instance.
fmapDefault1 :: Traversable1 Functor t => (a -> b) -> t a -> t b
fmapDefault1 f = runIdentity . traverse1 @Functor (Identity . f) (Identity . fmap f)
-- | This function may be used as a value for 'traverse' in a 'Traversable' instance.
traverseDefault1 :: (Traversable1 Traversable t, Applicative f) => (a -> f b) -> t a -> f (t b)
traverseDefault1 f = traverse1 @Traversable f (traverse f)
class GTraversable1 c t where
-- | Generically map annotations and subterms of kind @* -> *@ into an 'Applicative' context.
gtraverse1
:: Applicative f
=> (a -> f b)
-> (forall t' . c t' => t' a -> f (t' b))
-> t a
-> f (t b)
instance GTraversable1 c f => GTraversable1 c (M1 i c' f) where
gtraverse1 f g = fmap M1 . gtraverse1 @c f g . unM1
instance (GTraversable1 c f, GTraversable1 c g) => GTraversable1 c (f :*: g) where
gtraverse1 f g (l :*: r) = (:*:) <$> gtraverse1 @c f g l <*> gtraverse1 @c f g r
instance (GTraversable1 c f, GTraversable1 c g) => GTraversable1 c (f :+: g) where
gtraverse1 f g (L1 l) = L1 <$> gtraverse1 @c f g l
gtraverse1 f g (R1 r) = R1 <$> gtraverse1 @c f g r
instance GTraversable1 c (K1 R t) where
gtraverse1 _ _ (K1 k) = pure (K1 k)
instance GTraversable1 c Par1 where
gtraverse1 f _ (Par1 a) = Par1 <$> f a
instance c t => GTraversable1 c (Rec1 t) where
gtraverse1 _ g (Rec1 t) = Rec1 <$> g t
instance (Traversable f, GTraversable1 c g) => GTraversable1 c (f :.: g) where
gtraverse1 f g = fmap Comp1 . traverse (gtraverse1 @c f g) . unComp1
instance GTraversable1 c U1 where
gtraverse1 _ _ _ = pure U1

View File

@ -0,0 +1,87 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE UndecidableInstances #-}
module Marshal.JSON
( MarshalJSON(..)
) where
import Data.Aeson as Aeson
import Data.List.NonEmpty (NonEmpty)
import Data.Text (Text)
import qualified Data.Text as Text
import GHC.Generics
-- TODO: use toEncoding -- direct serialization to ByteString
-- Serialize unmarshaled ASTs into JSON representation by auto-deriving Aeson instances generically
class MarshalJSON t where
marshal :: (ToJSON a) => t a -> Value
marshal = object . fields []
fields :: (ToJSON a) => [(Text, Value)] -> t a -> [(Text, Value)]
default fields :: ( Generic1 t, GFields (Rep1 t), ToJSON a) => [(Text, Value)] -> t a -> [(Text, Value)]
fields acc = gfields acc . from1
-- Implement the sum case
instance {-# OVERLAPPING #-} (MarshalJSON f, MarshalJSON g) => MarshalJSON (f :+: g) where
fields acc (L1 f) = fields acc f
fields acc (R1 g) = fields acc g
-- Create MarshalJSON instances for each type constructor
instance (GFields (Rep1 t), Generic1 t) => MarshalJSON t
-- Stores meta-data for datatypes
instance (GFields f, Datatype c) => GFields (M1 D c f) where
gfields acc x = gfields ((Text.pack "type", String (Text.pack (datatypeName x))): acc) $ unM1 x
-- Fold over S1 product types and pass the result to Aeson objects
instance GFields fields => GFields (C1 c fields) where
gfields acc x = gfields acc (unM1 x)
-- Implement base case for products
-- To get a value out of this datum, we define another typeclass: @GValue@ with the method @gvalue@.
instance (GValue p, Selector s) => GFields (S1 s p) where
gfields acc x = (Text.pack (selName x), gvalue (unM1 x)) : acc
-- Implement inductive case for product case
-- Product datatypes are marshalled to an object with a type field holding the constructor name and a separate field for each selector in the datatype.
instance (GFields f, GFields g) => GFields (f :*: g) where
gfields acc (f :*: g) = gfields (gfields acc g) f
-- GValue for leaves
instance ToJSON a => GValue (K1 i a) where
gvalue = toJSON . unK1
-- Par1 instance
instance GValue Par1 where
gvalue = toJSON . unPar1
instance (MarshalJSON t) => GValue (Rec1 t) where
gvalue (Rec1 f) = marshal f
instance (GValue t) => GValue (Maybe :.: t) where
gvalue (Comp1 (Just t)) = gvalue t
gvalue (Comp1 Nothing) = Null
instance (GValue t) => GValue ([] :.: t) where
gvalue (Comp1 ts) = toJSON $ map gvalue ts
instance (GValue t) => GValue (NonEmpty :.: t) where
gvalue (Comp1 ts) = toJSON $ fmap gvalue ts
-- GFields operates on product field types: it takes an accumulator, a datatype, and returns a new accumulator value.
class GFields f where
gfields :: ToJSON a => [(Text, Value)] -> f a -> [(Text, Value)]
-- gvalue is a wrapper that calls to @toJSON@ (for leaf nodes such as @Text@) or recurses via @marshal@
class GValue f where
gvalue :: (ToJSON a) => f a -> Value

View File

@ -0,0 +1,5 @@
# Revision history for semantic-codegen
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.

4
semantic-codegen/Main.hs Normal file
View File

@ -0,0 +1,4 @@
module Main where
main :: IO ()
main = putStrLn "Hello, Haskell!"

216
semantic-codegen/README.md Normal file
View File

@ -0,0 +1,216 @@
# CodeGen Documentation
CodeGen is the process for auto-generating language-specific, strongly-typed ASTs to be used in [Semantic](https://github.com/github/semantic-code/blob/d9f91a05dc30a61b9ff8c536d75661d417f3c506/design-docs/precise-code-navigation.md).
_Note: This project was recently moved from `tree-sitter` into `Semantic`. These docs are in the process of being updated to reflect changes._
### Prerequisites
To get started, first make sure your language has:
1. An existing [tree-sitter](http://tree-sitter.github.io/tree-sitter/) parser;
2. An existing Cabal package in this repository for said language. This will provide an interface into tree-sitter's C source. [Here](https://github.com/tree-sitter/haskell-tree-sitter/tree/master/tree-sitter-python) is an example of a library for Python, a supported language that the remaining documentation will refer to.
### CodeGen Pipeline
During parser generation, tree-sitter produces a JSON file that captures the structure of a language's grammar. Based on this, we're able to derive datatypes representing surface languages, and then use those datatypes to generically build ASTs. This automates the engineering effort [historically required for adding a new language](https://github.com/github/semantic/blob/master/docs/adding-new-languages.md).
The following steps provide a high-level outline of the process:
1. [**Deserialize.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/Deserialize.hs) First, we deserialize the `node-types.json` file for a given language into the desired shape of datatypes via parsing capabilities afforded by the [Aeson](http://hackage.haskell.org/package/aeson) library. There are four distinct types represented in the node-types.json file takes on: sums, products, named leaves and anonymous leaves.
2. [**Generate Syntax.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/GenerateSyntax.hs) We then use Template Haskell to auto-generate language-specific, strongly-typed datatypes that represent various language constructs. This API exports the top-level function `astDeclarationsForLanguage` to auto-generate datatypes at compile-time, which is is invoked by a given language [AST](https://github.com/github/semantic/blob/master/semantic-python/src/Language/Python/AST.hs) module.
3. [**Unmarshal.**](https://github.com/github/semantic/blob/master/semantic-codegen/src/AST/Unmarshal.hs) Unmarshaling is the process of iterating over tree-sitters parse trees using its tree cursor API, and producing Haskell ASTs for the relevant nodes. We parse source code from tree-sitter and unmarshal the data we get to build these ASTs generically. This file exports the top-level function `parseByteString`, which takes source code and a language as arguments, and produces an AST.
Here is an example that describes the relationship between a Python identifier represented in the tree-sitter generated JSON file, and a datatype generated by Template Haskell based on the provided JSON:
| Type | JSON | TH-generated code |
|----------|--------------|------------|
|Named leaf|<code>{<br>"type": "identifier",<br>"named": true<br>}|<code>data TreeSitter.Python.AST.Identifier a<br>= TreeSitter.Python.AST.Identifier {TreeSitter.Python.AST.ann :: a,<br>TreeSitter.Python.AST.bytes :: text-1.2.3.1:Data.Text.Internal.Text} -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Show a => Show (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Ord a => Ord (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Eq a => Eq (TreeSitter.Python.AST.Identifier a) -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Traversable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Functor TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Foldable TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance Unmarshal TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1<br>instance SymbolMatching TreeSitter.Python.AST.Identifier -- Defined at TreeSitter/Python/AST.hs:10:1|
The remaining document provides more details on generating ASTs, inspecting datatypes, tests, and information on decisions pertaining to relevant APIs.
___
### Table of Contents
- [CodeGen Documentation](#codegen-documentation)
- [Prerequisites](#prerequisites)
- [CodeGen Pipeline](#codegen-pipeline)
- [Table of Contents](#table-of-contents)
- [Generating ASTs](#generating-asts)
- [Inspecting auto-generated datatypes](#inspecting-auto-generated-datatypes)
- [Tests](#tests)
- [Additional notes](#additional-notes)
___
### Generating ASTs
To parse source code and produce ASTs locally:
1. Load the REPL for a given language:
```
cabal new-repl lib:tree-sitter-python
```
2. Set language extensions, `OverloadedStrings` and `TypeApplications`, and import relevant modules, `AST.Unmarshal`, `Source.Range` and `Source.Span`:
```
:seti -XOverloadedStrings
:seti -XTypeApplications
import Source.Span
import Source.Range
import AST.Unmarshal
```
3. You can now call `parseByteString`, passing in the desired language you wish to parse (in this case Python exemplified by `tree_sitter_python`), and the source code (in this case an integer `1`). Since the function is constrained by `(Unmarshal t, UnmarshalAnn a)`, you can use type applications to provide a top-level node `t`, an entry point into the tree, in addition to a polymorphic annotation `a` used to represent range and span:
```
parseByteString @TreeSitter.Python.AST.Module @(Source.Span.Span, Source.Range.Range) tree_sitter_python "1"
```
This generates the following AST:
```
Right
( Module
{ ann =
( Range
{ start = 0
, end = 1
}
, Span
{ start = Pos
{ line = 0
, column = 0
}
, end = Pos
{ line = 0
, column = 1
}
}
)
, extraChildren =
[ R1
( SimpleStatement
( L1
( R1
( R1
( L1
( ExpressionStatement
{ ann =
( Range
{ start = 0
, end = 1
}
, Span
{ start = Pos
{ line = 0
, column = 0
}
, end = Pos
{ line = 0
, column = 1
}
}
)
, extraChildren = L1
( L1
( Expression
( L1
( L1
( L1
( PrimaryExpression
( R1
( L1
( L1
( L1
( Integer
{ ann =
( Range
{ start = 0
, end = 1
}
, Span
{ start = Pos
{ line = 0
, column = 0
}
, end = Pos
{ line = 0
, column = 1
}
}
)
, text = "1"
}
)
)
)
)
)
)
)
)
)
)
) :| []
}
)
)
)
)
)
)
]
}
)
```
### Inspecting auto-generated datatypes
Datatypes are derived from a language and its `node-types.json` file using the GenerateSyntax API. Definition can be viewed in the REPL just as they would for any other datatype, using `:i`:
```
:i TreeSitter.Python.AST.Module
```
This shows us the auto-generated `Module` datatype:
```Haskell
data TreeSitter.Python.AST.Module a
= TreeSitter.Python.AST.Module {TreeSitter.Python.AST.ann :: a,
TreeSitter.Python.AST.extraChildren :: [(GHC.Generics.:+:)
TreeSitter.Python.AST.CompoundStatement
TreeSitter.Python.AST.SimpleStatement
a]}
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Show a => Show (TreeSitter.Python.AST.Module a)
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Ord a => Ord (TreeSitter.Python.AST.Module a)
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Eq a => Eq (TreeSitter.Python.AST.Module a)
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Traversable TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Functor TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Foldable TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance Unmarshal TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
instance SymbolMatching TreeSitter.Python.AST.Module
-- Defined at TreeSitter/Python/AST.hs:10:1
```
### Tests
As of right now, Hedgehog tests are minimal and only in place for the Python library.
To run tests:
`cabal v2-test tree-sitter-python`
### Additional notes
- [GenerateSyntax](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/GenerateSyntax.hs) provides a way to pre-define certain datatypes for which Template Haskell is not used. Any datatypes among the node types which have already been defined in the module where the splice is run will be skipped, allowing customization of the representation of parts of the tree. While this gives us flexibility, we encourage that this is used sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into `Integer`s), and may require defining `TS.UnmarshalAnn` or `TS.SymbolMatching` instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual `Foldable`, `Functor`, etc. instances provided for generated datatypes.
- Annotations are captured by a polymorphic parameter `a`
- [Unmarshal](https://github.com/tree-sitter/haskell-tree-sitter/blob/master/tree-sitter/src/TreeSitter/Unmarshal.hs) defines both generic and non-generic classes. This is because generic behaviors are different than what we get non-generically, and in the case of ` Maybe`, `[]`—we actually preference doing things non-generically. Since `[]` is a sum, the generic behavior for `:+:` would be invoked and expect that wed have repetitions represented in the parse tree as right-nested singly-linked lists (ex., `(a (b (c (d…))))`) rather than as just consecutive sibling nodes (ex., `(a b c ...d)`, which is what our trees have). We want to match the latter.

View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,83 @@
cabal-version: 2.4
-- Initial package description 'semantic-codegen.cabal' generated by 'cabal
-- init'. For further documentation, see
-- http://haskell.org/cabal/users-guide/
name: semantic-codegen
version: 0.1.0.0
-- synopsis:
-- description:
-- bug-reports:
license: MIT
license-file: LICENSE
author: The Semantic Authors
maintainer: opensource+semantic@github.com
copyright: (c) 2019 GitHub, Inc.
category: Language
extra-source-files: CHANGELOG.md
tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies
library
import: haskell
exposed-modules: AST.Deserialize
AST.GenerateSyntax
AST.Grammar.TH
AST.Token
AST.Unmarshal
-- other-modules:
-- other-extensions:
build-depends: base >= 4.13
, aeson ^>= 1.4.2.0
, bytestring ^>= 0.10.8.2
, tree-sitter ^>= 0.8
, fused-effects ^>= 1.0
, semantic-ast
, semantic-source ^>= 0.0.2
, template-haskell ^>= 2.15
, text ^>= 1.2.3.1
, unordered-containers ^>= 0.2.10
, containers >= 0.6.0.1
, text ^>= 1.2.3.1
, filepath ^>= 1.4.1
, directory ^>= 1.3.3.2
hs-source-dirs: src
default-language: Haskell2010
executable semantic-codegen
import: haskell
main-is: Main.hs
-- other-modules:
-- other-extensions:
build-depends: base
, tree-sitter
, semantic-source
, bytestring
, aeson
, bytestring
, text
, unordered-containers
, containers
, filepath
default-language: Haskell2010
-- hs-source-dirs:
default-language: Haskell2010

View File

@ -0,0 +1,133 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveLift #-}
-- Turn off partial field warnings for Datatype.
{-# OPTIONS_GHC -Wno-partial-fields #-}
module AST.Deserialize
( Datatype (..)
, Field (..)
, Children(..)
, Required (..)
, Type (..)
, DatatypeName (..)
, Named (..)
, Multiple (..)
) where
import Data.Aeson as Aeson
import Data.Aeson.Types
import Data.Char
import GHC.Generics hiding (Constructor, Datatype)
import Language.Haskell.TH.Syntax (Lift)
import Data.Text (Text, unpack)
import Data.List.NonEmpty (NonEmpty (..))
import qualified Data.HashMap.Strict as HM
import Data.Maybe (fromMaybe)
-- Types to deserialize into:
data Datatype
= SumType
{ datatypeName :: DatatypeName
, datatypeNameStatus :: Named
, datatypeSubtypes :: NonEmpty Type
}
| ProductType
{ datatypeName :: DatatypeName
, datatypeNameStatus :: Named
, datatypeChildren :: Maybe Children
, datatypeFields :: [(String, Field)]
}
| LeafType
{ datatypeName :: DatatypeName
, datatypeNameStatus :: Named
}
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Datatype where
parseJSON = withObject "Datatype" $ \v -> do
type' <- v .: "type"
named <- v .: "named"
subtypes <- v .:? "subtypes"
case subtypes of
Nothing -> do
fields <- fmap (fromMaybe HM.empty) (v .:? "fields")
children <- v .:? "children"
if null fields && null children then
pure (LeafType type' named)
else
ProductType type' named children <$> parseKVPairs (HM.toList fields)
Just subtypes -> pure (SumType type' named subtypes)
-- | Transforms list of key-value pairs to a Parser
parseKVPairs :: [(Text, Value)] -> Parser [(String, Field)]
parseKVPairs = traverse go
where go :: (Text, Value) -> Parser (String, Field)
go (t,v) = do
v' <- parseJSON v
pure (unpack t, v')
data Field = MkField
{ fieldRequired :: Required
, fieldTypes :: NonEmpty Type
, fieldMultiple :: Multiple
}
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Field where
parseJSON = genericParseJSON customOptions
newtype Children = MkChildren Field
deriving (Eq, Ord, Show, Generic)
deriving newtype (ToJSON, FromJSON)
data Required = Optional | Required
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Required where
parseJSON = withBool "Required" (\p -> pure (if p then Required else Optional))
data Type = MkType
{ fieldType :: DatatypeName
, isNamed :: Named
}
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Type where
parseJSON = genericParseJSON customOptions
newtype DatatypeName = DatatypeName { getDatatypeName :: String }
deriving (Eq, Ord, Show, Generic)
deriving newtype (FromJSON, ToJSON)
data Named = Anonymous | Named
deriving (Eq, Ord, Show, Generic, ToJSON, Lift)
instance FromJSON Named where
parseJSON = withBool "Named" (\p -> pure (if p then Named else Anonymous))
data Multiple = Single | Multiple
deriving (Eq, Ord, Show, Generic, ToJSON)
instance FromJSON Multiple where
parseJSON = withBool "Multiple" (\p -> pure (if p then Multiple else Single))
customOptions :: Aeson.Options
customOptions = Aeson.defaultOptions
{
fieldLabelModifier = initLower . dropPrefix
, constructorTagModifier = initLower
}
dropPrefix :: String -> String
dropPrefix = Prelude.dropWhile isLower
initLower :: String -> String
initLower (c:cs) = toLower c : cs
initLower "" = ""

View File

@ -0,0 +1,208 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module AST.GenerateSyntax
( syntaxDatatype
, astDeclarationsForLanguage
) where
import AST.Deserialize (Children (..), Datatype (..), DatatypeName (..), Field (..), Multiple (..), Named (..), Required (..), Type (..))
import AST.Token
import AST.Traversable1.Class
import qualified AST.Unmarshal as TS
import Data.Aeson hiding (String)
import Data.Foldable
import Data.List
import Data.List.NonEmpty (NonEmpty (..))
import Data.Text (Text)
import Foreign.C.String
import Foreign.Ptr
import GHC.Generics hiding (Constructor, Datatype)
import GHC.Records
import Language.Haskell.TH as TH
import Language.Haskell.TH.Syntax as TH
import System.Directory
import System.FilePath.Posix
import qualified TreeSitter.Language as TS
import TreeSitter.Node
import TreeSitter.Symbol (TSSymbol, toHaskellCamelCaseIdentifier, toHaskellPascalCaseIdentifier)
-- | Derive Haskell datatypes from a language and its @node-types.json@ file.
--
-- Datatypes will be generated according to the specification in the @node-types.json@ file, with anonymous leaf types defined as synonyms for the 'Token' datatype.
--
-- Any datatypes among the node types which have already been defined in the module where the splice is run will be skipped, allowing customization of the representation of parts of the tree. Note that this should be used sparingly, as it imposes extra maintenance burden, particularly when the grammar is changed. This may be used to e.g. parse literals into Haskell equivalents (e.g. parsing the textual contents of integer literals into 'Integer's), and may require defining 'TS.UnmarshalAnn' or 'TS.SymbolMatching' instances for (parts of) the custom datatypes, depending on where and how the datatype occurs in the generated tree, in addition to the usual 'Foldable', 'Functor', etc. instances provided for generated datatypes.
astDeclarationsForLanguage :: Ptr TS.Language -> FilePath -> Q [Dec]
astDeclarationsForLanguage language filePath = do
_ <- TS.addDependentFileRelative filePath
currentFilename <- loc_filename <$> location
pwd <- runIO getCurrentDirectory
let invocationRelativePath = takeDirectory (pwd </> currentFilename) </> filePath
input <- runIO (eitherDecodeFileStrict' invocationRelativePath) >>= either fail pure
allSymbols <- runIO (getAllSymbols language)
debugSymbolNames <- [d|
debugSymbolNames :: [String]
debugSymbolNames = $(listE (map (litE . stringL . debugPrefix) allSymbols))
|]
(debugSymbolNames <>) . concat @[] <$> traverse (syntaxDatatype language allSymbols) input
-- Build a list of all symbols
getAllSymbols :: Ptr TS.Language -> IO [(String, Named)]
getAllSymbols language = do
count <- TS.ts_language_symbol_count language
traverse getSymbol [(0 :: TSSymbol) .. fromIntegral (pred count)]
where
getSymbol i = do
cname <- TS.ts_language_symbol_name language i
n <- peekCString cname
t <- TS.ts_language_symbol_type language i
let named = if t == 0 then Named else Anonymous
pure (n, named)
-- Auto-generate Haskell datatypes for sums, products and leaf types
syntaxDatatype :: Ptr TS.Language -> [(String, Named)] -> Datatype -> Q [Dec]
syntaxDatatype language allSymbols datatype = skipDefined $ do
typeParameterName <- newName "a"
case datatype of
SumType (DatatypeName _) _ subtypes -> do
types' <- fieldTypesToNestedSum subtypes
let fieldName = mkName ("get" <> nameStr)
con <- recC name [TH.varBangType fieldName (TH.bangType strictness (pure types' `appT` varT typeParameterName))]
hasFieldInstance <- makeHasFieldInstance (conT name) (varT typeParameterName) (varE fieldName)
traversalInstances <- makeTraversalInstances (conT name)
pure
( NewtypeD [] name [PlainTV typeParameterName] Nothing con [deriveGN, deriveStockClause, deriveAnyClassClause]
: hasFieldInstance
<> traversalInstances)
ProductType (DatatypeName datatypeName) named children fields -> do
con <- ctorForProductType datatypeName typeParameterName children fields
symbolMatchingInstance <- symbolMatchingInstance allSymbols name named datatypeName
traversalInstances <- makeTraversalInstances (conT name)
pure
( generatedDatatype name [con] typeParameterName
: symbolMatchingInstance
<> traversalInstances)
-- Anonymous leaf types are defined as synonyms for the `Token` datatype
LeafType (DatatypeName datatypeName) Anonymous -> do
tsSymbol <- runIO $ withCStringLen datatypeName (\(s, len) -> TS.ts_language_symbol_for_name language s len False)
pure [ TySynD name [] (ConT ''Token `AppT` LitT (StrTyLit datatypeName) `AppT` LitT (NumTyLit (fromIntegral tsSymbol))) ]
LeafType (DatatypeName datatypeName) Named -> do
con <- ctorForLeafType (DatatypeName datatypeName) typeParameterName
symbolMatchingInstance <- symbolMatchingInstance allSymbols name Named datatypeName
traversalInstances <- makeTraversalInstances (conT name)
pure
( generatedDatatype name [con] typeParameterName
: symbolMatchingInstance
<> traversalInstances)
where
-- Skip generating datatypes that have already been defined (overridden) in the module where the splice is running.
skipDefined m = do
isLocal <- lookupTypeName nameStr >>= maybe (pure False) isLocalName
if isLocal then pure [] else m
name = mkName nameStr
nameStr = toNameString (datatypeNameStatus datatype) (getDatatypeName (AST.Deserialize.datatypeName datatype))
deriveStockClause = DerivClause (Just StockStrategy) [ ConT ''Eq, ConT ''Ord, ConT ''Show, ConT ''Generic, ConT ''Generic1]
deriveAnyClassClause = DerivClause (Just AnyclassStrategy) [ConT ''TS.Unmarshal, ConT ''Traversable1 `AppT` VarT (mkName "someConstraint")]
deriveGN = DerivClause (Just NewtypeStrategy) [ConT ''TS.SymbolMatching]
generatedDatatype name cons typeParameterName = DataD [] name [PlainTV typeParameterName] Nothing cons [deriveStockClause, deriveAnyClassClause]
makeTraversalInstances :: TypeQ -> Q [Dec]
makeTraversalInstances ty =
[d|
instance Foldable $ty where
foldMap = foldMapDefault1
instance Functor $ty where
fmap = fmapDefault1
instance Traversable $ty where
traverse = traverseDefault1
|]
makeHasFieldInstance :: TypeQ -> TypeQ -> ExpQ -> Q [Dec]
makeHasFieldInstance ty param elim =
[d|instance HasField "ann" $(ty `appT` param) $param where
getField = TS.gann . $elim |]
-- | Create TH-generated SymbolMatching instances for sums, products, leaves
symbolMatchingInstance :: [(String, Named)] -> Name -> Named -> String -> Q [Dec]
symbolMatchingInstance allSymbols name named str = do
let tsSymbols = elemIndices (str, named) allSymbols
names = intercalate ", " $ fmap (debugPrefix . (!!) allSymbols) tsSymbols
[d|instance TS.SymbolMatching $(conT name) where
matchedSymbols _ = tsSymbols
showFailure _ node = "expected " <> $(litE (stringL names))
<> " but got " <> if nodeSymbol node == 65535 then "ERROR" else genericIndex debugSymbolNames (nodeSymbol node)
<> " [" <> show r1 <> ", " <> show c1 <> "] -"
<> " [" <> show r2 <> ", " <> show c2 <> "]"
where TSPoint r1 c1 = nodeStartPoint node
TSPoint r2 c2 = nodeEndPoint node|]
-- | Prefix symbol names for debugging to disambiguate between Named and Anonymous nodes.
debugPrefix :: (String, Named) -> String
debugPrefix (name, Named) = name
debugPrefix (name, Anonymous) = "_" <> name
-- | Build Q Constructor for product types (nodes with fields)
ctorForProductType :: String -> Name -> Maybe Children -> [(String, Field)] -> Q Con
ctorForProductType constructorName typeParameterName children fields = ctorForTypes constructorName lists where
lists = annotation : fieldList <> childList
annotation = ("ann", varT typeParameterName)
fieldList = map (fmap toType) fields
childList = toList $ fmap toTypeChild children
toType (MkField required fieldTypes mult) =
let ftypes = fieldTypesToNestedSum fieldTypes `appT` varT typeParameterName
in case (required, mult) of
(Required, Multiple) -> appT (conT ''NonEmpty) ftypes
(Required, Single) -> ftypes
(Optional, Multiple) -> appT (conT ''[]) ftypes
(Optional, Single) -> appT (conT ''Maybe) ftypes
toTypeChild (MkChildren field) = ("extra_children", toType field)
-- | Build Q Constructor for leaf types (nodes with no fields or subtypes)
ctorForLeafType :: DatatypeName -> Name -> Q Con
ctorForLeafType (DatatypeName name) typeParameterName = ctorForTypes name
[ ("ann", varT typeParameterName) -- ann :: a
, ("text", conT ''Text) -- text :: Text
]
-- | Build Q Constructor for records
ctorForTypes :: String -> [(String, Q TH.Type)] -> Q Con
ctorForTypes constructorName types = recC (toName Named constructorName) recordFields where
recordFields = map (uncurry toVarBangType) types
toVarBangType str type' = TH.varBangType (mkName . toHaskellCamelCaseIdentifier $ str) (TH.bangType strictness type')
-- | Convert field types to Q types
fieldTypesToNestedSum :: NonEmpty AST.Deserialize.Type -> Q TH.Type
fieldTypesToNestedSum xs = go (toList xs)
where
combine lhs rhs = (conT ''(:+:) `appT` lhs) `appT` rhs -- (((((a :+: b) :+: c) :+: d)) :+: e) ((a :+: b) :+: (c :+: d))
convertToQType (MkType (DatatypeName n) named) = conT (toName named n)
go [x] = convertToQType x
go xs = let (l,r) = splitAt (length xs `div` 2) xs in combine (go l) (go r)
-- | Create bang required to build records
strictness :: BangQ
strictness = TH.bang noSourceUnpackedness noSourceStrictness
-- | Prepend "Anonymous" to named node when false, otherwise use regular toName
toName :: Named -> String -> Name
toName named str = mkName (toNameString named str)
toNameString :: Named -> String -> String
toNameString named str = prefix named <> toHaskellPascalCaseIdentifier str
where
prefix Anonymous = "Anonymous"
prefix Named = ""
-- | Get the 'Module', if any, for a given 'Name'.
moduleForName :: Name -> Maybe Module
moduleForName n = Module . PkgName <$> namePackage n <*> (ModName <$> nameModule n)
-- | Test whether the name is defined in the module where the splice is executed.
isLocalName :: Name -> Q Bool
isLocalName n = (moduleForName n ==) . Just <$> thisModule

View File

@ -0,0 +1,83 @@
{-# LANGUAGE DataKinds, DeriveAnyClass, DeriveGeneric, DuplicateRecordFields, TypeOperators #-}
module AST.Grammar.Examples () where
import Control.Effect.Reader
import Control.Monad.Fail
import qualified Data.ByteString as B
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import GHC.Generics ((:+:), Generic1)
import Numeric (readDec)
import Prelude hiding (fail)
import Source.Range
import AST.Token
import AST.Unmarshal
-- | An example of a sum-of-products datatype.
newtype Expr a = Expr ((If :+: Block :+: Var :+: Lit :+: Bin) a)
deriving (Generic1, Unmarshal)
instance SymbolMatching Expr where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Product with multiple fields.
data If a = If { ann :: a, condition :: Expr a, consequence :: Expr a, alternative :: Maybe (Expr a) }
deriving (Generic1, Unmarshal)
instance SymbolMatching If where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Single-field product.
data Block a = Block { ann :: a, body :: [Expr a] }
deriving (Generic1, Unmarshal)
instance SymbolMatching Block where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Leaf node.
data Var a = Var { ann :: a, text :: Text.Text }
deriving (Generic1, Unmarshal)
instance SymbolMatching Var where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Custom leaf node.
data Lit a = Lit { ann :: a, lit :: IntegerLit }
deriving (Generic1, Unmarshal)
instance SymbolMatching Lit where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Product with anonymous sum field.
data Bin a = Bin { ann :: a, lhs :: Expr a, op :: (AnonPlus :+: AnonTimes) a, rhs :: Expr a }
deriving (Generic1, Unmarshal)
instance SymbolMatching Bin where
matchedSymbols _ = []
showFailure _ _ = ""
-- | Anonymous leaf node.
type AnonPlus = Token "+" 0
-- | Anonymous leaf node.
type AnonTimes = Token "*" 1
newtype IntegerLit = IntegerLit Integer
instance UnmarshalAnn IntegerLit where
unmarshalAnn node = do
Range start end <- unmarshalAnn node
bytestring <- asks source
let drop = B.drop start
take = B.take (end - start)
slice = take . drop
str = Text.unpack (Text.decodeUtf8 (slice bytestring))
case readDec str of
(i, _):_ -> pure (IntegerLit i)
_ -> fail ("could not parse '" <> str <> "'")

View File

@ -0,0 +1,33 @@
{-# LANGUAGE TemplateHaskell #-}
module AST.Grammar.TH
( mkStaticallyKnownRuleGrammarData
) where
import Data.Ix (Ix)
import Data.List (mapAccumL)
import qualified Data.Set as Set
import Foreign.Ptr
import Language.Haskell.TH
import Language.Haskell.TH.Syntax
import TreeSitter.Symbol
import TreeSitter.Language (Language, languageSymbols)
-- | TemplateHaskell construction of a datatype for the referenced Language.
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData :: Name -> Ptr Language -> Q [Dec]
mkStaticallyKnownRuleGrammarData name language = do
symbols <- renameDups . map ((,) . fst <*> uncurry symbolToName) . (++ [(Regular, "ParseError")]) <$> runIO (languageSymbols language)
Module _ modName <- thisModule
let mkMatch symbolType str = match (conP (Name (OccName str) (NameQ modName)) []) (normalB [e|symbolType|]) []
datatype <- dataD (pure []) name [] Nothing (flip normalC [] . mkName . snd <$> symbols)
[ derivClause Nothing (map conT [ ''Bounded, ''Enum, ''Eq, ''Ix, ''Ord, ''Show ]) ]
symbolInstance <- [d|
instance Symbol $(conT name) where
symbolType = $(lamCaseE (uncurry mkMatch <$> symbols)) |]
pure (datatype : symbolInstance)
renameDups :: [(a, String)] -> [(a, String)]
renameDups = snd . mapAccumL go mempty
where go done (ty, name) = let name' = rename name in (Set.insert name' done, (ty, name'))
where rename name | name `Set.member` done = rename (name ++ "'")
| otherwise = name

View File

@ -0,0 +1,17 @@
{-# LANGUAGE DataKinds, DeriveGeneric, DeriveTraversable, KindSignatures #-}
module AST.Token
( Token(..)
) where
import GHC.Generics (Generic, Generic1)
import GHC.TypeLits (Symbol, Nat)
-- | An AST node representing a token, indexed by its name and numeric value.
--
-- For convenience, token types are typically used via type synonyms, e.g.:
--
-- @
-- type AnonymousPlus = Token "+" 123
-- @
newtype Token (symName :: Symbol) (symVal :: Nat) a = Token { ann :: a }
deriving (Eq, Foldable, Functor, Generic, Generic1, Ord, Show, Traversable)

View File

@ -0,0 +1,405 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module AST.Unmarshal
( parseByteString
, UnmarshalState(..)
, UnmarshalError(..)
, FieldName(..)
, Unmarshal(..)
, UnmarshalAnn(..)
, UnmarshalField(..)
, SymbolMatching(..)
, Match(..)
, hoist
, lookupSymbol
, unmarshalNode
, GHasAnn(..)
) where
import Control.Algebra (send)
import Control.Carrier.Reader hiding (asks)
import Control.Exception
import Control.Monad.IO.Class
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Data.Coerce
import Data.Foldable (toList)
import qualified Data.IntMap as IntMap
import Data.List.NonEmpty (NonEmpty (..))
import Data.Proxy
import qualified Data.Text as Text
import Data.Text.Encoding
import Data.Text.Encoding.Error (lenientDecode)
import Foreign.C.String
import Foreign.Marshal.Array
import Foreign.Marshal.Utils
import Foreign.Ptr
import Foreign.Storable
import GHC.Generics
import GHC.Records
import GHC.TypeLits
import Source.Loc
import Source.Span
import TreeSitter.Cursor as TS
import TreeSitter.Language as TS
import TreeSitter.Node as TS
import TreeSitter.Parser as TS
import AST.Token as TS
import TreeSitter.Tree as TS
asks :: Has (Reader r) sig m => (r -> r') -> m r'
asks f = send (Ask (pure . f))
{-# INLINE asks #-}
-- Parse source code and produce AST
parseByteString :: (Unmarshal t, UnmarshalAnn a) => Ptr TS.Language -> ByteString -> IO (Either String (t a))
parseByteString language bytestring = withParser language $ \ parser -> withParseTree parser bytestring $ \ treePtr ->
if treePtr == nullPtr then
pure (Left "error: didn't get a root node")
else
withRootNode treePtr $ \ rootPtr ->
withCursor (castPtr rootPtr) $ \ cursor ->
(Right <$> runReader (UnmarshalState bytestring cursor) (liftIO (peek rootPtr) >>= unmarshalNode))
`catch` (pure . Left . getUnmarshalError)
newtype UnmarshalError = UnmarshalError { getUnmarshalError :: String }
deriving (Show)
instance Exception UnmarshalError
data UnmarshalState = UnmarshalState
{ source :: {-# UNPACK #-} !ByteString
, cursor :: {-# UNPACK #-} !(Ptr Cursor)
}
type MatchM = ReaderC UnmarshalState IO
newtype Match t = Match
{ runMatch :: forall a . UnmarshalAnn a => Node -> MatchM (t a)
}
-- | A church-encoded binary tree with constant-time 'singleton', 'mempty', '<>', and 'fmap', and linear-time 'foldMap'.
newtype B a = B (forall r . (r -> r -> r) -> (a -> r) -> r -> r)
instance Functor B where
fmap f (B run) = B (\ fork leaf -> run fork (leaf . f))
{-# INLINE fmap #-}
a <$ B run = B (\ fork leaf -> run fork (leaf . const a))
{-# INLINE (<$) #-}
instance Semigroup (B a) where
B l <> B r = B (\ fork leaf nil -> fork (l fork leaf nil) (r fork leaf nil))
{-# INLINE (<>) #-}
instance Monoid (B a) where
mempty = B (\ _ _ nil -> nil)
{-# INLINE mempty #-}
instance Foldable B where
foldMap f (B run) = run (<>) f mempty
{-# INLINE foldMap #-}
singleton :: a -> B a
singleton a = B (\ _ leaf _ -> leaf a)
{-# INLINE singleton #-}
hoist :: (forall x . t x -> t' x) -> Match t -> Match t'
hoist f (Match run) = Match (fmap f . run)
{-# INLINE hoist #-}
lookupSymbol :: TSSymbol -> IntMap.IntMap a -> Maybe a
lookupSymbol sym map = IntMap.lookup (fromIntegral sym) map
{-# INLINE lookupSymbol #-}
-- | Unmarshal a node
unmarshalNode :: forall t a .
( UnmarshalAnn a
, Unmarshal t
)
=> Node
-> MatchM (t a)
unmarshalNode node = case lookupSymbol (nodeSymbol node) matchers' of
Just t -> runMatch t node
Nothing -> liftIO . throwIO . UnmarshalError $ showFailure (Proxy @t) node
{-# INLINE unmarshalNode #-}
-- | Unmarshalling is the process of iterating over tree-sitters parse trees using its tree cursor API and producing Haskell ASTs for the relevant nodes.
--
-- Datatypes which can be constructed from tree-sitter parse trees may use the default definition of 'matchers' providing that they have a suitable 'Generic1' instance.
class SymbolMatching t => Unmarshal t where
matchers' :: IntMap.IntMap (Match t)
matchers' = IntMap.fromList (toList matchers)
matchers :: B (Int, Match t)
default matchers :: (Generic1 t, GUnmarshal (Rep1 t)) => B (Int, Match t)
matchers = foldMap (singleton . (, match)) (matchedSymbols (Proxy @t))
where match = Match $ \ node -> do
cursor <- asks cursor
goto cursor (nodeTSNode node)
fmap to1 (gunmarshalNode node)
instance (Unmarshal f, Unmarshal g) => Unmarshal (f :+: g) where
matchers = fmap (fmap (hoist L1)) matchers <> fmap (fmap (hoist R1)) matchers
instance Unmarshal t => Unmarshal (Rec1 t) where
matchers = coerce (matchers @t)
instance (KnownNat n, KnownSymbol sym) => Unmarshal (Token sym n) where
matchers = singleton (fromIntegral (natVal (Proxy @n)), Match (fmap Token . unmarshalAnn))
-- | Unmarshal an annotation field.
--
-- Leaf nodes have 'Text.Text' fields, and leaves, anonymous leaves, and products all have parametric annotation fields. All of these fields are unmarshalled using the metadata of the node, e.g. its start/end bytes, without reference to any child nodes it may contain.
class UnmarshalAnn a where
unmarshalAnn
:: Node
-> MatchM a
instance UnmarshalAnn () where
unmarshalAnn _ = pure ()
instance UnmarshalAnn Text.Text where
unmarshalAnn node = do
range <- unmarshalAnn node
asks (decodeUtf8With lenientDecode . slice range . source)
-- | Instance for pairs of annotations
instance (UnmarshalAnn a, UnmarshalAnn b) => UnmarshalAnn (a,b) where
unmarshalAnn node = (,)
<$> unmarshalAnn @a node
<*> unmarshalAnn @b node
instance UnmarshalAnn Loc where
unmarshalAnn node = Loc
<$> unmarshalAnn @Range node
<*> unmarshalAnn @Span node
instance UnmarshalAnn Range where
unmarshalAnn node = do
let start = fromIntegral (nodeStartByte node)
end = fromIntegral (nodeEndByte node)
pure (Range start end)
instance UnmarshalAnn Span where
unmarshalAnn node = do
let spanStart = pointToPos (nodeStartPoint node)
spanEnd = pointToPos (nodeEndPoint node)
pure (Span spanStart spanEnd)
pointToPos :: TSPoint -> Pos
pointToPos (TSPoint line column) = Pos (fromIntegral line) (fromIntegral column)
-- | Optional/repeated fields occurring in product datatypes are wrapped in type constructors, e.g. 'Maybe', '[]', or 'NonEmpty', and thus can unmarshal zero or more nodes for the same field name.
class UnmarshalField t where
unmarshalField
:: ( Unmarshal f
, UnmarshalAnn a
)
=> String -- ^ datatype name
-> String -- ^ field name
-> [Node] -- ^ nodes
-> MatchM (t (f a))
instance UnmarshalField Maybe where
unmarshalField _ _ [] = pure Nothing
unmarshalField _ _ [x] = Just <$> unmarshalNode x
unmarshalField d f _ = liftIO . throwIO . UnmarshalError $ "type '" <> d <> "' expected zero or one nodes in field '" <> f <> "' but got multiple"
instance UnmarshalField [] where
unmarshalField d f (x:xs) = do
head' <- unmarshalNode x
tail' <- unmarshalField d f xs
pure $ head' : tail'
unmarshalField _ _ [] = pure []
instance UnmarshalField NonEmpty where
unmarshalField d f (x:xs) = do
head' <- unmarshalNode x
tail' <- unmarshalField d f xs
pure $ head' :| tail'
unmarshalField d f [] = liftIO . throwIO . UnmarshalError $ "type '" <> d <> "' expected one or more nodes in field '" <> f <> "' but got zero"
class SymbolMatching (a :: * -> *) where
matchedSymbols :: Proxy a -> [Int]
-- | Provide error message describing the node symbol vs. the symbols this can match
showFailure :: Proxy a -> Node -> String
instance SymbolMatching f => SymbolMatching (M1 i c f) where
matchedSymbols _ = matchedSymbols (Proxy @f)
showFailure _ = showFailure (Proxy @f)
instance SymbolMatching f => SymbolMatching (Rec1 f) where
matchedSymbols _ = matchedSymbols (Proxy @f)
showFailure _ = showFailure (Proxy @f)
instance (KnownNat n, KnownSymbol sym) => SymbolMatching (Token sym n) where
matchedSymbols _ = [fromIntegral (natVal (Proxy @n))]
showFailure _ _ = "expected " ++ symbolVal (Proxy @sym)
instance (SymbolMatching f, SymbolMatching g) => SymbolMatching (f :+: g) where
matchedSymbols _ = matchedSymbols (Proxy @f) <> matchedSymbols (Proxy @g)
showFailure _ = sep <$> showFailure (Proxy @f) <*> showFailure (Proxy @g)
sep :: String -> String -> String
sep a b = a ++ ". " ++ b
-- | Move the cursor to point at the passed 'TSNode'.
goto :: Ptr Cursor -> TSNode -> MatchM ()
goto cursor node = liftIO (with node (ts_tree_cursor_reset_p cursor))
type Fields = [(FieldName, Node)]
-- | Return the fields remaining in the current branch, represented as 'Map.Map' of 'FieldName's to their corresponding 'Node's.
getFields :: Ptr Cursor -> Node -> MatchM Fields
getFields cursor node
| maxCount == 0 = pure []
| otherwise = do
nodes <- liftIO . allocaArray maxCount $ \ ptr -> do
actualCount <- ts_tree_cursor_copy_child_nodes cursor ptr
peekArray (fromIntegral actualCount) ptr
traverse (\ node -> (, node) <$> getFieldName node) nodes
where
maxCount = fromIntegral (nodeChildCount node)
getFieldName node
| nodeFieldName node == nullPtr = pure (FieldName "extraChildren")
| otherwise = FieldName . toHaskellCamelCaseIdentifier <$> liftIO (peekCString (nodeFieldName node))
lookupField :: FieldName -> Fields -> [Node]
lookupField k = map snd . filter ((== k) . fst)
-- | Return a 'ByteString' that contains a slice of the given 'ByteString'.
slice :: Range -> ByteString -> ByteString
slice (Range start end) = take . drop
where drop = B.drop start
take = B.take (end - start)
newtype FieldName = FieldName { getFieldName :: String }
deriving (Eq, Ord, Show)
-- | Generic construction of ASTs from a 'Map.Map' of named fields.
--
-- Product types (specifically, record types) are constructed by looking up the node for each corresponding field name in the map, moving the cursor to it, and then invoking 'unmarshalNode' to construct the value for that field. Leaf types are constructed as a special case of product types.
--
-- Sum types are constructed by using the current nodes symbol to select the corresponding constructor deterministically.
class GUnmarshal f where
gunmarshalNode
:: UnmarshalAnn a
=> Node
-> MatchM (f a)
instance (Datatype d, GUnmarshalData f) => GUnmarshal (M1 D d f) where
gunmarshalNode = go (gunmarshalNode' (datatypeName @d undefined)) where
go :: (Node -> MatchM (f a)) -> Node -> MatchM (M1 i c f a)
go = coerce
class GUnmarshalData f where
gunmarshalNode'
:: UnmarshalAnn a
=> String
-> Node
-> MatchM (f a)
instance GUnmarshalData f => GUnmarshalData (M1 i c f) where
gunmarshalNode' = go gunmarshalNode' where
go :: (String -> Node -> MatchM (f a)) -> String -> Node -> MatchM (M1 i c f a)
go = coerce
-- For anonymous leaf nodes:
instance GUnmarshalData U1 where
gunmarshalNode' _ _ = pure U1
-- For unary products:
instance UnmarshalAnn k => GUnmarshalData (K1 c k) where
gunmarshalNode' _ = go unmarshalAnn where
go :: (Node -> MatchM k) -> Node -> MatchM (K1 c k a)
go = coerce
-- For anonymous leaf nodes
instance GUnmarshalData Par1 where
gunmarshalNode' _ = go unmarshalAnn where
go :: (Node -> MatchM a) -> Node -> MatchM (Par1 a)
go = coerce
instance Unmarshal t => GUnmarshalData (Rec1 t) where
gunmarshalNode' _ = go unmarshalNode where
go :: (Node -> MatchM (t a)) -> Node -> MatchM (Rec1 t a)
go = coerce
-- For product datatypes:
instance (GUnmarshalProduct f, GUnmarshalProduct g) => GUnmarshalData (f :*: g) where
gunmarshalNode' datatypeName node = asks cursor >>= flip getFields node >>= gunmarshalProductNode @(f :*: g) datatypeName node
-- | Generically unmarshal products
class GUnmarshalProduct f where
gunmarshalProductNode
:: UnmarshalAnn a
=> String
-> Node
-> Fields
-> MatchM (f a)
-- Product structure
instance (GUnmarshalProduct f, GUnmarshalProduct g) => GUnmarshalProduct (f :*: g) where
gunmarshalProductNode datatypeName node fields = (:*:)
<$> gunmarshalProductNode @f datatypeName node fields
<*> gunmarshalProductNode @g datatypeName node fields
-- Contents of product types (ie., the leaves of the product tree)
instance UnmarshalAnn k => GUnmarshalProduct (M1 S c (K1 i k)) where
gunmarshalProductNode _ node _ = go unmarshalAnn node where
go :: (Node -> MatchM k) -> Node -> MatchM (M1 S c (K1 i k) a)
go = coerce
instance GUnmarshalProduct (M1 S c Par1) where
gunmarshalProductNode _ node _ = go unmarshalAnn node where
go :: (Node -> MatchM a) -> Node -> MatchM (M1 S c Par1 a)
go = coerce
instance (UnmarshalField f, Unmarshal g, Selector c) => GUnmarshalProduct (M1 S c (f :.: g)) where
gunmarshalProductNode datatypeName _ = go (unmarshalField datatypeName fieldName . lookupField (FieldName fieldName)) where
go :: (Fields -> MatchM (f (g a))) -> Fields -> MatchM (M1 S c (f :.: g) a)
go = coerce
fieldName = selName @c undefined
instance (Unmarshal t, Selector c) => GUnmarshalProduct (M1 S c (Rec1 t)) where
gunmarshalProductNode datatypeName _ fields =
case lookupField (FieldName fieldName) fields of
[] -> liftIO . throwIO . UnmarshalError $ "type '" <> datatypeName <> "' expected a node '" <> fieldName <> "' but didn't get one"
[x] -> go unmarshalNode x where
go :: (Node -> MatchM (t a)) -> Node -> MatchM (M1 S c (Rec1 t) a)
go = coerce
_ -> liftIO . throwIO . UnmarshalError $ "type '" <> datatypeName <> "' expected a node but got multiple"
where
fieldName = selName @c undefined
class GHasAnn a t where
gann :: t a -> a
instance GHasAnn a f => GHasAnn a (M1 i c f) where
gann = gann . unM1
instance (GHasAnn a l, GHasAnn a r) => GHasAnn a (l :+: r) where
gann (L1 l) = gann l
gann (R1 r) = gann r
instance {-# OVERLAPPABLE #-} HasField "ann" (t a) a => GHasAnn a t where
gann = getField @"ann"

View File

@ -49,12 +49,13 @@ library
base >= 4.13 && < 5
, fused-effects ^>= 1.0
, fused-syntax
, hashable
, parsers ^>= 0.12.10
, pathtype ^>= 0.8.1
, prettyprinter >= 1.2.1 && < 2
, prettyprinter-ansi-terminal ^>= 1.1.1
, semantic-analysis ^>= 0
, semantic-source ^>= 0.0.1
, semantic-source ^>= 0.0.2
, text ^>= 1.2.3.1
, trifecta >= 2 && < 2.2
, unordered-containers ^>= 0.2.10
@ -69,7 +70,7 @@ test-suite test
base
, semantic-analysis
, semantic-core
, semantic-source ^>= 0.0.1
, semantic-source
, fused-effects
, fused-syntax
, hedgehog ^>= 1

View File

@ -1,25 +1,34 @@
{-# LANGUAGE DeriveGeneric, DeriveTraversable, GeneralizedNewtypeDeriving, LambdaCase, OverloadedLists #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
module Core.Name
( module Analysis.Name
( module Analysis.Functor.Named
, reservedNames
, isSimpleCharacter
, needsQuotation
) where
import Analysis.Name
import Analysis.Functor.Named
import qualified Data.Char as Char
import Data.Hashable
import Data.HashSet (HashSet)
import qualified Data.HashSet as HashSet
import Data.Text as Text (any, unpack)
import Data.String
import Data.Text as Text (any)
reservedNames :: HashSet String
reservedNames :: (Eq s, IsString s, Hashable s) => HashSet s
reservedNames = [ "#true", "#false", "if", "then", "else"
, "#unit", "load", "rec", "#record"]
-- | Returns true if any character would require quotation or if the
-- name conflicts with a Core primitive.
needsQuotation :: Name -> Bool
needsQuotation (Name u) = HashSet.member (unpack u) reservedNames || Text.any (not . isSimpleCharacter) u
needsQuotation n
| isGenerated n = False
| otherwise = HashSet.member n reservedNames || Text.any (not . isSimpleCharacter) (formatName n)
-- | A simple character is, loosely defined, a character that is compatible
-- with identifiers in most ASCII-oriented programming languages. This is defined

View File

@ -1,4 +1,6 @@
{-# LANGUAGE FlexibleContexts, GeneralizedNewtypeDeriving, TypeOperators #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeOperators #-}
module Core.Parser
( core
, lit
@ -14,7 +16,7 @@ import Control.Applicative
import Control.Monad
import Core.Core ((:<-) (..), Core)
import qualified Core.Core as Core
import Core.Name
import Core.Name hiding (name)
import qualified Data.Char as Char
import Data.Foldable (foldl')
import Data.Function

View File

@ -10,7 +10,7 @@ module Core.Pretty
import Analysis.File
import Core.Core
import Core.Name
import Core.Name hiding (name)
import Data.Foldable (toList)
import Data.Text.Prettyprint.Doc
import qualified Data.Text.Prettyprint.Doc.Render.String as Pretty
@ -43,7 +43,9 @@ primitive = keyword . mappend "#"
data Style = Unicode | Ascii
name :: Name -> AnsiDoc
name (Name n) = if needsQuotation (Name n) then enclose (symbol "#{") (symbol "}") (pretty n) else pretty n
name n
| needsQuotation n = enclose (symbol "#{") (symbol "}") (pretty (formatName n))
| otherwise = pretty (formatName n)
prettyCore :: Style -> Term Core Name -> AnsiDoc
prettyCore style = unPrec . go . fmap name

View File

@ -1,4 +1,5 @@
{-# LANGUAGE FlexibleContexts, ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Generators
( literal
@ -18,20 +19,21 @@ import qualified Hedgehog.Range as Range
import Control.Algebra
import qualified Core.Core as Core
import Core.Name
import Core.Name (Name, Named)
import qualified Core.Name as Name
-- The 'prune' call here ensures that we don't spend all our time just generating
-- fresh names for variables, since the length of variable names is not an
-- interesting property as they parse regardless.
name :: MonadGen m => m (Named Name)
name = Gen.prune (named' <$> names) where
names = Name <$> Gen.text (Range.linear 1 10) Gen.lower
name = Gen.prune (Name.named' <$> names) where
names = Name.name <$> Gen.text (Range.linear 1 10) Gen.lower
boolean :: (Has Core.Core sig t, MonadGen m) => m (t Name)
boolean = Core.bool <$> Gen.bool
variable :: (Applicative t, MonadGen m) => m (t Name)
variable = pure . namedValue <$> name
variable = pure . Name.namedValue <$> name
ifthenelse :: (Has Core.Core sig t, MonadGen m) => m (t Name) -> m (t Name)
ifthenelse bod = Gen.subterm3 boolean bod bod Core.if'
@ -51,7 +53,7 @@ lambda bod = do
Gen.subterm bod (Core.lam arg)
record :: (Has Core.Core sig t, MonadGen m) => m (t Name) -> m (t Name)
record bod = Core.record <$> Gen.list (Range.linear 0 5) ((,) . namedValue <$> name <*> bod)
record bod = Core.record <$> Gen.list (Range.linear 0 5) ((,) . Name.namedValue <$> name <*> bod)
atoms :: (Has Core.Core sig t, MonadGen m) => [m (t Name)]
atoms = [variable, pure Core.unit, boolean, Core.string <$> Gen.text (Range.linear 1 10) Gen.lower]
@ -69,7 +71,7 @@ expr = Gen.recursive Gen.choice atoms
, Gen.subterm3 expr expr expr Core.if'
, Gen.subterm expr Core.load
, record expr
, Gen.subtermM expr (\ x -> (x Core....) . namedValue <$> name)
, Gen.subtermM expr (\ x -> (x Core..?) . namedValue <$> name)
, Gen.subtermM expr (\ x -> (x Core....) . Name.namedValue <$> name)
, Gen.subtermM expr (\ x -> (x Core..?) . Name.namedValue <$> name)
, Gen.subterm2 expr expr (Core..=)
]

View File

@ -24,9 +24,12 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-ast
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.1
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-go ^>= 0.4.1
@ -50,5 +53,7 @@ library
import: haskell
exposed-modules:
Language.Go
Language.Go.AST
Language.Go.Grammar
Language.Go.Tags
hs-source-dirs: src

View File

@ -6,11 +6,11 @@ module Language.Go
import Data.Proxy
import qualified Language.Go.AST as Go
import qualified Language.Go.Tags as GoTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Go (tree_sitter_go)
import qualified TreeSitter.Go.AST as Go
import qualified TreeSitter.Unmarshal as TS
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: Go.SourceFile a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Go.AST
( module Language.Go.AST
) where
import Prelude hiding (False, Float, Integer, Rational, String, True)
import AST.GenerateSyntax
import qualified Language.Go.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_go "../../../vendor/tree-sitter-go/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Go.Grammar
( tree_sitter_go
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Go (tree_sitter_go)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-go/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_go

View File

@ -9,16 +9,16 @@ module Language.Go.Tags
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.Text as Text
import GHC.Generics
import qualified Language.Go.AST as Go
import Source.Loc
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Go.AST as Go
import TreeSitter.Token
class ToTags t where
tags
@ -30,8 +30,7 @@ class ToTags t where
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -72,12 +71,11 @@ instance ToTags (Token sym n) where tags _ = pure ()
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
yieldTag :: (Has (Reader Source) sig m, Has (Writer Tags.Tags) sig m) => Text -> Kind -> Loc -> Range -> m ()
yieldTag name kind loc range = do

1
semantic-go/vendor/tree-sitter-go vendored Submodule

@ -0,0 +1 @@
Subproject commit 689cc8fbdc0613d267434f221af85aff91a31f8c

View File

@ -21,12 +21,17 @@ tested-with: GHC == 8.6.5
library
exposed-modules:
Language.Java
Language.Java.AST
Language.Java.Grammar
Language.Java.Tags
build-depends:
base >= 4.13 && < 5
, fused-effects ^>= 1.0
, semantic-source ^>= 0.0.1
, semantic-ast
, semantic-codegen
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, tree-sitter ^>= 0.8
, tree-sitter-java ^>= 0.6.1
hs-source-dirs: src
@ -43,3 +48,4 @@ library
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
-Wno-missing-deriving-strategies

View File

@ -1,15 +1,15 @@
-- | Semantic functionality for Java programs.
module Language.Java
( Term(..)
, TreeSitter.Java.tree_sitter_java
, Language.Java.Grammar.tree_sitter_java
) where
import Data.Proxy
import qualified Language.Java.AST as Java
import qualified Language.Java.Tags as JavaTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Java (tree_sitter_java)
import qualified TreeSitter.Java.AST as Java
import qualified TreeSitter.Unmarshal as TS
import qualified Language.Java.Grammar (tree_sitter_java)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: Java.Program a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Java.AST
( module Language.Java.AST
) where
import AST.GenerateSyntax
import qualified Language.Java.Grammar as Grammar
import AST.Token
astDeclarationsForLanguage Grammar.tree_sitter_java "../../../vendor/tree-sitter-java/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Java.Grammar
( tree_sitter_java
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Java (tree_sitter_java)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-java/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_java

View File

@ -8,16 +8,17 @@ module Language.Java.Tags
( ToTags(..)
) where
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import GHC.Generics
import GHC.Generics ((:+:)(..))
import qualified Language.Java.AST as Java
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Java.AST as Java
import TreeSitter.Token
class ToTags t where
tags
@ -29,8 +30,7 @@ class ToTags t where
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -80,12 +80,11 @@ instance ToTags Java.MethodInvocation where
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
instance ToTags Java.AnnotatedType
instance ToTags Java.Annotation
@ -140,7 +139,7 @@ instance ToTags Java.FieldAccess
instance ToTags Java.FieldDeclaration
instance ToTags Java.FinallyClause
instance ToTags Java.FloatingPointType
instance ToTags Java.ForInit
-- instance ToTags Java.ForInit
instance ToTags Java.ForStatement
instance ToTags Java.FormalParameter
instance ToTags Java.FormalParameters
@ -160,7 +159,7 @@ instance ToTags Java.LabeledStatement
instance ToTags Java.LambdaExpression
instance ToTags Java.Literal
instance ToTags Java.LocalVariableDeclaration
instance ToTags Java.LocalVariableDeclarationStatement
-- instance ToTags Java.LocalVariableDeclarationStatement
instance ToTags Java.MarkerAnnotation
-- instance ToTags Java.MethodDeclaration
-- instance ToTags Java.MethodInvocation

@ -0,0 +1 @@
Subproject commit afc4cec799f6594390aeb0ca5e16ec89e73d488e

View File

@ -21,9 +21,13 @@ tested-with: GHC == 8.6.5
library
exposed-modules:
Language.JSON
Language.JSON.AST
Language.JSON.Grammar
build-depends:
base >= 4.13 && < 5
, semantic-codegen
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, tree-sitter ^>= 0.8
, tree-sitter-json ^>= 0.6
hs-source-dirs: src
@ -40,3 +44,5 @@ library
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies

View File

@ -1,14 +1,14 @@
-- | Semantic functionality for JSON programs.
module Language.JSON
( Term(..)
, TreeSitter.JSON.tree_sitter_json
, Language.JSON.Grammar.tree_sitter_json
) where
import Data.Proxy
import qualified Language.JSON.AST as JSON
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.JSON (tree_sitter_json)
import qualified TreeSitter.JSON.AST as JSON
import qualified TreeSitter.Unmarshal as TS
import qualified Language.JSON.Grammar (tree_sitter_json)
import qualified AST.Unmarshal as TS
newtype Term a = Term { getTerm :: JSON.Document a }

View File

@ -0,0 +1,20 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.JSON.AST
( module Language.JSON.AST
) where
import Prelude hiding (String)
import AST.GenerateSyntax
import qualified Language.JSON.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_json "../../../vendor/tree-sitter-json/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.JSON.Grammar
( tree_sitter_json
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.JSON (tree_sitter_json)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-json/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_json

@ -0,0 +1 @@
Subproject commit 7b6a33f300e3e88c3017e0a9d88c77b50ea6d149

View File

@ -0,0 +1,5 @@
# Revision history for semantic-parse
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.

21
semantic-parse/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 GitHub
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,28 +1,25 @@
# semantic-ast
# semantic-parse
This package has two goals:
1. Develop a library that will produce ASTs;
2. Provide a command line tool that will output ASTs in supported formats.
This package provides a command line tool that will output ASTs in supported formats.
#### CLI
To output ASTs, run the `semantic-ast` command, specifying two mandatory options: 1) the format you'd like to return (ex., `Show`, `JSON`, etc.) and 2) the option specifying whether the source code will be passed in directly via command line (using `--sourceString`) or via providing the file path `--sourceFile`.
To output ASTs, run the `semantic-parse` command, specifying two mandatory options: 1) the format you'd like to return (ex., `Show`, `JSON`, etc.) and 2) the option specifying whether the source code will be passed in directly via command line (using `--sourceString`) or via providing the file path `--sourceFile`.
Filepath:
```
semantic-ast --format [FORMAT] --sourceFile [FILEPATH]
semantic-parse --format [FORMAT] --sourceFile [FILEPATH]
```
Source string:
```
semantic-ast --format [FORMAT] --sourceString [SOURCE]
semantic-parse --format [FORMAT] --sourceString [SOURCE]
```
An example command is:
```
semantic-ast -- --format Show --sourceString "a"
semantic-parse -- --format Show --sourceString "a"
```
This will generate an AST

2
semantic-parse/Setup.hs Normal file
View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -1,24 +1,27 @@
{-# LANGUAGE TypeApplications #-}
{-# OPTIONS_GHC -Wno-unused-top-binds #-}
module Main (main) where
import TreeSitter.Unmarshal
import qualified TreeSitter.Python.AST as AST
import qualified TreeSitter.Python as Python
import AST.Unmarshal
import qualified Language.Python.AST as AST
import qualified Language.Python.Grammar as Python
import Source.Range
import Source.Span
import Data.Aeson (toJSON)
import Data.ByteString.Char8
import Data.ByteString (readFile)
import Options.Applicative hiding (style)
import Text.Pretty.Simple (pPrint, pPrintNoColor)
import Data.Foldable (traverse_)
import Control.Monad ((>=>))
import Marshal.JSON (marshal)
import Data.ByteString.Lazy.Char8 (putStrLn)
import Data.Aeson.Encode.Pretty (encodePretty)
data SemanticAST = SemanticAST
{ format :: Format
, noColor :: Bool
, source :: Either [FilePath] String
{ _format :: Format
, _noColor :: Bool
, _source :: Either [FilePath] String
}
-- Usage: semantic-ast --format ARG [--no-color] (--sourceString STRING | FILEPATHS…)
@ -51,13 +54,13 @@ generateAST (SemanticAST format noColor source) =
Left filePaths -> traverse Data.ByteString.readFile filePaths
Right source -> pure [Data.ByteString.Char8.pack source]
go = ast >=> display
ast = parseByteString @AST.Module @(Range, Span) Python.tree_sitter_python
ast = parseByteString @AST.Module @(Range, Span) Python.tree_sitter_python -- TODO: generalize for all languages
display = case format of
Json -> Data.ByteString.Lazy.Char8.putStrLn . encodePretty . either toJSON (marshal . fmap (const ())) -- TODO: replacing range and span annotations with () for which there is a ToJSON instance for now, deal with this later
Show -> print
Pretty | noColor -> pPrintNoColor
| otherwise -> pPrint
-- need AST in scope for case format and ..
opts :: ParserInfo SemanticAST
opts = info (parseAST <**> helper)
@ -68,6 +71,5 @@ opts = info (parseAST <**> helper)
-- TODO: Define formats for json, sexpression, etc.
data Format = Show
| Pretty
| Json
deriving (Read)
-- bool field would break Read

View File

@ -0,0 +1,57 @@
cabal-version: 2.4
-- Initial package description 'semantic-ast.cabal' generated by 'cabal
-- init'. For further documentation, see
-- http://haskell.org/cabal/users-guide/
name: semantic-parse
version: 0.1.0.0
-- synopsis:
-- description:
-- bug-reports:
license: MIT
license-file: LICENSE
author: The Semantic Authors
maintainer: opensource+semantic@github.com
copyright: (c) 2019 GitHub, Inc.
category: Language
extra-source-files: CHANGELOG.md
tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
ghc-options:
-Weverything
-Wno-missing-local-signatures
-Wno-missing-import-lists
-Wno-implicit-prelude
-Wno-safe
-Wno-unsafe
-Wno-name-shadowing
-Wno-monomorphism-restriction
-Wno-missed-specialisations
-Wno-all-missed-specialisations
-Wno-star-is-type
if (impl(ghc >= 8.8))
ghc-options: -Wno-missing-deriving-strategies
executable semantic-parse
import: haskell
main-is: Main.hs
-- other-modules:
-- other-extensions:
build-depends: base
, semantic-ast
, tree-sitter
, semantic-source
, tree-sitter-python ^>= 0.8.1
, bytestring
, optparse-applicative
, pretty-simple
, aeson
, bytestring
, aeson-pretty
, semantic-python
, text
hs-source-dirs: app
default-language: Haskell2010

View File

@ -21,18 +21,22 @@ tested-with: GHC == 8.6.5
common haskell
default-language: Haskell2010
build-depends: base ^>= 4.13
, fused-effects ^>= 1.0
, fused-effects ^>= 1.0.0.1
, fused-syntax
, parsers ^>= 0.12.10
, semantic-analysis ^>= 0
, semantic-ast
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.1
, semantic-codegen
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, semantic-scope-graph ^>= 0.0
, semilattices ^>= 0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-python ^>= 0.8.1
, containers
ghc-options:
-Weverything
-Wno-missing-local-signatures
@ -53,12 +57,15 @@ library
import: haskell
exposed-modules:
Language.Python
Language.Python.AST
Language.Python.Core
Language.Python.Grammar
Language.Python.Failure
Language.Python.Patterns
Language.Python.ScopeGraph
Language.Python.Tags
hs-source-dirs: src
build-depends: lens ^>= 4.18
test-suite compiling
import: haskell
@ -79,6 +86,7 @@ test-suite compiling
, process ^>= 1.6.5
, resourcet ^>= 1.2.2
, semantic-analysis ^>= 0
, semantic-codegen
, streaming ^>= 0.2.2
, streaming-process ^>= 0.1
, streaming-bytestring ^>= 0.1.6
@ -99,6 +107,7 @@ test-suite graphing
build-depends: base
, semantic-python
, semantic-codegen
, semantic-scope-graph
, bytestring
, pathtype

View File

@ -1,17 +1,17 @@
-- | Semantic functionality for Python programs.
module Language.Python
( Term(..)
, TreeSitter.Python.tree_sitter_python
, Language.Python.Grammar.tree_sitter_python
) where
import qualified AST.Unmarshal as TS
import Data.Proxy
import qualified Language.Python.AST as Py
import qualified Language.Python.Grammar (tree_sitter_python)
import Language.Python.ScopeGraph
import qualified Language.Python.Tags as PyTags
import ScopeGraph.Convert
import Scope.Graph.Convert
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Python (tree_sitter_python)
import qualified TreeSitter.Python.AST as Py
import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: Py.Module a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Python.AST
( module Language.Python.AST
) where
import Prelude hiding (False, Float, Integer, String, True)
import AST.GenerateSyntax
import qualified Language.Python.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_python "../../../vendor/tree-sitter-python/src/node-types.json"

View File

@ -33,12 +33,12 @@ import Data.Function
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe
import GHC.Records
import qualified Language.Python.AST as Py
import Language.Python.Failure
import Language.Python.Patterns
import Source.Span (Span)
import Syntax.Stack (Stack (..))
import qualified Syntax.Stack as Stack
import qualified TreeSitter.Python.AST as Py
-- | Keeps track of the current scope's bindings (so that we can, when
-- compiling a class or module, return the list of bound variables as
@ -197,7 +197,7 @@ instance Compile Py.Call where
compile it _ _ = pure . invariantViolated $ "can't compile Call node with generator expression: " <> show it
instance Compile Py.ClassDefinition where
compile it@Py.ClassDefinition { body = pybody, name = Py.Identifier _ann (Name -> n) } cc next = do
compile it@Py.ClassDefinition { body = pybody, name = Py.Identifier _ann (Name.name -> n) } cc next = do
let buildTypeCall _ = do
bindings <- asks @Bindings (toList . unBindings)
let buildName n = (n, pure n)
@ -205,7 +205,7 @@ instance Compile Py.ClassDefinition where
typefn = prelude ["type"]
object = prelude ["object"]
pure (typefn $$ Core.string (coerce n) $$ object $$ contents)
pure (typefn $$ Core.string (formatName n) $$ object $$ contents)
body <- compile pybody buildTypeCall next
let coreName = Name.named' n
@ -244,8 +244,8 @@ instance Compile Py.DottedName where
compile it@Py.DottedName
{ extraChildren = Py.Identifier { text } :| rest
} cc _next = do
let aggregate Py.Identifier { text = inner } x = x ... Name inner
composite = foldr aggregate (pure (Name text)) rest
let aggregate Py.Identifier { text = inner } x = x ... Name.name inner
composite = foldr aggregate (pure (Name.name text)) rest
locate it composite & cc
@ -287,21 +287,21 @@ instance Compile Py.FunctionDefinition where
let parameters' = catMaybes parameterMs
body' <- compile body pure next
-- Build a lambda.
let located = locate it (rec (Name.named' (Name name)) (lams parameters' body'))
let located = locate it (rec (Name.named' (Name.name name)) (lams parameters' body'))
-- Give it a name (below), then augment the current continuation
-- with the new name (with 'def'), so that calling contexts know
-- that we have built an exportable definition.
assigning located <$> local (def (Name name)) (cc next)
where param (Py.Parameter (Prj (Py.Identifier _pann pname))) = Just . named' . Name $ pname
assigning located <$> local (def (Name.name name)) (cc next)
where param (Py.Parameter (Prj (Py.Identifier _pann pname))) = Just . named' . Name.name $ pname
param _ = Nothing
assigning item f = (Name.named' (Name name) :<- item) >>>= f
assigning item f = (Name.named' (Name.name name) :<- item) >>>= f
instance Compile Py.FutureImportStatement
instance Compile Py.GeneratorExpression
instance Compile Py.GlobalStatement
instance Compile Py.Identifier where
compile Py.Identifier { text } cc _ = cc . pure . Name $ text
compile Py.Identifier { text } cc _ = cc . pure . Name.name $ text
instance Compile Py.IfStatement where
compile it@Py.IfStatement{ condition, consequence, alternative} cc next =
@ -323,7 +323,7 @@ instance Compile Py.Lambda where
, parameters
} cc next = do
let unparams (Py.LambdaParameters _ ps) = toList ps
unparam (Py.Parameter (Prj (Py.Identifier _pann pname))) = Just . named' . Name $ pname
unparam (Py.Parameter (Prj (Py.Identifier _pann pname))) = Just . named' . Name.name $ pname
unparam _ = Nothing
body' <- compile body cc next
let params = maybe [] unparams parameters

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Python.Grammar
( tree_sitter_python
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Python (tree_sitter_python)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-python/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_python

View File

@ -9,17 +9,16 @@ module Language.Python.Patterns
) where
import AST.Element
import Data.Coerce
import Data.Text (Text)
import qualified TreeSitter.Python.AST as Py
import qualified Analysis.Name
import qualified Language.Python.AST as Py
-- | Useful pattern synonym for extracting a single identifier from
-- a Python ExpressionList. Easier than pattern-matching every time.
-- TODO: when this is finished, we won't need this pattern, as we'll
-- handle ExpressionLists the smart way every time.
pattern SingleIdentifier :: Coercible t Text => t -> Py.ExpressionList a
pattern SingleIdentifier name <- Py.ExpressionList
pattern SingleIdentifier :: Analysis.Name.Name -> Py.ExpressionList a
pattern SingleIdentifier n <- Py.ExpressionList
{ Py.extraChildren =
[ Py.Expression (Prj (Py.PrimaryExpression (Prj Py.Identifier { text = coerce -> name })))
[ Py.Expression (Prj (Py.PrimaryExpression (Prj Py.Identifier { text = Analysis.Name.name -> n })))
]
}

View File

@ -5,6 +5,7 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE PatternSynonyms #-}
@ -15,28 +16,32 @@
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Python.ScopeGraph
( scopeGraphModule
) where
import Control.Algebra (Algebra (..), handleCoercible)
import Control.Effect.Sketch
import qualified Analysis.Name as Name
import AST.Element
import Control.Effect.ScopeGraph
import qualified Control.Effect.ScopeGraph.Properties.Declaration as Props
import qualified Control.Effect.ScopeGraph.Properties.Function as Props
import qualified Control.Effect.ScopeGraph.Properties.Reference as Props
import Control.Lens (set, (^.))
import Data.Foldable
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe
import Data.Monoid
import Data.Name
import GHC.Generics
import qualified Data.ScopeGraph as ScopeGraph
import Data.Semilattice.Lower
import Data.Traversable
import GHC.Records
import GHC.TypeLits
import qualified Language.Python.AST as Py
import Language.Python.Patterns
import ScopeGraph.Convert (Result (..), complete, todo)
import Source.Loc
import qualified TreeSitter.Python.AST as Py
-- This orphan instance will perish once it lands in fused-effects.
instance Algebra sig m => Algebra sig (Ap m) where
alg = Ap . alg . handleCoercible
import Scope.Graph.Convert (Result (..), complete, todo)
import Source.Loc (Loc)
import Source.Span (Span, span_)
-- This typeclass is internal-only, though it shares the same interface
-- as the one defined in semantic-scope-graph. The somewhat-unconventional
@ -44,7 +49,7 @@ instance Algebra sig m => Algebra sig (Ap m) where
-- every single Python AST type.
class (forall a . Show a => Show (t a)) => ToScopeGraph t where
scopeGraph ::
( Has (Sketch Name) sig m
( ScopeGraphEff sig m
, Monoid (m Result)
)
=> t Loc
@ -56,7 +61,7 @@ instance (ToScopeGraph l, ToScopeGraph r) => ToScopeGraph (l :+: r) where
onField ::
forall (field :: Symbol) syn sig m r .
( Has (Sketch Name) sig m
( ScopeGraphEff sig m
, HasField field (r Loc) (syn Loc)
, ToScopeGraph syn
, Monoid (m Result)
@ -70,7 +75,7 @@ onField
onChildren ::
( Traversable t
, ToScopeGraph syn
, Has (Sketch Name) sig m
, ScopeGraphEff sig m
, HasField "extraChildren" (r Loc) (t (syn Loc))
, Monoid (m Result)
)
@ -81,14 +86,21 @@ onChildren
. traverse scopeGraph
. getField @"extraChildren"
scopeGraphModule :: Has (Sketch Name) sig m => Py.Module Loc -> m Result
scopeGraphModule :: ScopeGraphEff sig m => Py.Module Loc -> m Result
scopeGraphModule = getAp . scopeGraph
instance ToScopeGraph Py.AssertStatement where scopeGraph = onChildren
instance ToScopeGraph Py.Assignment where
scopeGraph (Py.Assignment _ (SingleIdentifier t) _val _typ) = complete <* declare @Name t DeclProperties
scopeGraph x = todo x
scopeGraph (Py.Assignment ann (SingleIdentifier t) val _typ) = do
declare t Props.Declaration
{ Props.kind = ScopeGraph.Assignment
, Props.relation = ScopeGraph.Default
, Props.associatedScope = Nothing
, Props.span = ann^.span_
}
maybe complete scopeGraph val
scopeGraph x = todo x
instance ToScopeGraph Py.Await where
scopeGraph (Py.Await _ a) = scopeGraph a
@ -107,7 +119,19 @@ instance ToScopeGraph Py.Block where scopeGraph = onChildren
instance ToScopeGraph Py.BreakStatement where scopeGraph = mempty
instance ToScopeGraph Py.Call where scopeGraph = todo
instance ToScopeGraph Py.Call where
scopeGraph Py.Call
{ function
, arguments = L1 Py.ArgumentList { extraChildren = args }
} = do
result <- scopeGraph function
let scopeGraphArg = \case
Prj expr -> scopeGraph @Py.Expression expr
other -> todo other
args <- traverse scopeGraphArg args
pure (result <> mconcat args)
scopeGraph it = todo it
instance ToScopeGraph Py.ClassDefinition where scopeGraph = todo
@ -156,15 +180,44 @@ instance ToScopeGraph Py.Float where scopeGraph = mempty
instance ToScopeGraph Py.ForStatement where scopeGraph = todo
instance ToScopeGraph Py.FunctionDefinition where scopeGraph = todo
instance ToScopeGraph Py.FunctionDefinition where
scopeGraph Py.FunctionDefinition
{ ann
, name = Py.Identifier _ann1 name
, parameters = Py.Parameters _ann2 parameters
, body
} = do
(_, associatedScope) <- declareFunction (Just $ Name.name name) Props.Function
{ Props.kind = ScopeGraph.Function
, Props.span = ann^.span_
}
withScope associatedScope $ do
let declProps = Props.Declaration
{ Props.kind = ScopeGraph.Parameter
, Props.relation = ScopeGraph.Default
, Props.associatedScope = Nothing
, Props.span = lowerBound
}
let param (Py.Parameter (Prj (Py.Identifier pann pname))) = Just (pann, Name.name pname)
param _ = Nothing
let parameterMs = fmap param parameters
if any isNothing parameterMs
then todo parameterMs
else do
let parameters' = catMaybes parameterMs
paramDeclarations <- for parameters' $ \(pos, parameter) ->
complete <* declare parameter (set span_ (pos^.span_) declProps)
bodyResult <- scopeGraph body
pure (mconcat paramDeclarations <> bodyResult)
instance ToScopeGraph Py.FutureImportStatement where scopeGraph = todo
instance ToScopeGraph Py.GeneratorExpression where scopeGraph = todo
instance ToScopeGraph Py.Identifier where
scopeGraph (Py.Identifier _ name) = do
reference @Name name name RefProperties
scopeGraph (Py.Identifier ann name) = do
let refProps = Props.Reference ScopeGraph.Identifier ScopeGraph.Default (ann^.span_ :: Span)
newReference (Name.name name) refProps
complete
instance ToScopeGraph Py.IfStatement where
@ -177,9 +230,33 @@ instance ToScopeGraph Py.GlobalStatement where scopeGraph = todo
instance ToScopeGraph Py.Integer where scopeGraph = mempty
instance ToScopeGraph Py.ImportStatement where scopeGraph = todo
instance ToScopeGraph Py.ImportStatement where
scopeGraph (Py.ImportStatement _ ((R1 (Py.DottedName _ names@((Py.Identifier ann name) :| _))) :| [])) = do
let toName (Py.Identifier _ name) = Name.name name
newEdge ScopeGraph.Import (toName <$> names)
let referenceProps = Props.Reference ScopeGraph.Identifier ScopeGraph.Default (ann^.span_ :: Span)
newReference (Name.name name) referenceProps
let pairs = zip (toList names) (tail $ toList names)
for_ pairs $ \pair -> do
case pair of
(scopeIdentifier, referenceIdentifier@(Py.Identifier ann2 _)) -> do
withScope (toName scopeIdentifier) $ do
let referenceProps = Props.Reference ScopeGraph.Identifier ScopeGraph.Default (ann2^.span_ :: Span)
newReference (toName referenceIdentifier) referenceProps
complete
scopeGraph term = todo (show term)
instance ToScopeGraph Py.ImportFromStatement where
scopeGraph (Py.ImportFromStatement _ [] (L1 (Py.DottedName _ names)) (Just (Py.WildcardImport _ _))) = do
let toName (Py.Identifier _ name) = Name.name name
complete <* newEdge ScopeGraph.Import (toName <$> names)
scopeGraph impossibleTerm@(Py.ImportFromStatement _ [] (L1 (Py.DottedName _ _)) Nothing) =
todo impossibleTerm
scopeGraph term = todo term
instance ToScopeGraph Py.ImportFromStatement where scopeGraph = todo
instance ToScopeGraph Py.Lambda where scopeGraph = todo

View File

@ -10,19 +10,19 @@ module Language.Python.Tags
) where
import AST.Element
import AST.Token
import AST.Traversable1
import Control.Effect.Reader
import Control.Effect.Writer
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (listToMaybe)
import Data.Text as Text
import GHC.Generics
import qualified Language.Python.AST as Py
import Source.Loc
import Source.Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Python.AST as Py
import TreeSitter.Token
class ToTags t where
tags
@ -34,8 +34,7 @@ class ToTags t where
default tags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -50,8 +49,7 @@ instance ToTags (Token sym n) where tags _ = pure ()
keywordFunctionCall
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc -> Loc -> Range -> Text -> m ()
keywordFunctionCall t loc range name = yieldTag name Function loc range Nothing >> gtags t
@ -127,12 +125,11 @@ docComment _ _ = Nothing
gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
instance ToTags Py.AliasedImport
instance ToTags Py.ArgumentList

View File

@ -5,26 +5,32 @@
{-# LANGUAGE TypeOperators #-}
module Main (main) where
import Analysis.Name (Name)
import qualified Analysis.Name as Name
import qualified AST.Unmarshal as TS
import Control.Algebra
import Control.Carrier.Lift
import Control.Carrier.Sketch.Fresh
import Control.Carrier.Sketch.ScopeGraph
import qualified Control.Effect.ScopeGraph.Properties.Declaration as Props
import qualified Control.Effect.ScopeGraph.Properties.Function as Props
import qualified Control.Effect.ScopeGraph.Properties.Reference as Props
import Control.Monad
import qualified Data.ByteString as ByteString
import Data.Name (Name)
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.ScopeGraph as ScopeGraph
import qualified Language.Python ()
import qualified Language.Python as Py (Term)
import ScopeGraph.Convert
import qualified Language.Python.Grammar as TSP
import Scope.Graph.Convert
import Source.Loc
import qualified Source.Source as Source
import Source.Span
import System.Exit (die)
import System.Path ((</>))
import qualified System.Path as Path
import qualified System.Path.Directory as Path
import qualified Test.Tasty as Tasty
import qualified Test.Tasty.HUnit as HUnit
import qualified TreeSitter.Python as TSP
import qualified TreeSitter.Unmarshal as TS
{-
@ -50,10 +56,10 @@ The graph should be
runScopeGraph :: ToScopeGraph t => Path.AbsRelFile -> Source.Source -> t Loc -> (ScopeGraph.ScopeGraph Name, Result)
runScopeGraph p _src item = run . runSketch (Just p) $ scopeGraph item
sampleGraphThing :: (Has (Sketch Name) sig m) => m Result
sampleGraphThing :: ScopeGraphEff sig m => m Result
sampleGraphThing = do
declare @Name "hello" DeclProperties
declare @Name "goodbye" DeclProperties
declare "hello" (Props.Declaration ScopeGraph.Assignment ScopeGraph.Default Nothing (Span (Pos 2 0) (Pos 2 10)))
declare "goodbye" (Props.Declaration ScopeGraph.Assignment ScopeGraph.Default Nothing (Span (Pos 3 0) (Pos 3 12)))
pure Complete
graphFile :: FilePath -> IO (ScopeGraph.ScopeGraph Name, Result)
@ -71,12 +77,6 @@ assertSimpleAssignment = do
(expecto, Complete) <- runM $ runSketch Nothing sampleGraphThing
HUnit.assertEqual "Should work for simple case" expecto result
expectedReference :: (Has (Sketch Name) sig m) => m Result
expectedReference = do
declare @Name "x" DeclProperties
reference @Name "x" "x" RefProperties
pure Complete
assertSimpleReference :: HUnit.Assertion
assertSimpleReference = do
let path = "semantic-python/test/fixtures/5-01-simple-reference.py"
@ -85,6 +85,83 @@ assertSimpleReference = do
HUnit.assertEqual "Should work for simple case" expecto result
expectedReference :: ScopeGraphEff sig m => m Result
expectedReference = do
declare "x" (Props.Declaration ScopeGraph.Assignment ScopeGraph.Default Nothing (Span (Pos 0 0) (Pos 0 5)))
let refProperties = Props.Reference ScopeGraph.Identifier ScopeGraph.Default (Span (Pos 1 0) (Pos 1 1))
newReference "x" refProperties
pure Complete
expectedQualifiedImport :: ScopeGraphEff sig m => m Result
expectedQualifiedImport = do
newEdge ScopeGraph.Import (NonEmpty.fromList ["cheese", "ints"])
let refProperties = Props.Reference ScopeGraph.Identifier ScopeGraph.Default (Span (Pos 0 7) (Pos 0 13))
newReference (Name.name "cheese") refProperties
withScope "cheese" $ do
let refProperties = Props.Reference ScopeGraph.Identifier ScopeGraph.Default (Span (Pos 0 14) (Pos 0 18))
newReference (Name.name "ints") refProperties
pure Complete
expectedImportHole :: ScopeGraphEff sig m => m Result
expectedImportHole = do
newEdge ScopeGraph.Import (NonEmpty.fromList ["cheese", "ints"])
pure Complete
assertLexicalScope :: HUnit.Assertion
assertLexicalScope = do
let path = "semantic-python/test/fixtures/5-02-simple-function.py"
(graph, _) <- graphFile path
case run (runSketch Nothing expectedLexicalScope) of
(expecto, Complete) -> HUnit.assertEqual "Should work for simple case" expecto graph
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
expectedLexicalScope :: ScopeGraphEff sig m => m Result
expectedLexicalScope = do
_ <- declareFunction (Just $ Name.name "foo") (Props.Function ScopeGraph.Function (Span (Pos 0 0) (Pos 1 24)))
let refProperties = Props.Reference ScopeGraph.Identifier ScopeGraph.Default (Span (Pos 3 0) (Pos 3 3))
newReference "foo" refProperties
pure Complete
assertFunctionArg :: HUnit.Assertion
assertFunctionArg = do
let path = "semantic-python/test/fixtures/5-03-function-argument.py"
(graph, _) <- graphFile path
case run (runSketch Nothing expectedFunctionArg) of
(expecto, Complete) -> HUnit.assertEqual "Should work for simple case" expecto graph
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
expectedFunctionArg :: ScopeGraphEff sig m => m Result
expectedFunctionArg = do
(_, associatedScope) <- declareFunction (Just $ Name.name "foo") (Props.Function ScopeGraph.Function (Span (Pos 0 0) (Pos 1 12)))
withScope associatedScope $ do
declare "x" (Props.Declaration ScopeGraph.Parameter ScopeGraph.Default Nothing (Span (Pos 0 8) (Pos 0 9)))
let refProperties = Props.Reference ScopeGraph.Identifier ScopeGraph.Default (Span (Pos 1 11) (Pos 1 12))
newReference "x" refProperties
pure ()
let refProperties = Props.Reference ScopeGraph.Identifier ScopeGraph.Default (Span (Pos 3 0) (Pos 3 3))
newReference "foo" refProperties
pure Complete
assertImportHole :: HUnit.Assertion
assertImportHole = do
let path = "semantic-python/test/fixtures/cheese/6-01-imports.py"
(graph, _) <- graphFile path
case run (runSketch Nothing expectedImportHole) of
(expecto, Complete) -> HUnit.assertEqual "Should work for simple case" expecto graph
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
assertQualifiedImport :: HUnit.Assertion
assertQualifiedImport = do
let path = "semantic-python/test/fixtures/cheese/6-01-qualified-imports.py"
(graph, _) <- graphFile path
case run (runSketch Nothing expectedQualifiedImport) of
(expecto, Complete) -> HUnit.assertEqual "Should work for simple case" expecto graph
(_, Todo msg) -> HUnit.assertFailure ("Failed to complete:" <> show msg)
main :: IO ()
main = do
-- make sure we're in the root directory so the paths resolve properly
@ -99,5 +176,13 @@ main = do
],
Tasty.testGroup "reference" [
HUnit.testCase "simple reference" assertSimpleReference
],
Tasty.testGroup "lexical scopes" [
HUnit.testCase "simple function scope" assertLexicalScope
, HUnit.testCase "simple function argument" assertFunctionArg
],
Tasty.testGroup "imports" [
HUnit.testCase "simple function argument" assertImportHole
, HUnit.testCase "qualified imports" assertQualifiedImport
]
]

View File

@ -1,4 +1,10 @@
{-# LANGUAGE DerivingStrategies, GeneralizedNewtypeDeriving, StandaloneDeriving, FlexibleInstances, NamedFieldPuns, OverloadedStrings, QuantifiedConstraints, TypeOperators, UndecidableInstances #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuantifiedConstraints #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Instances () where
@ -8,14 +14,10 @@ module Instances () where
-- we should keep track of them in a dedicated file.
import Analysis.File
import Core.Name (Name (..))
import Data.Aeson
import Data.Text (pack)
import qualified System.Path as Path
deriving newtype instance ToJSON Name
deriving newtype instance ToJSONKey Name
instance ToJSON a => ToJSON (File a) where
toJSON File{filePath, fileSpan, fileBody} = object
[ "path" .= filePath

View File

@ -1,4 +1,7 @@
{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, TypeApplications, TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
module Main (main) where
@ -36,8 +39,8 @@ import System.Path ((</>))
import qualified System.Path as Path
import qualified System.Path.Directory as Path
import qualified Text.Trifecta as Trifecta
import qualified TreeSitter.Python as TSP
import qualified TreeSitter.Unmarshal as TS
import qualified Language.Python.Grammar as TSP
import qualified AST.Unmarshal as TS
import qualified Test.Tasty as Tasty
import qualified Test.Tasty.HUnit as HUnit
@ -69,7 +72,7 @@ assertEvaluatesTo core k val = do
(_, files) ->
HUnit.assertFailure ("Unexpected number of files: " <> show (length files))
let found = Map.lookup (Name k) env >>= flip IntMap.lookup heap
let found = Map.lookup (name k) env >>= flip IntMap.lookup heap
found HUnit.@?= Just val
{-# HLINT ignore assertEvaluatesTo #-}

View File

@ -0,0 +1,4 @@
def foo():
return "hello world"
foo()

View File

@ -0,0 +1,4 @@
def foo(x):
return x
foo(1)

View File

@ -0,0 +1 @@
from cheese.ints import *

View File

@ -0,0 +1 @@
import cheese.ints

View File

@ -0,0 +1,5 @@
def one():
return 1
def two():
return 2

@ -0,0 +1 @@
Subproject commit 899ac8d5d6c883b2f05362c9953e14e78aac474c

View File

@ -24,9 +24,12 @@ common haskell
, fused-effects ^>= 1.0
, fused-syntax
, parsers ^>= 0.12.10
, semantic-ast
, semantic-codegen
, semantic-core ^>= 0.0
, semantic-source ^>= 0.0.1
, semantic-source ^>= 0.0.2
, semantic-tags ^>= 0.0
, template-haskell ^>= 2.15
, text ^>= 1.2.3
, tree-sitter ^>= 0.8
, tree-sitter-ruby ^>= 0.4.1
@ -50,5 +53,7 @@ library
import: haskell
exposed-modules:
Language.Ruby
Language.Ruby.AST
Language.Ruby.Grammar
Language.Ruby.Tags
hs-source-dirs: src

View File

@ -3,17 +3,17 @@
-- | Semantic functionality for Ruby programs.
module Language.Ruby
( Term(..)
, TreeSitter.Ruby.tree_sitter_ruby
, Language.Ruby.Grammar.tree_sitter_ruby
) where
import qualified AST.Unmarshal as TS
import Control.Carrier.State.Strict
import Data.Proxy
import Data.Text (Text)
import qualified Language.Ruby.AST as Rb
import qualified Language.Ruby.Grammar (tree_sitter_ruby)
import qualified Language.Ruby.Tags as RbTags
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Ruby (tree_sitter_ruby)
import qualified TreeSitter.Ruby.AST as Rb
import qualified TreeSitter.Unmarshal as TS
newtype Term a = Term { getTerm :: Rb.Program a }

View File

@ -0,0 +1,21 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Language.Ruby.AST
( module Language.Ruby.AST
) where
import Prelude hiding (False, Float, Integer, Rational, String, True)
import AST.GenerateSyntax
import qualified Language.Ruby.Grammar as Grammar
astDeclarationsForLanguage Grammar.tree_sitter_ruby "../../../vendor/tree-sitter-ruby/src/node-types.json"

View File

@ -0,0 +1,16 @@
{-# LANGUAGE TemplateHaskell #-}
module Language.Ruby.Grammar
( tree_sitter_ruby
, Grammar(..)
) where
import Language.Haskell.TH
import TreeSitter.Ruby (tree_sitter_ruby)
import AST.Grammar.TH
import TreeSitter.Language (addDependentFileRelative)
-- Regenerate template haskell code when these files change:
addDependentFileRelative "../../../vendor/tree-sitter-ruby/src/parser.c"
-- | Statically-known rules corresponding to symbols in the grammar.
mkStaticallyKnownRuleGrammarData (mkName "Grammar") tree_sitter_ruby

View File

@ -12,21 +12,21 @@ module Language.Ruby.Tags
) where
import AST.Element
import AST.Token
import AST.Traversable1
import qualified AST.Unmarshal as TS
import Control.Effect.Reader
import Control.Effect.State
import Control.Effect.Writer
import Control.Monad
import Data.Foldable
import Data.Text as Text
import GHC.Generics
import qualified Language.Ruby.AST as Rb
import Source.Loc
import Source.Range as Range
import Source.Source as Source
import Tags.Tag
import qualified Tags.Tagging.Precise as Tags
import qualified TreeSitter.Ruby.AST as Rb
import TreeSitter.Token
import qualified TreeSitter.Unmarshal as TS
class ToTags t where
tags
@ -40,8 +40,7 @@ class ToTags t where
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Has (State [Text]) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
@ -89,7 +88,7 @@ instance ToTags Rb.Class where
where
range' = case extraChildren of
Prj Rb.Superclass { ann = Loc { byteRange = Range { end }}} : _ -> Range start end
_ -> Range start (getEnd expr)
_ -> Range start (getEnd expr)
getEnd = Range.end . byteRange . TS.gann
yield name = yieldTag name Class loc range' >> gtags t
@ -106,7 +105,7 @@ instance ToTags Rb.SingletonClass where
where
range' = case extraChildren of
x : _ -> Range start (getStart x)
_ -> range
_ -> range
getStart = Range.start . byteRange . TS.gann
yield name = yieldTag name Class loc range' >> gtags t
@ -123,7 +122,7 @@ instance ToTags Rb.Module where
where
range' = case extraChildren of
x : _ -> Range start (getStart x)
_ -> Range start (getEnd expr)
_ -> Range start (getEnd expr)
getEnd = Range.end . byteRange . TS.gann
getStart = Range.start . byteRange . TS.gann
yield name = yieldTag name Module loc range' >> gtags t
@ -132,8 +131,7 @@ yieldMethodNameTag
:: ( Has (State [Text]) sig m
, Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
) => t Loc -> Loc -> Range -> Rb.MethodName Loc -> m ()
yieldMethodNameTag t loc range (Rb.MethodName expr) = enterScope True $ case expr of
Prj Rb.Identifier { text = name } -> yield name
@ -165,7 +163,7 @@ instance ToTags Rb.Method where
where
range' = case parameters of
Just Rb.MethodParameters { ann = Loc { byteRange = Range { end } }} -> Range start end
_ -> Range start (getEnd name)
_ -> Range start (getEnd name)
getEnd = Range.end . byteRange . TS.gann
instance ToTags Rb.SingletonMethod where
@ -177,7 +175,7 @@ instance ToTags Rb.SingletonMethod where
where
range' = case parameters of
Just Rb.MethodParameters { ann = Loc { byteRange = Range { end } }} -> Range start end
_ -> Range start (getEnd name)
_ -> Range start (getEnd name)
getEnd = Range.end . byteRange . TS.gann
instance ToTags Rb.Block where
@ -336,12 +334,11 @@ gtags
:: ( Has (Reader Source) sig m
, Has (Writer Tags.Tags) sig m
, Has (State [Text]) sig m
, Generic1 t
, Tags.GTraversable1 ToTags (Rep1 t)
, Traversable1 ToTags t
)
=> t Loc
-> m ()
gtags = Tags.traverse1_ @ToTags (const (pure ())) tags . Tags.Generics
gtags = traverse1_ @ToTags (const (pure ())) tags
-- instance ToTags Rb.Alias
instance ToTags Rb.Arg

@ -0,0 +1 @@
Subproject commit eb2b6225bfb80010f2e4cbd27db8c6f3775230b5

View File

@ -20,12 +20,20 @@ tested-with: GHC == 8.6.5
library
exposed-modules:
Control.Carrier.Sketch.Fresh
Control.Effect.Sketch
ScopeGraph.Convert
Control.Carrier.Sketch.ScopeGraph
Control.Effect.ScopeGraph
Control.Effect.ScopeGraph.Properties.Declaration
Control.Effect.ScopeGraph.Properties.Function
Control.Effect.ScopeGraph.Properties.Reference
Scope.Graph.AdjacencyList
Scope.Graph.Convert
Scope.Info
Scope.Path
Scope.Reference
Scope.Scope
Scope.Types
Data.Hole
Data.Module
Data.Name
Data.ScopeGraph
build-depends:
base >= 4.13 && < 5
@ -33,12 +41,14 @@ library
, algebraic-graphs >= 0.3 && < 0.5
, containers
, fused-effects ^>= 1.0
, generic-monoid
, generic-lens
, hashable
, lens
, semilattices
, generic-monoid
, pathtype
, semantic-source ^>= 0.0
, semantic-analysis
, semantic-source ^>= 0.0.2
, semilattices
, text ^>= 1.2.3.1
hs-source-dirs: src
default-language: Haskell2010

View File

@ -1,97 +0,0 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
-- | This carrier interprets the Sketch effect, keeping track of
-- the current scope and in-progress graph internally.
module Control.Carrier.Sketch.Fresh
( SketchC (..)
, runSketch
, module Control.Effect.Sketch
) where
import Control.Algebra
import Control.Carrier.Fresh.Strict
import Control.Carrier.State.Strict
import Control.Effect.Sketch
import Control.Monad.IO.Class
import Data.Bifunctor
import Data.Module
import Data.Name (Name)
import qualified Data.Name
import Data.ScopeGraph (ScopeGraph)
import qualified Data.ScopeGraph as ScopeGraph
import Data.Semilattice.Lower
import Source.Span
import qualified System.Path as Path
-- | The state type used to keep track of the in-progress graph and
-- positional/contextual information. The name "sketchbook" is meant
-- to invoke an in-progress, concealed work, as well as the
-- "sketching" of a graph.
data Sketchbook address = Sketchbook
{ sGraph :: ScopeGraph address
, sCurrentScope :: address
} deriving (Eq, Show)
instance Lower (Sketchbook Name) where
lowerBound =
let
initialGraph = ScopeGraph.insertScope n initialScope lowerBound
initialScope = ScopeGraph.Scope mempty mempty mempty
n = Data.Name.nameI 0
in
Sketchbook initialGraph n
newtype SketchC address m a = SketchC (StateC (Sketchbook address) (FreshC m) a)
deriving (Applicative, Functor, Monad, MonadIO)
instance (Effect sig, Algebra sig m) => Algebra (Sketch Name :+: sig) (SketchC Name m) where
alg (L (Declare n _props k)) = do
Sketchbook old current <- SketchC (get @(Sketchbook Name))
let (new, _pos) =
ScopeGraph.declare
(ScopeGraph.Declaration (Data.Name.name n))
(lowerBound @ModuleInfo)
ScopeGraph.Default
ScopeGraph.Public
(lowerBound @Span)
ScopeGraph.Identifier
Nothing
current
old
SketchC (put @(Sketchbook Name) (Sketchbook new current))
k ()
alg (L (Reference n decl _props k)) = do
Sketchbook old current <- SketchC (get @(Sketchbook Name))
let new =
ScopeGraph.reference
(ScopeGraph.Reference (Data.Name.name n))
(lowerBound @ModuleInfo)
(lowerBound @Span)
ScopeGraph.Identifier
(ScopeGraph.Declaration (Data.Name.name decl))
current
old
SketchC (put @(Sketchbook Name) (Sketchbook new current))
k ()
alg (R other) = SketchC (alg (R (R (handleCoercible other))))
runSketch ::
(Functor m)
=> Maybe Path.AbsRelFile
-> SketchC Name m a
-> m (ScopeGraph Name, a)
runSketch _rootpath (SketchC go)
= evalFresh 0
. fmap (first sGraph)
. runState lowerBound
$ go

View File

@ -0,0 +1,53 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fprint-expanded-synonyms #-}
-- | This carrier interprets the Sketch effect, keeping track of
-- the current scope and in-progress graph internally.
module Control.Carrier.Sketch.ScopeGraph
( SketchC
, runSketch
, module Control.Effect.ScopeGraph
) where
import Analysis.Name (Name)
import qualified Analysis.Name as Name
import Control.Carrier.Fresh.Strict
import Control.Carrier.Reader
import Control.Carrier.State.Strict
import Control.Effect.ScopeGraph
import qualified Data.ScopeGraph as ScopeGraph
import Data.Semilattice.Lower
import qualified System.Path as Path
type SketchC addr m
= StateC (ScopeGraph Name)
( StateC Name
( ReaderC Name
( FreshC m
)))
runSketch ::
(Functor m)
=> Maybe Path.AbsRelFile
-> SketchC Name m a
-> m (ScopeGraph Name, a)
runSketch _rootpath go
= evalFresh 0
. runReader @Name rootname
. evalState @Name rootname
. runState @(ScopeGraph Name) initialGraph
$ go
where
rootname = Name.nameI 0
initialGraph = ScopeGraph.insertScope rootname lowerBound lowerBound

View File

@ -0,0 +1,182 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
-- | The ScopeGraph effect is used to build up a scope graph over
-- the lifetime of a monadic computation. The name is meant to evoke
-- physically sketching the hierarchical outline of a graph.
module Control.Effect.ScopeGraph
( ScopeGraph
, ScopeGraphEff
, declare
-- Scope Manipulation
, currentScope
, newEdge
, newReference
, newScope
, withScope
, declareFunction
, declareMaybeName
, reference
, Has
) where
import Analysis.Name (Name)
import qualified Analysis.Name as Name
import Control.Algebra
import Control.Effect.Fresh
import Control.Effect.Reader
import Control.Lens
import Data.List.NonEmpty
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import qualified Data.Module as Module
import qualified Data.ScopeGraph as ScopeGraph
import Data.Semilattice.Lower
import Data.Text (Text)
import GHC.Records
import qualified Scope.Reference as Reference
import Source.Span
import Scope.Graph.AdjacencyList (ScopeGraph)
import qualified Scope.Graph.AdjacencyList as AdjacencyList
import qualified Control.Effect.ScopeGraph.Properties.Declaration as Props
import qualified Control.Effect.ScopeGraph.Properties.Function as Props
import qualified Control.Effect.ScopeGraph.Properties.Reference as Props
import qualified Control.Effect.ScopeGraph.Properties.Reference as Props.Reference
import Control.Effect.State
-- | Extract the 'Just' of a 'Maybe' in an 'Applicative' context or, given 'Nothing', run the provided action.
maybeM :: Applicative f => f a -> Maybe a -> f a
maybeM f = maybe f pure
{-# INLINE maybeM #-}
type ScopeGraphEff sig m
= ( Has (State (ScopeGraph Name)) sig m
, Has (State Name) sig m
, Has (Reader Name) sig m
, Has Fresh sig m
)
graphInProgress :: ScopeGraphEff sig m => m (ScopeGraph Name)
graphInProgress = get
currentScope :: ScopeGraphEff sig m => m Name
currentScope = ask
withScope :: ScopeGraphEff sig m
=> Name
-> m a
-> m a
withScope scope = local (const scope)
declare :: ScopeGraphEff sig m => Name -> Props.Declaration -> m ()
declare n props = do
current <- currentScope
old <- graphInProgress
let Props.Declaration kind relation associatedScope span = props
let (new, _pos) =
ScopeGraph.declare
(ScopeGraph.Declaration n)
(lowerBound @Module.ModuleInfo)
relation
ScopeGraph.Public
span
kind
associatedScope
current
old
put new
-- | Establish a reference to a prior declaration.
reference :: forall sig m . ScopeGraphEff sig m => Text -> Text -> Props.Reference -> m ()
reference n decl props = do
current <- currentScope
old <- graphInProgress
let new =
ScopeGraph.reference
(ScopeGraph.Reference (Name.name n))
(lowerBound @Module.ModuleInfo)
(Props.Reference.span props)
(Props.Reference.kind props)
(ScopeGraph.Declaration (Name.name decl))
current
old
put new
newScope :: forall sig m . ScopeGraphEff sig m => Map ScopeGraph.EdgeLabel [Name] -> m Name
newScope edges = do
old <- graphInProgress
name <- Name.gensym
let new = ScopeGraph.newScope name edges old
name <$ put new
-- | Takes an edge label and a list of names and inserts an import edge to a hole.
newEdge :: ScopeGraphEff sig m => ScopeGraph.EdgeLabel -> NonEmpty Name -> m ()
newEdge label address = do
current <- currentScope
old <- graphInProgress
let new = ScopeGraph.addImportEdge label (toList address) current old
put new
lookupScope :: ScopeGraphEff sig m => Name -> m (ScopeGraph.Scope Name)
lookupScope address = maybeM undefined . ScopeGraph.lookupScope address =<< get
-- | Inserts a reference.
newReference :: ScopeGraphEff sig m => Name -> Props.Reference -> m ()
newReference name props = do
currentAddress <- currentScope
scope <- lookupScope currentAddress
let refProps = Reference.ReferenceInfo (props^.span_) (Props.Reference.kind props) lowerBound
insertRef' :: ScopeGraph.Path Name -> ScopeGraph.ScopeGraph Name -> ScopeGraph.ScopeGraph Name
insertRef' path scopeGraph = let
scope' = (ScopeGraph.insertReference (Reference.Reference name) lowerBound (Props.Reference.span props) (getField @"kind" props) path) scope
in
(ScopeGraph.insertScope currentAddress scope' scopeGraph)
scopeGraph <- get @(ScopeGraph.ScopeGraph Name)
case AdjacencyList.findPath (const Nothing) (ScopeGraph.Declaration name) currentAddress scopeGraph of
-- If a path to a declaration is found, insert a reference into the current scope.
Just path -> modify (insertRef' path)
-- If no path is found, insert a reference with a hole into the current scope.
Nothing ->
modify (ScopeGraph.insertScope
currentAddress
(ScopeGraph.newReference
(Reference.Reference name)
refProps
scope))
declareFunction :: forall sig m . ScopeGraphEff sig m => Maybe Name -> Props.Function -> m (Name, Name)
declareFunction name (Props.Function kind span) = do
currentScope' <- currentScope
let lexicalEdges = Map.singleton ScopeGraph.Lexical [ currentScope' ]
associatedScope <- newScope lexicalEdges
name' <- declareMaybeName name Props.Declaration
{ Props.relation = ScopeGraph.Default
, Props.kind = kind
, Props.associatedScope = Just associatedScope
, Props.span = span
}
pure (name', associatedScope)
declareMaybeName :: ScopeGraphEff sig m
=> Maybe Name
-> Props.Declaration
-> m Name
declareMaybeName maybeName props = do
case maybeName of
Just name -> name <$ declare name props
_ -> do
name <- Name.gensym
name <$ declare name (props { Props.relation = ScopeGraph.Gensym })

View File

@ -0,0 +1,25 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeApplications #-}
-- | The 'Declaration' record type is used by the 'Control.Effect.Sketch' module to keep
-- track of the parameters that need to be passed when establishing a new declaration.
-- That is to say, it is a record type primarily used for its selector names.
module Control.Effect.ScopeGraph.Properties.Declaration
( Declaration (..)
) where
import Analysis.Name (Name)
import Data.Generics.Product (field)
import Data.ScopeGraph as ScopeGraph (Kind, Relation)
import GHC.Generics (Generic)
import Source.Span
data Declaration = Declaration
{ kind :: ScopeGraph.Kind
, relation :: ScopeGraph.Relation
, associatedScope :: Maybe Name
, span :: Span
} deriving Generic
instance HasSpan Declaration where span_ = field @"span"

View File

@ -0,0 +1,22 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeApplications #-}
-- | The 'Function' record type is used by the 'Control.Effect.Sketch' module to keep
-- track of the parameters that need to be passed when establishing a new declaration.
-- That is to say, it is a record type primarily used for its selector names.
module Control.Effect.ScopeGraph.Properties.Function
( Function (..)
) where
import Data.Generics.Product (field)
import qualified Data.ScopeGraph as ScopeGraph (Kind)
import GHC.Generics (Generic)
import Source.Span
data Function = Function
{ kind :: ScopeGraph.Kind
, span :: Span
} deriving Generic
instance HasSpan Function where span_ = field @"span"

View File

@ -0,0 +1,27 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeApplications #-}
-- | The 'Declaration' record type is used by the 'Control.Effect.Sketch' module to keep
-- track of the parameters that need to be passed when establishing a new reference.
-- It is currently unused, but will possess more fields in the future as scope graph
-- functionality is enhanced.
module Control.Effect.ScopeGraph.Properties.Reference
( Reference (..)
) where
import Control.Lens
import Data.ScopeGraph as ScopeGraph (Kind, Relation)
import GHC.Generics (Generic)
import Prelude hiding (span)
import Source.Span
data Reference = Reference
{ kind :: ScopeGraph.Kind
, relation :: ScopeGraph.Relation
, span :: Span
} deriving (Generic, Show)
instance HasSpan Reference where
span_ = lens span (\r s -> r { span = s })
{-# INLINE span_ #-}

View File

@ -1,39 +0,0 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
-- | The Sketch effect is used to build up a scope graph over
-- the lifetime of a monadic computation. The name is meant to evoke
-- physically sketching the hierarchical outline of a graph.
module Control.Effect.Sketch
( Sketch (..)
, DeclProperties (..)
, RefProperties (..)
, declare
, reference
, Has
) where
import Control.Algebra
import Data.Text (Text)
import GHC.Generics
data DeclProperties = DeclProperties
data RefProperties = RefProperties
data Sketch address m k =
Declare Text DeclProperties (() -> m k)
| Reference Text Text RefProperties (() -> m k)
deriving (Generic, Generic1, HFunctor, Effect)
-- | Introduces a declaration into the scope.
declare :: forall a sig m . (Has (Sketch a) sig m) => Text -> DeclProperties -> m ()
declare n props = send @(Sketch a) (Declare n props pure)
-- | Establish a reference to a prior declaration.
reference :: forall a sig m . (Has (Sketch a) sig m) => Text -> Text -> RefProperties -> m ()
reference n decl props = send @(Sketch a) (Reference n decl props pure)

View File

@ -1,63 +0,0 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Data.Name
( Name
-- * Constructors
, gensym
, name
, nameI
, formatName
, __self
) where
import Control.Effect.Fresh
import Data.Aeson
import qualified Data.Char as Char
import Data.Hashable
import Data.Text (Text)
import qualified Data.Text as Text
-- | The type of variable names.
data Name
= Name Text
| I Int
deriving (Eq, Ord)
-- | Generate a fresh (unused) name for use in synthesized variables/closures/etc.
gensym :: Has Fresh sig m => m Name
gensym = I <$> fresh
-- | Construct a 'Name' from a 'Text'.
name :: Text -> Name
name = Name
-- | Construct a 'Name' from an 'Int'. This is suitable for automatic generation, e.g. using a Fresh effect, but should not be used for human-generated names.
nameI :: Int -> Name
nameI = I
-- | Extract a human-readable 'Text' from a 'Name'.
-- Sample outputs can be found in @Data.Abstract.Name.Spec@.
formatName :: Name -> Text
formatName (Name name) = name
formatName (I i) = Text.pack $ '_' : (alphabet !! a) : replicate n 'ʹ'
where alphabet = ['a'..'z']
(n, a) = i `divMod` length alphabet
instance Show Name where
showsPrec _ = prettyShowString . Text.unpack . formatName
where prettyShowString str = showChar '"' . foldr ((.) . prettyChar) id str . showChar '"'
prettyChar c
| c `elem` ['\\', '\"'] = Char.showLitChar c
| Char.isPrint c = showChar c
| otherwise = Char.showLitChar c
instance Hashable Name where
hashWithSalt salt (Name name) = hashWithSalt salt name
hashWithSalt salt (I i) = salt `hashWithSalt` (1 :: Int) `hashWithSalt` i
instance ToJSON Name where
toJSON = toJSON . formatName
toEncoding = toEncoding . formatName
__self :: Name
__self = name "__semantic_self"

View File

@ -1,433 +1,13 @@
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
module Data.ScopeGraph
( Slot(..)
, Info(..)
, associatedScope
, lookupDeclaration
, declarationByName
, declarationsByAccessControl
, declarationsByRelation
, Declaration(..) -- TODO don't export these constructors
, declare
, formatDeclaration
, EdgeLabel(..)
, insertDeclarationScope
, insertDeclarationSpan
, insertImportReference
, newScope
, newPreludeScope
, insertScope
, insertEdge
, Path(..)
, pathDeclaration
, pathOfRef
, pathPosition
, Position(..)
, reference
, Reference(..) -- TODO don't export these constructors
, ReferenceInfo(..)
, Relation(..)
, ScopeGraph(..)
, Kind(..)
, lookupScope
, lookupScopePath
, Scope(..)
, scopeOfRef
, pathDeclarationScope
, putDeclarationScopeAtPosition
, declarationNames
, AccessControl(..)
( module Scope.Info
, module Scope.Path
, module Scope.Scope
, module Scope.Types
, module Scope.Graph.AdjacencyList
) where
import Prelude hiding (lookup)
import Control.Applicative
import Control.Lens.Lens
import Control.Monad
import Data.Aeson
import Data.Bifunctor
import Data.Foldable
import Data.Hashable
import Data.Hole
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe
import Data.Module
import Data.Monoid
import Data.Name
import Data.Semilattice.Lower
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text)
import GHC.Generics
import Source.Span
-- A slot is a location in the heap where a value is stored.
data Slot address = Slot { frameAddress :: address, position :: Position }
deriving (Eq, Show, Ord)
data AccessControl = Public
| Protected
| Private
deriving (Bounded, Enum, Eq, Generic, Hashable, ToJSON, Show)
-- | The Ord AccessControl instance represents an order specification of AccessControls.
-- AccessControls that are less than or equal to another AccessControl implies access.
-- It is helpful to consider `Public <= Private` as saying "Can a Public syntax term access a Private syntax term?"
-- In this way, Public AccessControl is the top of the order specification, and Private AccessControl is the bottom.
instance Ord AccessControl where
-- | Private AccessControl represents the least overlap or accessibility with other AccessControls.
-- When asking if the AccessControl "on the left" is less than the AccessControl "on the right", Private AccessControl on the left always implies access to the thing on the right.
(<=) Private _ = True
(<=) _ Private = False
-- | Protected AccessControl is in between Private and Public in the order specification.
-- Protected AccessControl "on the left" has access to Protected or Public AccessControls "on the right".
(<=) Protected Public = True
(<=) Protected Protected = True
-- | Public AccessControl "on the left" has access only to Public AccessControl "on the right".
(<=) Public Public = True
(<=) Public _ = False
data Relation = Default | Instance | Prelude | Gensym
deriving (Bounded, Enum, Eq, Show, Ord)
instance Lower Relation where
lowerBound = Default
data Info scopeAddress = Info
{ infoDeclaration :: Declaration
, infoModule :: ModuleInfo
, infoRelation :: Relation
, infoAccessControl :: AccessControl
, infoSpan :: Span
, infoKind :: Kind
, infoAssociatedScope :: Maybe scopeAddress
} deriving (Eq, Show, Ord)
instance HasSpan (Info scopeAddress) where
span_ = lens infoSpan (\i s -> i { infoSpan = s })
{-# INLINE span_ #-}
instance Lower (Info scopeAddress) where
lowerBound = Info lowerBound lowerBound lowerBound Public lowerBound lowerBound Nothing
data ReferenceInfo = ReferenceInfo
{ refSpan :: Span
, refKind :: Kind
, refModule :: ModuleInfo
} deriving (Eq, Show, Ord)
instance HasSpan ReferenceInfo where
span_ = lens refSpan (\r s -> r { refSpan = s })
{-# INLINE span_ #-}
data Kind = AbstractClass
| Assignment
| Call
| Class
| DefaultExport
| Function
| Identifier
| Let
| MemberAccess
| Method
| Module
| New
| Parameter
| PublicField
| QualifiedAliasedImport
| QualifiedExport
| QualifiedImport
| RequiredParameter
| This
| TypeAlias
| TypeIdentifier
| Unknown
| UnqualifiedImport
| VariableDeclaration
deriving (Bounded, Enum, Eq, Show, Ord)
instance Lower Kind where
lowerBound = Unknown
-- Offsets and frame addresses in the heap should be addresses?
data Scope address =
Scope {
edges :: Map EdgeLabel [address]
, references :: Map Reference ([ReferenceInfo], Path address)
, declarations :: Seq (Info address)
}
| PreludeScope {
edges :: Map EdgeLabel [address]
, references :: Map Reference ([ReferenceInfo], Path address)
, declarations :: Seq (Info address)
}
deriving (Eq, Show, Ord)
instance Lower (Scope scopeAddress) where
lowerBound = Scope mempty mempty mempty
instance AbstractHole (Scope scopeAddress) where
hole = lowerBound
instance AbstractHole address => AbstractHole (Slot address) where
hole = Slot hole (Position 0)
instance AbstractHole (Info address) where
hole = lowerBound
newtype Position = Position { unPosition :: Int }
deriving (Eq, Show, Ord)
newtype ScopeGraph scope = ScopeGraph { unScopeGraph :: Map scope (Scope scope) }
deriving (Eq, Ord, Show)
instance Ord scope => Lower (ScopeGraph scope) where
lowerBound = ScopeGraph mempty
data Path scope
= Hole
-- | Construct a direct path to a declaration.
| DPath Declaration Position
-- | Construct an edge from a scope to another declaration path.
| EPath EdgeLabel scope (Path scope)
deriving (Eq, Functor, Ord, Show)
instance AbstractHole (Path scope) where
hole = Hole
-- Returns the declaration of a path.
pathDeclaration :: Path scope -> Declaration
pathDeclaration (DPath d _) = d
pathDeclaration (EPath _ _ p) = pathDeclaration p
pathDeclaration Hole = undefined
-- TODO: Store the current scope closer _in_ the DPath?
pathDeclarationScope :: scope -> Path scope -> Maybe scope
pathDeclarationScope _ (EPath _ scope (DPath _ _)) = Just scope
pathDeclarationScope currentScope (EPath _ _ p) = pathDeclarationScope currentScope p
pathDeclarationScope currentScope (DPath _ _) = Just currentScope
pathDeclarationScope _ Hole = Nothing
-- TODO: Possibly return in Maybe since we can have Hole paths
pathPosition :: Path scope -> Position
pathPosition Hole = Position 0
pathPosition (DPath _ p) = p
pathPosition (EPath _ _ p) = pathPosition p
-- Returns the reference paths of a scope in a scope graph.
pathsOfScope :: Ord scope => scope -> ScopeGraph scope -> Maybe (Map Reference ([ReferenceInfo], Path scope))
pathsOfScope scope = fmap references . Map.lookup scope . unScopeGraph
-- Returns the declaration data of a scope in a scope graph.
ddataOfScope :: Ord scope => scope -> ScopeGraph scope -> Maybe (Seq (Info scope))
ddataOfScope scope = fmap declarations . Map.lookup scope . unScopeGraph
-- Returns the edges of a scope in a scope graph.
linksOfScope :: Ord scope => scope -> ScopeGraph scope -> Maybe (Map EdgeLabel [scope])
linksOfScope scope = fmap edges . Map.lookup scope . unScopeGraph
declarationsByAccessControl :: Ord scope => scope -> AccessControl -> ScopeGraph scope -> [ Info scope ]
declarationsByAccessControl scope accessControl g = fromMaybe mempty $ do
dataSeq <- ddataOfScope scope g
pure . toList $ Seq.filter (\Info{..} -> accessControl <= infoAccessControl) dataSeq
declarationsByRelation :: Ord scope => scope -> Relation -> ScopeGraph scope -> [ Info scope ]
declarationsByRelation scope relation g = fromMaybe mempty $ do
dataSeq <- ddataOfScope scope g
pure . toList $ Seq.filter (\Info{..} -> infoRelation == relation) dataSeq
declarationByName :: Ord scope => scope -> Declaration -> ScopeGraph scope -> Maybe (Info scope)
declarationByName scope name g = do
dataSeq <- ddataOfScope scope g
find (\Info{..} -> infoDeclaration == name) dataSeq
-- Lookup a scope in the scope graph.
lookupScope :: Ord scope => scope -> ScopeGraph scope -> Maybe (Scope scope)
lookupScope scope = Map.lookup scope . unScopeGraph
-- Declare a declaration with a span and an associated scope in the scope graph.
-- TODO: Return the whole value in Maybe or Either.
declare :: Ord scope
=> Declaration
-> ModuleInfo
-> Relation
-> AccessControl
-> Span
-> Kind
-> Maybe scope
-> scope
-> ScopeGraph scope
-> (ScopeGraph scope, Maybe Position)
declare decl moduleInfo rel accessControl declSpan kind assocScope currentScope g = fromMaybe (g, Nothing) $ do
scope <- lookupScope currentScope g
dataSeq <- ddataOfScope currentScope g
case Seq.findIndexR (\Info{..} -> decl == infoDeclaration && declSpan == infoSpan && rel == infoRelation) dataSeq of
Just index -> pure (g, Just (Position index))
Nothing -> do
let newScope = scope { declarations = declarations scope Seq.|> Info decl moduleInfo rel accessControl declSpan kind assocScope }
pure (insertScope currentScope newScope g, Just (Position (length (declarations newScope))))
-- | Add a reference to a declaration in the scope graph.
-- Returns the original scope graph if the declaration could not be found.
reference :: Ord scope => Reference -> ModuleInfo -> Span -> Kind -> Declaration -> scope -> ScopeGraph scope -> ScopeGraph scope
reference ref moduleInfo span kind decl currentAddress g = fromMaybe g $ do
-- Start from the current address
currentScope' <- lookupScope currentAddress g
-- Build a path up to the declaration
flip (insertScope currentAddress) g . flip (insertReference ref moduleInfo span kind) currentScope' <$> findPath (const Nothing) decl currentAddress g
-- | Insert a reference into the given scope by constructing a resolution path to the declaration within the given scope graph.
insertImportReference :: Ord address => Reference -> ModuleInfo -> Span -> Kind -> Declaration -> address -> ScopeGraph address -> Scope address -> Maybe (Scope address)
insertImportReference ref moduleInfo span kind decl currentAddress g scope = flip (insertReference ref moduleInfo span kind) scope . EPath Import currentAddress <$> findPath (const Nothing) decl currentAddress g
lookupScopePath :: Ord scopeAddress => Name -> scopeAddress -> ScopeGraph scopeAddress -> Maybe (Path scopeAddress)
lookupScopePath declaration currentAddress g = findPath (flip (lookupReference declaration) g) (Declaration declaration) currentAddress g
findPath :: Ord scopeAddress => (scopeAddress -> Maybe (Path scopeAddress)) -> Declaration -> scopeAddress -> ScopeGraph scopeAddress -> Maybe (Path scopeAddress)
findPath extra decl currentAddress g = snd <$> getFirst (foldGraph combine currentAddress g)
where combine address path = fmap (address, )
$ First (pathToDeclaration decl address g)
<> First (extra address)
<> (uncurry (EPath Superclass) <$> path Superclass)
<> (uncurry (EPath Import) <$> path Import)
<> (uncurry (EPath Export) <$> path Export)
<> (uncurry (EPath Lexical) <$> path Lexical)
foldGraph :: (Ord scopeAddress, Monoid a) => (scopeAddress -> (EdgeLabel -> a) -> a) -> scopeAddress -> ScopeGraph scopeAddress -> a
foldGraph combine address graph = go lowerBound address
where go visited address
| address `Set.notMember` visited
, Just edges <- linksOfScope address graph = combine address (recur edges)
| otherwise = mempty
where visited' = Set.insert address visited
recur edges edge = maybe mempty (foldMap (go visited')) (Map.lookup edge edges)
pathToDeclaration :: Ord scopeAddress => Declaration -> scopeAddress -> ScopeGraph scopeAddress -> Maybe (Path scopeAddress)
pathToDeclaration decl address g = DPath decl . snd <$> lookupDeclaration (unDeclaration decl) address g
insertReference :: Reference -> ModuleInfo -> Span -> Kind -> Path scopeAddress -> Scope scopeAddress -> Scope scopeAddress
insertReference ref moduleInfo span kind path scope = scope { references = Map.alter (\case
Nothing -> pure ([ ReferenceInfo span kind moduleInfo ], path)
Just (refInfos, path) -> pure (ReferenceInfo span kind moduleInfo : refInfos, path)) ref (references scope) }
lookupDeclaration :: Ord scopeAddress => Name -> scopeAddress -> ScopeGraph scopeAddress -> Maybe (Info scopeAddress, Position)
lookupDeclaration name scope g = do
dataSeq <- ddataOfScope scope g
index <- Seq.findIndexR (\Info{..} -> Declaration name == infoDeclaration) dataSeq
(, Position index) <$> Seq.lookup index dataSeq
declarationNames :: Ord address => [EdgeLabel] -> Scope address -> ScopeGraph address -> Set Declaration
declarationNames edgeLabels scope scopeGraph = localDeclarations <> edgeNames
where addresses = join (Map.elems $ Map.restrictKeys (edges scope) (Set.fromList edgeLabels))
edgeNames = flip foldMap addresses $ \address -> maybe mempty (flip (declarationNames edgeLabels) scopeGraph) (lookupScope address scopeGraph)
localDeclarations = Set.fromList . toList . fmap infoDeclaration $ declarations scope
putDeclarationScopeAtPosition :: Ord scopeAddress => scopeAddress -> Position -> Maybe scopeAddress -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
putDeclarationScopeAtPosition scope position assocScope g@(ScopeGraph graph) = fromMaybe g $ do
dataSeq <- ddataOfScope scope g
let seq = Seq.adjust' (\Info{..} -> Info { infoAssociatedScope = assocScope, .. }) (unPosition position) dataSeq
pure $ ScopeGraph (Map.adjust (\s -> s { declarations = seq }) scope graph)
lookupReference :: Ord scopeAddress => Name -> scopeAddress -> ScopeGraph scopeAddress -> Maybe (Path scopeAddress)
lookupReference name scope g = fmap snd . Map.lookup (Reference name) =<< pathsOfScope scope g
insertEdge :: Ord scopeAddress => EdgeLabel -> scopeAddress -> scopeAddress -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
insertEdge label target currentAddress g@(ScopeGraph graph) = fromMaybe g $ do
currentScope' <- lookupScope currentAddress g
scopes <- maybe (Just mempty) pure (Map.lookup label (edges currentScope'))
let newScope = currentScope' { edges = Map.insert label (target : scopes) (edges currentScope') }
pure (ScopeGraph (Map.insert currentAddress newScope graph))
-- | Update the 'Scope' containing a 'Declaration' with an associated scope address.
-- Returns an unmodified 'ScopeGraph' if the 'Declaration' cannot be found with the given scope address.
insertDeclarationScope :: Ord scopeAddress => Declaration -> scopeAddress -> scopeAddress -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
insertDeclarationScope Declaration{..} associatedScopeAddress scopeAddress g = fromMaybe g $ do
declScopeAddress <- pathDeclarationScope scopeAddress =<< lookupScopePath unDeclaration scopeAddress g
scope <- lookupScope declScopeAddress g
(declInfo, position) <- second unPosition <$> lookupDeclaration unDeclaration declScopeAddress g
pure $ insertScope declScopeAddress (scope { declarations = Seq.update position (declInfo { infoAssociatedScope = Just associatedScopeAddress }) (declarations scope) }) g
-- | Insert a declaration span into the declaration in the scope graph.
insertDeclarationSpan :: Ord scopeAddress => Declaration -> Span -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
insertDeclarationSpan decl@Declaration{..} span g = fromMaybe g $ do
declScopeAddress <- scopeOfDeclaration decl g
(declInfo, position) <- second unPosition <$> lookupDeclaration unDeclaration declScopeAddress g
scope <- lookupScope declScopeAddress g
pure $ insertScope declScopeAddress (scope { declarations = Seq.update position (declInfo { infoSpan = span }) (declarations scope) }) g
-- | Insert a new scope with the given address and edges into the scope graph.
newScope :: Ord address => address -> Map EdgeLabel [address] -> ScopeGraph address -> ScopeGraph address
newScope address edges = insertScope address (Scope edges mempty mempty)
-- | Insert a new scope with the given address and edges into the scope graph.
newPreludeScope :: Ord address => address -> Map EdgeLabel [address] -> ScopeGraph address -> ScopeGraph address
newPreludeScope address edges = insertScope address (PreludeScope edges mempty mempty)
insertScope :: Ord address => address -> Scope address -> ScopeGraph address -> ScopeGraph address
insertScope address scope = ScopeGraph . Map.insert address scope . unScopeGraph
-- | Returns the scope of a reference in the scope graph.
scopeOfRef :: Ord scope => Reference -> ScopeGraph scope -> Maybe scope
scopeOfRef ref g@(ScopeGraph graph) = go (Map.keys graph)
where
go (s : scopes') = fromMaybe (go scopes') $ do
pathMap <- pathsOfScope s g
_ <- Map.lookup ref pathMap
pure (Just s)
go [] = Nothing
-- | Returns the path of a reference in the scope graph.
pathOfRef :: (Ord scope) => Reference -> ScopeGraph scope -> Maybe (Path scope)
pathOfRef ref graph = do
scope <- scopeOfRef ref graph
pathsMap <- pathsOfScope scope graph
snd <$> Map.lookup ref pathsMap
-- Returns the scope the declaration was declared in.
scopeOfDeclaration :: Ord scope => Declaration -> ScopeGraph scope -> Maybe scope
scopeOfDeclaration Declaration{..} g@(ScopeGraph graph) = go (Map.keys graph)
where
go = foldr (\ scope -> (scope <$ lookupDeclaration unDeclaration scope g <|>)) Nothing
-- | Returns the scope associated with a declaration (the child scope if any exists).
associatedScope :: Ord scope => Declaration -> ScopeGraph scope -> Maybe scope
associatedScope Declaration{..} g@(ScopeGraph graph) = go (Map.keys graph)
where
go = foldr lookupAssociatedScope Nothing
lookupAssociatedScope scope = ((lookupDeclaration unDeclaration scope g >>= infoAssociatedScope . fst) <|>)
newtype Reference = Reference { unReference :: Name }
deriving (Eq, Ord, Show)
instance Lower Reference where
lowerBound = Reference $ name ""
newtype Declaration = Declaration { unDeclaration :: Name }
deriving (Eq, Ord, Show)
instance Lower Declaration where
lowerBound = Declaration $ name ""
formatDeclaration :: Declaration -> Text
formatDeclaration = formatName . unDeclaration
-- | The type of edge from a scope to its parent scopes.
-- Either a lexical edge or an import edge in the case of non-lexical edges.
data EdgeLabel = Lexical | Import | Export | Superclass
deriving (Bounded, Enum, Eq, Ord, Show)
import Scope.Graph.AdjacencyList
import Scope.Info
import Scope.Path
import Scope.Scope
import Scope.Types

View File

@ -0,0 +1,257 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
module Scope.Graph.AdjacencyList
( module Scope.Graph.AdjacencyList
) where
import Analysis.Name
import Control.Applicative
import Control.Monad
import Data.Bifunctor
import Data.Foldable
import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe
import Data.Module
import Data.Monoid
import Data.Semilattice.Lower
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set
import Scope.Info
import Scope.Path
import Scope.Reference
import Scope.Scope
import Scope.Types
import Source.Span
newtype CurrentScope address = CurrentScope { unCurrentScope :: address }
newtype ScopeGraph scope = ScopeGraph { unScopeGraph :: Map scope (Scope scope) }
deriving (Eq, Ord, Show)
instance Ord scope => Lower (ScopeGraph scope) where
lowerBound = ScopeGraph mempty
-- Returns the reference paths of a scope in a scope graph.
pathsOfScope :: Ord scope => scope -> ScopeGraph scope -> Maybe (Map Reference ([ReferenceInfo], Path scope))
pathsOfScope scope = fmap references . Map.lookup scope . unScopeGraph
-- Returns the declaration data of a scope in a scope graph.
ddataOfScope :: Ord scope => scope -> ScopeGraph scope -> Maybe (Seq (Info scope))
ddataOfScope scope = fmap declarations . Map.lookup scope . unScopeGraph
-- Returns the edges of a scope in a scope graph.
linksOfScope :: Ord scope => scope -> ScopeGraph scope -> Maybe (Map EdgeLabel [scope])
linksOfScope scope = fmap edges . Map.lookup scope . unScopeGraph
declarationsByAccessControl :: Ord scope => scope -> AccessControl -> ScopeGraph scope -> [ Info scope ]
declarationsByAccessControl scope accessControl g = fromMaybe mempty $ do
dataSeq <- ddataOfScope scope g
pure . toList $ Seq.filter (\Info{..} -> accessControl <= infoAccessControl) dataSeq
declarationsByRelation :: Ord scope => scope -> Relation -> ScopeGraph scope -> [ Info scope ]
declarationsByRelation scope relation g = fromMaybe mempty $ do
dataSeq <- ddataOfScope scope g
pure . toList $ Seq.filter (\Info{..} -> infoRelation == relation) dataSeq
declarationByName :: Ord scope => scope -> Declaration -> ScopeGraph scope -> Maybe (Info scope)
declarationByName scope name g = do
dataSeq <- ddataOfScope scope g
find (\Info{..} -> infoDeclaration == name) dataSeq
-- Lookup a scope in the scope graph.
lookupScope :: Ord scope => scope -> ScopeGraph scope -> Maybe (Scope scope)
lookupScope scope = Map.lookup scope . unScopeGraph
-- Declare a declaration with a span and an associated scope in the scope graph.
-- TODO: Return the whole value in Maybe or Either.
declare :: Ord scope
=> Declaration
-> ModuleInfo
-> Relation
-> AccessControl
-> Span
-> Kind
-> Maybe scope
-> scope
-> ScopeGraph scope
-> (ScopeGraph scope, Maybe Position)
declare decl moduleInfo rel accessControl declSpan kind assocScope currentScope g = fromMaybe (g, Nothing) $ do
scope <- lookupScope currentScope g
dataSeq <- ddataOfScope currentScope g
case Seq.findIndexR (\Info{..} -> decl == infoDeclaration && declSpan == infoSpan && rel == infoRelation) dataSeq of
Just index -> pure (g, Just (Position index))
Nothing -> do
let newScope = scope { declarations = declarations scope Seq.|> Info decl moduleInfo rel accessControl declSpan kind assocScope }
pure (insertScope currentScope newScope g, Just (Position (length (declarations newScope))))
-- | Add a reference to a declaration in the scope graph.
-- Returns the original scope graph if the declaration could not be found.
reference :: Ord scope => Reference -> ModuleInfo -> Span -> Kind -> Declaration -> scope -> ScopeGraph scope -> ScopeGraph scope
reference ref moduleInfo span kind decl currentAddress g = fromMaybe g $ do
-- Start from the current address
currentScope' <- lookupScope currentAddress g
-- Build a path up to the declaration
flip (insertScope currentAddress) g . flip (insertReference ref moduleInfo span kind) currentScope' <$> findPath (const Nothing) decl currentAddress g
-- | Insert a reference into the given scope by constructing a resolution path to the declaration within the given scope graph.
insertImportReference :: Ord address => Reference -> ModuleInfo -> Span -> Kind -> Declaration -> address -> ScopeGraph address -> Scope address -> Maybe (Scope address)
insertImportReference ref moduleInfo span kind decl currentAddress g scope = flip (insertReference ref moduleInfo span kind) scope . EPath Import currentAddress <$> findPath (const Nothing) decl currentAddress g
lookupScopePath :: Ord scopeAddress => Name -> scopeAddress -> ScopeGraph scopeAddress -> Maybe (Path scopeAddress)
lookupScopePath declaration currentAddress g = findPath (flip (lookupReference declaration) g) (Declaration declaration) currentAddress g
findPath :: Ord scopeAddress => (scopeAddress -> Maybe (Path scopeAddress)) -> Declaration -> scopeAddress -> ScopeGraph scopeAddress -> Maybe (Path scopeAddress)
findPath extra decl currentAddress g = snd <$> getFirst (foldGraph combine currentAddress g)
where combine address path = fmap (address, )
$ First (pathToDeclaration decl address g)
<> First (extra address)
<> (uncurry (EPath Superclass) <$> path Superclass)
<> (uncurry (EPath Import) <$> path Import)
<> (uncurry (EPath Export) <$> path Export)
<> (uncurry (EPath Lexical) <$> path Lexical)
foldGraph :: (Ord scopeAddress, Monoid a) => (scopeAddress -> (EdgeLabel -> a) -> a) -> scopeAddress -> ScopeGraph scopeAddress -> a
foldGraph combine address graph = go lowerBound address
where go visited address
| address `Set.notMember` visited
, Just edges <- linksOfScope address graph = combine address (recur edges)
| otherwise = mempty
where visited' = Set.insert address visited
recur edges edge = maybe mempty (foldMap (go visited')) (Map.lookup edge edges)
pathToDeclaration :: Ord scopeAddress => Declaration -> scopeAddress -> ScopeGraph scopeAddress -> Maybe (Path scopeAddress)
pathToDeclaration decl address g = DPath decl . snd <$> lookupDeclaration (unDeclaration decl) address g
insertReference :: Reference -> ModuleInfo -> Span -> Kind -> Path scopeAddress -> Scope scopeAddress -> Scope scopeAddress
insertReference ref moduleInfo span kind path scope = scope { references = Map.alter (\case
Nothing -> pure ([ ReferenceInfo span kind moduleInfo ], path)
Just (refInfos, path) -> pure (ReferenceInfo span kind moduleInfo : refInfos, path)) ref (references scope) }
-- | Adds a reference and a Hole path to the given scope.
newReference :: Reference -> ReferenceInfo -> Scope scopeAddress -> Scope scopeAddress
newReference ref info scope = scope { references = Map.alter (\case
Nothing -> pure ([ info ], Hole)
Just (refInfos, path) -> pure (info : refInfos, path)) ref (references scope) }
lookupDeclaration :: Ord scopeAddress => Name -> scopeAddress -> ScopeGraph scopeAddress -> Maybe (Info scopeAddress, Position)
lookupDeclaration name scope g = do
dataSeq <- ddataOfScope scope g
index <- Seq.findIndexR (\Info{..} -> Declaration name == infoDeclaration) dataSeq
(, Position index) <$> Seq.lookup index dataSeq
declarationNames :: Ord address => [EdgeLabel] -> Scope address -> ScopeGraph address -> Set Declaration
declarationNames edgeLabels scope scopeGraph = localDeclarations <> edgeNames
where addresses = join (Map.elems $ Map.restrictKeys (edges scope) (Set.fromList edgeLabels))
edgeNames = flip foldMap addresses $ \address -> maybe mempty (flip (declarationNames edgeLabels) scopeGraph) (lookupScope address scopeGraph)
localDeclarations = Set.fromList . toList . fmap infoDeclaration $ declarations scope
putDeclarationScopeAtPosition :: Ord scopeAddress => scopeAddress -> Position -> Maybe scopeAddress -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
putDeclarationScopeAtPosition scope position assocScope g@(ScopeGraph graph) = fromMaybe g $ do
dataSeq <- ddataOfScope scope g
let seq = Seq.adjust' (\Info{..} -> Info { infoAssociatedScope = assocScope, .. }) (unPosition position) dataSeq
pure $ ScopeGraph (Map.adjust (\s -> s { declarations = seq }) scope graph)
-- | Lookup a reference by traversing the paths of a given scope and return a Maybe (Path address)
lookupReference :: Ord address => Name -> address -> ScopeGraph address -> Maybe (Path address)
lookupReference name scope g = fmap snd . Map.lookup (Reference name) =<< pathsOfScope scope g
insertEdge :: Ord scopeAddress => EdgeLabel -> scopeAddress -> scopeAddress -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
insertEdge label target currentAddress g@(ScopeGraph graph) = fromMaybe g $ do
currentScope' <- lookupScope currentAddress g
scopes <- maybe (Just mempty) pure (Map.lookup label (edges currentScope'))
let newScope = currentScope' { edges = Map.insert label (target : scopes) (edges currentScope') }
pure (ScopeGraph (Map.insert currentAddress newScope graph))
insertEdges :: Ord scopeAddress => NonEmpty EdgeLabel -> scopeAddress -> scopeAddress -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
insertEdges labels target currentAddress g =
foldr (\label graph -> insertEdge label target currentAddress graph) g labels
-- | Add an import edge of the form 'a -> Import -> b -> Import -> c' or creates intermediate void scopes of the form
-- 'a -> Void -> b -> Import -> c' if the given scopes cannot be found.
addImportEdge :: Ord scopeAddress => EdgeLabel -> [scopeAddress] -> scopeAddress -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
addImportEdge edge importEdge currentAddress g = do
case importEdge of
[] -> g
(name:[]) -> maybe
(addImportHole edge name currentAddress g)
(const (insertEdge edge name currentAddress g))
(lookupScope name g)
(name:names) -> let
scopeGraph' = maybe
(addImportHole edge name currentAddress g)
(const (insertEdge edge name currentAddress g))
(lookupScope name g)
in
addImportEdge edge names name scopeGraph'
addImportHole :: Ord scopeAddress => EdgeLabel -> scopeAddress -> scopeAddress -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
addImportHole edge name currentAddress g = let
scopeGraph' = newScope name mempty g
in
insertEdges (NonEmpty.fromList [Void, edge]) name currentAddress scopeGraph'
-- | Update the 'Scope' containing a 'Declaration' with an associated scope address.
-- Returns an unmodified 'ScopeGraph' if the 'Declaration' cannot be found with the given scope address.
insertDeclarationScope :: Ord scopeAddress => Declaration -> scopeAddress -> scopeAddress -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
insertDeclarationScope Declaration{..} associatedScopeAddress scopeAddress g = fromMaybe g $ do
declScopeAddress <- pathDeclarationScope scopeAddress =<< lookupScopePath unDeclaration scopeAddress g
scope <- lookupScope declScopeAddress g
(declInfo, position) <- second unPosition <$> lookupDeclaration unDeclaration declScopeAddress g
pure $ insertScope declScopeAddress (scope { declarations = Seq.update position (declInfo { infoAssociatedScope = Just associatedScopeAddress }) (declarations scope) }) g
-- | Insert a declaration span into the declaration in the scope graph.
insertDeclarationSpan :: Ord scopeAddress => Declaration -> Span -> ScopeGraph scopeAddress -> ScopeGraph scopeAddress
insertDeclarationSpan decl@Declaration{..} span g = fromMaybe g $ do
declScopeAddress <- scopeOfDeclaration decl g
(declInfo, position) <- second unPosition <$> lookupDeclaration unDeclaration declScopeAddress g
scope <- lookupScope declScopeAddress g
pure $ insertScope declScopeAddress (scope { declarations = Seq.update position (declInfo { infoSpan = span }) (declarations scope) }) g
-- | Insert a new scope with the given address and edges into the scope graph.
newScope :: Ord address => address -> Map EdgeLabel [address] -> ScopeGraph address -> ScopeGraph address
newScope address edges = insertScope address (Scope edges mempty mempty Standard)
-- | Insert a new scope with the given address and edges into the scope graph.
newPreludeScope :: Ord address => address -> Map EdgeLabel [address] -> ScopeGraph address -> ScopeGraph address
newPreludeScope address edges = insertScope address (Scope edges mempty mempty Preluded)
insertScope :: Ord address => address -> Scope address -> ScopeGraph address -> ScopeGraph address
insertScope address scope = ScopeGraph . Map.insert address scope . unScopeGraph
-- | Returns the scope of a reference in the scope graph.
scopeOfRef :: Ord scope => Reference -> ScopeGraph scope -> Maybe scope
scopeOfRef ref g@(ScopeGraph graph) = go (Map.keys graph)
where
go (s : scopes') = fromMaybe (go scopes') $ do
pathMap <- pathsOfScope s g
_ <- Map.lookup ref pathMap
pure (Just s)
go [] = Nothing
-- | Returns the path of a reference in the scope graph.
pathOfRef :: (Ord scope) => Reference -> ScopeGraph scope -> Maybe (Path scope)
pathOfRef ref graph = do
scope <- scopeOfRef ref graph
pathsMap <- pathsOfScope scope graph
snd <$> Map.lookup ref pathsMap
-- Returns the scope the declaration was declared in.
scopeOfDeclaration :: Ord scope => Declaration -> ScopeGraph scope -> Maybe scope
scopeOfDeclaration Declaration{..} g@(ScopeGraph graph) = go (Map.keys graph)
where
go = foldr (\ scope -> (scope <$ lookupDeclaration unDeclaration scope g <|>)) Nothing
-- | Returns the scope associated with a declaration (the child scope if any exists).
associatedScope :: Ord scope => Declaration -> ScopeGraph scope -> Maybe scope
associatedScope Declaration{..} g@(ScopeGraph graph) = go (Map.keys graph)
where
go = foldr lookupAssociatedScope Nothing
lookupAssociatedScope scope = ((lookupDeclaration unDeclaration scope g >>= infoAssociatedScope . fst) <|>)

View File

@ -6,22 +6,21 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module ScopeGraph.Convert
module Scope.Graph.Convert
( ToScopeGraph (..)
, Result (..)
, todo
, complete
) where
import Control.Effect.Sketch
import Control.Effect.ScopeGraph
import Data.List.NonEmpty
import Data.Name (Name)
import Data.Typeable
import Source.Loc
class Typeable t => ToScopeGraph t where
scopeGraph ::
( Has (Sketch Name) sig m
( ScopeGraphEff sig m
)
=> t Loc
-> m Result

Some files were not shown because too many files have changed in this diff Show More