1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 23:11:50 +03:00
semantic/src/Parser.hs
2017-10-04 10:57:39 -04:00

106 lines
5.5 KiB
Haskell

{-# LANGUAGE ConstraintKinds, DataKinds, GADTs, RankNTypes, ScopedTypeVariables, TypeFamilies, TypeOperators #-}
module Parser
( Parser(..)
, SomeParser(..)
, someParser
-- Syntax parsers
, syntaxParserForLanguage
-- À la carte parsers
, jsonParser
, markdownParser
, pythonParser
, rubyParser
, typescriptParser
) where
import qualified CMarkGFM
import Data.Functor.Classes (Eq1)
import Data.Kind
import Data.Ix
import Data.Record
import qualified Data.Syntax as Syntax
import Data.Syntax.Assignment
import Data.Term
import Data.Union
import Foreign.Ptr
import Info hiding (Empty, Go)
import Language
import qualified Language.JSON.Syntax as JSON
import qualified Language.Markdown.Syntax as Markdown
import qualified Language.Python.Syntax as Python
import qualified Language.Ruby.Syntax as Ruby
import qualified Language.TypeScript.Syntax as TypeScript
import Syntax hiding (Go)
import qualified TreeSitter.Language as TS (Language, Symbol)
import TreeSitter.Go
import TreeSitter.Python
import TreeSitter.Ruby
import TreeSitter.TypeScript
import TreeSitter.JSON
-- | A parser from 'Source' onto some term type.
data Parser term where
-- | A parser producing 'AST' using a 'TS.Language'.
ASTParser :: (Bounded grammar, Enum grammar) => Ptr TS.Language -> Parser (AST [] grammar)
-- | A parser producing an à la carte term given an 'AST'-producing parser and an 'Assignment' onto 'Term's in some syntax type.
AssignmentParser :: (Enum grammar, Ix grammar, Show grammar, TS.Symbol grammar, Syntax.Error :< fs, Eq1 ast, Apply Foldable fs, Apply Functor fs, Foldable ast, Functor ast)
=> Parser (Term ast (Node grammar)) -- ^ A parser producing AST.
-> Assignment ast grammar (Term (Union fs) (Record Location)) -- ^ An assignment from AST onto 'Term's.
-> Parser (Term (Union fs) (Record Location)) -- ^ A parser producing 'Term's.
-- | A tree-sitter parser.
TreeSitterParser :: Ptr TS.Language -> Parser (Term Syntax (Record DefaultFields))
-- | A parser for 'Markdown' using cmark.
MarkdownParser :: Parser (Term (TermF [] CMarkGFM.NodeType) (Node Markdown.Grammar))
-- | Apply all of a list of typeclasses to all of a list of functors using 'Apply'. Used by 'someParser' to constrain all of the language-specific syntax types to the typeclasses in question.
type family ApplyAll (typeclasses :: [(* -> *) -> Constraint]) (functors :: [* -> *]) :: Constraint where
ApplyAll (typeclass ': typeclasses) functors = (Apply typeclass functors, ApplyAll typeclasses functors)
ApplyAll '[] functors = ()
-- | A parser for some specific language, producing 'Term's whose syntax satisfies a list of typeclass constraints.
--
-- This enables us to abstract over the details of the specific syntax types in cases where we can describe all the requirements on the syntax with a list of typeclasses.
data SomeParser typeclasses where
SomeParser :: ApplyAll typeclasses fs => { unSomeParser :: Parser (Term (Union fs) (Record Location)) } -> SomeParser typeclasses
-- | Construct a 'SomeParser' given a proxy for a list of typeclasses and the 'Language' to be parsed, all of which must be satisfied by all of the types in the syntaxes of our supported languages.
--
-- This can be used to perform operations uniformly over terms produced by blobs with different 'Language's, and which therefore have different types in general. For example, given some 'Blob', we can parse and 'show' the parsed & assigned 'Term' like so:
--
-- > case someParser (Proxy :: Proxy '[Show1]) (blobLanguage language) of { Just parser -> runTask (parse parser blob) >>= putStrLn . show ; _ -> return () }
someParser :: (ApplyAll typeclasses JSON.Syntax, ApplyAll typeclasses Markdown.Syntax, ApplyAll typeclasses Python.Syntax, ApplyAll typeclasses Ruby.Syntax, ApplyAll typeclasses TypeScript.Syntax) => proxy typeclasses -> Language -> Maybe (SomeParser typeclasses)
someParser _ Go = Nothing
someParser _ JavaScript = Just (SomeParser typescriptParser)
someParser _ JSON = Just (SomeParser jsonParser)
someParser _ JSX = Just (SomeParser typescriptParser)
someParser _ Markdown = Just (SomeParser markdownParser)
someParser _ Python = Just (SomeParser pythonParser)
someParser _ Ruby = Just (SomeParser rubyParser)
someParser _ TypeScript = Just (SomeParser typescriptParser)
-- | Return a 'Language'-specific 'Parser', if one exists, falling back to the 'LineByLineParser'.
syntaxParserForLanguage :: Language -> Maybe (Parser (Term Syntax (Record DefaultFields)))
syntaxParserForLanguage language = case language of
Go -> Just (TreeSitterParser tree_sitter_go)
JavaScript -> Just (TreeSitterParser tree_sitter_typescript)
JSON -> Just (TreeSitterParser tree_sitter_json)
JSX -> Just (TreeSitterParser tree_sitter_typescript)
Ruby -> Just (TreeSitterParser tree_sitter_ruby)
TypeScript -> Just (TreeSitterParser tree_sitter_typescript)
_ -> Nothing
rubyParser :: Parser Ruby.Term
rubyParser = AssignmentParser (ASTParser tree_sitter_ruby) Ruby.assignment
pythonParser :: Parser Python.Term
pythonParser = AssignmentParser (ASTParser tree_sitter_python) Python.assignment
jsonParser :: Parser JSON.Term
jsonParser = AssignmentParser (ASTParser tree_sitter_json) JSON.assignment
typescriptParser :: Parser TypeScript.Term
typescriptParser = AssignmentParser (ASTParser tree_sitter_typescript) TypeScript.assignment
markdownParser :: Parser Markdown.Term
markdownParser = AssignmentParser MarkdownParser Markdown.assignment