1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 06:41:45 +03:00
semantic/src/TreeSitter.hs

123 lines
5.0 KiB
Haskell
Raw Normal View History

2016-07-14 23:35:20 +03:00
{-# LANGUAGE DataKinds #-}
module TreeSitter where
import Prologue hiding (Constructor)
2016-08-16 00:25:56 +03:00
import Control.Monad
2016-02-09 22:34:23 +03:00
import Category
2016-08-16 00:25:56 +03:00
import Data.Record
2016-02-11 01:30:14 +03:00
import Language
import Parser
2016-08-02 19:08:26 +03:00
import Range
2015-12-24 08:20:47 +03:00
import Source
import qualified Syntax
import Foreign
import Foreign.C.String
import Text.Parser.TreeSitter hiding (Language(..))
import qualified Text.Parser.TreeSitter as TS
2016-07-28 01:11:55 +03:00
import SourceSpan
2016-02-11 02:07:27 +03:00
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record '[Range, Category])
2016-07-28 01:11:55 +03:00
treeSitterParser language grammar blob = do
2016-02-11 02:07:27 +03:00
document <- ts_document_make
ts_document_set_language document grammar
2016-07-28 01:11:55 +03:00
withCString (toString $ source blob) (\source -> do
2016-02-11 02:07:27 +03:00
ts_document_set_input_string document source
ts_document_parse document
2016-07-28 01:11:55 +03:00
term <- documentToTerm language document blob
2016-02-11 02:07:27 +03:00
ts_document_free document
pure term)
2016-02-11 01:59:48 +03:00
-- Given a language and a node name, return the correct categories.
categoriesForLanguage :: Language -> Text -> Category
2016-02-11 01:59:48 +03:00
categoriesForLanguage language name = case (language, name) of
2016-06-28 23:38:06 +03:00
(JavaScript, "object") -> Object
(JavaScript, "expression_statement") -> ExpressionStatements
(JavaScript, "this_expression") -> Identifier
(JavaScript, "null") -> Identifier
(JavaScript, "undefined") -> Identifier
(JavaScript, "arrow_function") -> Function
(JavaScript, "generator_function") -> Function
2016-08-01 19:19:05 +03:00
(JavaScript, "math_op") -> BinaryOperator -- bitwise operator, e.g. +, -, *, /.
(JavaScript, "bool_op") -> BinaryOperator -- boolean operator, e.g. ||, &&.
(JavaScript, "bitwise_op") -> BinaryOperator -- bitwise operator, e.g. ^, &, etc.
(JavaScript, "rel_op") -> BinaryOperator -- relational operator, e.g. >, <, <=, >=, ==, !=.
(JavaScript, "comma_op") -> Operator -- comma operator, e.g. expr1, expr2.
(JavaScript, "delete_op") -> Operator -- delete operator, e.g. delete x[2].
(JavaScript, "type_op") -> Operator -- type operator, e.g. typeof Object.
(JavaScript, "void_op") -> Operator -- void operator, e.g. void 2.
(JavaScript, "for_in_statement") -> For
(JavaScript, "for_of_statement") -> For
2016-07-30 21:27:34 +03:00
(JavaScript, "new_expression") -> Constructor
(JavaScript, "class") -> Class
(JavaScript, "catch") -> Catch
(JavaScript, "finally") -> Finally
2016-08-12 00:39:44 +03:00
(JavaScript, "if_statement") -> If
2016-06-28 23:38:06 +03:00
(Ruby, "hash") -> Object
2016-02-11 01:59:48 +03:00
_ -> defaultCategoryForNodeName name
{-# INLINE categoriesForLanguage #-}
2016-02-11 01:59:48 +03:00
-- | Given a node name from TreeSitter, return the correct categories.
defaultCategoryForNodeName :: Text -> Category
2016-02-11 01:57:01 +03:00
defaultCategoryForNodeName name = case name of
2016-06-04 01:35:27 +03:00
"program" -> Program
2016-06-04 01:38:04 +03:00
"ERROR" -> Error
"function_call" -> FunctionCall
"pair" -> Pair
"string" -> StringLiteral
"integer" -> IntegerLiteral
"symbol" -> SymbolLiteral
"array" -> ArrayLiteral
2016-06-10 22:20:13 +03:00
"function" -> Function
"identifier" -> Identifier
"formal_parameters" -> Params
2016-06-14 23:51:34 +03:00
"arguments" -> Args
2016-06-10 22:20:13 +03:00
"statement_block" -> ExpressionStatements
"assignment" -> Assignment
"member_access" -> MemberAccess
"op" -> Operator
"subscript_access" -> SubscriptAccess
"regex" -> Regex
"template_string" -> TemplateString
"var_assignment" -> VarAssignment
"var_declaration" -> VarDecl
"switch_statement" -> Switch
"math_assignment" -> MathAssignment
"case" -> Case
"true" -> Boolean
"false" -> Boolean
"ternary" -> Ternary
"for_statement" -> For
2016-07-28 21:00:28 +03:00
"while_statement" -> While
"do_statement" -> DoWhile
2016-07-28 21:34:53 +03:00
"return_statement" -> Return
"throw_statement" -> Throw
"try_statement" -> Try
"method_definition" -> Method
2016-06-04 01:40:43 +03:00
_ -> Other name
{-# INLINE defaultCategoryForNodeName #-}
2016-02-09 22:34:23 +03:00
2016-06-03 06:06:09 +03:00
-- | Return a parser for a tree sitter language & document.
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category])
2016-07-28 01:11:55 +03:00
documentToTerm language document blob = alloca $ \ root -> do
ts_document_root_node_p document root
toTerm root
where toTerm node = do
name <- ts_node_p_name node document
2015-12-31 01:52:51 +03:00
name <- peekCString name
count <- ts_node_p_named_child_count node
2016-05-26 21:56:38 +03:00
children <- traverse (alloca . getChild node) $ take (fromIntegral count) [0..]
2016-08-16 00:12:15 +03:00
let range = Range { start = fromIntegral $ ts_node_p_start_char node, end = fromIntegral $ ts_node_p_end_char node }
let sourceSpan = SourceSpan { spanName = toS (path blob)
2016-07-29 21:05:11 +03:00
, spanStart = SourcePos (fromIntegral $ ts_node_p_start_point_row node) (fromIntegral $ ts_node_p_start_point_column node)
, spanEnd = SourcePos (fromIntegral $ ts_node_p_end_point_row node) (fromIntegral $ ts_node_p_end_point_column node) }
2016-08-16 00:12:15 +03:00
-- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after weve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it.
2016-08-16 00:46:43 +03:00
let info = range `seq` range .: categoriesForLanguage language (toS name) .: RNil
2016-08-16 17:59:10 +03:00
termConstructor (source blob) (pure sourceSpan) info children
2016-08-16 00:25:56 +03:00
getChild node n out = ts_node_p_named_child node n out >> toTerm out
{-# INLINE getChild #-}