2016-07-14 23:35:20 +03:00
{- # LANGUAGE DataKinds # -}
2015-12-09 17:58:15 +03:00
module TreeSitter where
2016-05-26 19:58:04 +03:00
import Prologue hiding ( Constructor )
2016-06-17 20:33:50 +03:00
import Data.Record
2016-02-09 22:34:23 +03:00
import Category
2016-02-11 01:30:14 +03:00
import Language
2015-12-17 00:24:23 +03:00
import Parser
2016-08-02 19:08:26 +03:00
import Range
2015-12-24 08:20:47 +03:00
import Source
2016-07-26 22:52:37 +03:00
import qualified Syntax
2015-12-09 17:58:15 +03:00
import Foreign
2016-02-28 03:34:10 +03:00
import Foreign.C.String
import Text.Parser.TreeSitter hiding ( Language ( .. ) )
import qualified Text.Parser.TreeSitter as TS
2016-07-28 01:11:55 +03:00
import SourceSpan
2015-12-09 17:58:15 +03:00
2016-02-11 02:07:27 +03:00
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
2016-07-26 22:52:37 +03:00
treeSitterParser :: Language -> Ptr TS . Language -> Parser ( Syntax . Syntax Text ) ( Record '[Range, Category] )
2016-07-28 01:11:55 +03:00
treeSitterParser language grammar blob = do
2016-02-11 02:07:27 +03:00
document <- ts_document_make
ts_document_set_language document grammar
2016-07-28 01:11:55 +03:00
withCString ( toString $ source blob ) ( \ source -> do
2016-02-11 02:07:27 +03:00
ts_document_set_input_string document source
ts_document_parse document
2016-07-28 01:11:55 +03:00
term <- documentToTerm language document blob
2016-02-11 02:07:27 +03:00
ts_document_free document
2016-05-26 19:58:04 +03:00
pure term )
2016-02-10 22:30:32 +03:00
2016-02-11 01:59:48 +03:00
-- Given a language and a node name, return the correct categories.
2016-06-07 02:53:48 +03:00
categoriesForLanguage :: Language -> Text -> Category
2016-02-11 01:59:48 +03:00
categoriesForLanguage language name = case ( language , name ) of
2016-06-28 23:38:06 +03:00
( JavaScript , " object " ) -> Object
2016-07-25 21:57:14 +03:00
( JavaScript , " expression_statement " ) -> ExpressionStatements
2016-06-16 17:54:46 +03:00
( JavaScript , " this_expression " ) -> Identifier
( JavaScript , " null " ) -> Identifier
( JavaScript , " undefined " ) -> Identifier
2016-06-16 19:25:17 +03:00
( JavaScript , " arrow_function " ) -> Function
2016-06-16 23:34:48 +03:00
( JavaScript , " generator_function " ) -> Function
2016-08-01 19:19:05 +03:00
( JavaScript , " math_op " ) -> BinaryOperator -- bitwise operator, e.g. +, -, *, /.
( JavaScript , " bool_op " ) -> BinaryOperator -- boolean operator, e.g. ||, &&.
( JavaScript , " bitwise_op " ) -> BinaryOperator -- bitwise operator, e.g. ^, &, etc.
( JavaScript , " rel_op " ) -> BinaryOperator -- relational operator, e.g. >, <, <=, >=, ==, !=.
( JavaScript , " comma_op " ) -> Operator -- comma operator, e.g. expr1, expr2.
( JavaScript , " delete_op " ) -> Operator -- delete operator, e.g. delete x[2].
( JavaScript , " type_op " ) -> Operator -- type operator, e.g. typeof Object.
( JavaScript , " void_op " ) -> Operator -- void operator, e.g. void 2.
2016-07-28 20:50:43 +03:00
( JavaScript , " for_in_statement " ) -> For
( JavaScript , " for_of_statement " ) -> For
2016-07-30 21:27:34 +03:00
( JavaScript , " new_expression " ) -> Constructor
2016-07-29 23:28:51 +03:00
( JavaScript , " class " ) -> Class
2016-08-01 20:38:50 +03:00
( JavaScript , " catch " ) -> Catch
( JavaScript , " finally " ) -> Finally
2016-08-12 00:39:44 +03:00
( JavaScript , " if_statement " ) -> If
2016-02-22 06:59:07 +03:00
2016-06-28 23:38:06 +03:00
( Ruby , " hash " ) -> Object
2016-02-11 01:59:48 +03:00
_ -> defaultCategoryForNodeName name
-- | Given a node name from TreeSitter, return the correct categories.
2016-06-07 02:53:48 +03:00
defaultCategoryForNodeName :: Text -> Category
2016-02-11 01:57:01 +03:00
defaultCategoryForNodeName name = case name of
2016-06-04 01:35:27 +03:00
" program " -> Program
2016-06-04 01:38:04 +03:00
" ERROR " -> Error
2016-05-31 23:13:01 +03:00
" function_call " -> FunctionCall
" pair " -> Pair
" string " -> StringLiteral
" integer " -> IntegerLiteral
" symbol " -> SymbolLiteral
" array " -> ArrayLiteral
2016-06-10 22:20:13 +03:00
" function " -> Function
" identifier " -> Identifier
" formal_parameters " -> Params
2016-06-14 23:51:34 +03:00
" arguments " -> Args
2016-06-10 22:20:13 +03:00
" statement_block " -> ExpressionStatements
2016-06-14 01:33:01 +03:00
" assignment " -> Assignment
" member_access " -> MemberAccess
2016-06-18 01:01:35 +03:00
" op " -> Operator
2016-06-17 22:39:47 +03:00
" subscript_access " -> SubscriptAccess
2016-06-15 19:09:52 +03:00
" regex " -> Regex
2016-06-15 18:39:36 +03:00
" template_string " -> TemplateString
2016-06-15 21:17:03 +03:00
" var_assignment " -> VarAssignment
" var_declaration " -> VarDecl
2016-06-16 02:52:39 +03:00
" switch_statement " -> Switch
2016-06-17 22:57:51 +03:00
" math_assignment " -> MathAssignment
2016-06-16 02:52:39 +03:00
" case " -> Case
2016-06-16 17:55:13 +03:00
" true " -> Boolean
" false " -> Boolean
2016-06-17 23:33:22 +03:00
" ternary " -> Ternary
2016-07-28 20:50:43 +03:00
" for_statement " -> For
2016-07-28 21:00:28 +03:00
" while_statement " -> While
" do_statement " -> DoWhile
2016-07-28 21:34:53 +03:00
" return_statement " -> Return
2016-07-29 22:33:15 +03:00
" throw_statement " -> Throw
2016-07-29 22:59:51 +03:00
" try_statement " -> Try
2016-07-30 21:08:46 +03:00
" method_definition " -> Method
2016-06-04 01:40:43 +03:00
_ -> Other name
2016-02-09 22:34:23 +03:00
2016-06-03 06:06:09 +03:00
-- | Return a parser for a tree sitter language & document.
2016-07-26 22:52:37 +03:00
documentToTerm :: Language -> Ptr Document -> Parser ( Syntax . Syntax Text ) ( Record '[Range, Category] )
2016-07-28 01:11:55 +03:00
documentToTerm language document blob = alloca $ \ root -> do
2015-12-09 17:58:15 +03:00
ts_document_root_node_p document root
2016-02-14 05:27:27 +03:00
toTerm root
2015-12-23 17:49:55 +03:00
where toTerm node = do
name <- ts_node_p_name node document
2015-12-31 01:52:51 +03:00
name <- peekCString name
2015-12-23 17:49:55 +03:00
count <- ts_node_p_named_child_count node
2016-05-26 21:56:38 +03:00
children <- traverse ( alloca . getChild node ) $ take ( fromIntegral count ) [ 0 .. ]
2015-12-24 01:54:28 +03:00
-- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after we’ ve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it.
2016-05-26 21:56:38 +03:00
range <- pure $! Range { start = fromIntegral $ ts_node_p_start_char node , end = fromIntegral $ ts_node_p_end_char node }
2015-12-09 17:58:15 +03:00
2016-07-29 21:05:11 +03:00
sourceSpan <- pure $! SourceSpan { spanName = toS ( path blob )
, spanStart = SourcePos ( fromIntegral $ ts_node_p_start_point_row node ) ( fromIntegral $ ts_node_p_start_point_column node )
, spanEnd = SourcePos ( fromIntegral $ ts_node_p_end_point_row node ) ( fromIntegral $ ts_node_p_end_point_column node ) }
2015-12-09 17:58:15 +03:00
2016-07-26 22:52:37 +03:00
let info = range .: ( categoriesForLanguage language ( toS name ) ) .: RNil
2016-07-29 21:05:11 +03:00
pure $! termConstructor ( source blob ) sourceSpan info children
2015-12-23 18:47:15 +03:00
getChild node n out = do
2015-12-23 17:49:55 +03:00
_ <- ts_node_p_named_child node n out
2015-12-23 18:47:15 +03:00
toTerm out