2015-12-09 17:58:15 +03:00
module TreeSitter where
2016-02-09 22:34:23 +03:00
import Category
2016-02-11 01:30:14 +03:00
import Language
2015-12-17 00:24:23 +03:00
import Parser
2016-02-11 01:30:14 +03:00
import Range
2015-12-24 08:20:47 +03:00
import Source
2015-12-16 23:49:46 +03:00
import qualified Data.Set as Set
2015-12-09 17:58:15 +03:00
import Foreign
2016-02-28 03:34:10 +03:00
import Foreign.C.String
import Text.Parser.TreeSitter hiding ( Language ( .. ) )
import qualified Text.Parser.TreeSitter as TS
2015-12-09 17:58:15 +03:00
2016-02-11 02:07:27 +03:00
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
2016-02-28 03:34:10 +03:00
treeSitterParser :: Language -> Ptr TS . Language -> Parser
2016-02-11 02:07:27 +03:00
treeSitterParser language grammar contents = do
document <- ts_document_make
ts_document_set_language document grammar
2016-03-12 02:31:01 +03:00
withCString ( toString contents ) ( \ source -> do
2016-02-11 02:07:27 +03:00
ts_document_set_input_string document source
ts_document_parse document
term <- documentToTerm ( termConstructor $ categoriesForLanguage language ) document contents
ts_document_free document
return term )
2016-02-10 22:30:32 +03:00
2016-02-11 01:59:48 +03:00
-- Given a language and a node name, return the correct categories.
categoriesForLanguage :: Language -> String -> Set . Set Category
categoriesForLanguage language name = case ( language , name ) of
( JavaScript , " object " ) -> Set . singleton DictionaryLiteral
2016-02-17 18:59:41 +03:00
( JavaScript , " rel_op " ) -> Set . singleton BinaryOperator -- relational operator, e.g. >, <, <=, >=, ==, !=
2016-02-22 06:59:07 +03:00
( Ruby , " hash " ) -> Set . singleton DictionaryLiteral
2016-02-11 01:59:48 +03:00
_ -> defaultCategoryForNodeName name
-- | Given a node name from TreeSitter, return the correct categories.
2016-02-11 01:57:01 +03:00
defaultCategoryForNodeName :: String -> Set . Set Category
defaultCategoryForNodeName name = case name of
2016-02-10 21:03:15 +03:00
" function_call " -> Set . singleton FunctionCall
2016-02-17 18:59:50 +03:00
" pair " -> Set . singleton Pair
2016-02-24 23:48:05 +03:00
" string " -> Set . singleton StringLiteral
" integer " -> Set . singleton IntegerLiteral
" symbol " -> Set . singleton SymbolLiteral
2016-03-03 07:03:47 +03:00
" array " -> Set . singleton ArrayLiteral
2016-02-10 21:03:15 +03:00
_ -> Set . singleton ( Other name )
2016-02-09 22:34:23 +03:00
2016-02-03 21:24:17 +03:00
-- | Given a constructor and a tree sitter document, return a parser.
2016-02-28 03:34:10 +03:00
documentToTerm :: Constructor -> Ptr Document -> Parser
2015-12-23 18:34:40 +03:00
documentToTerm constructor document contents = alloca $ \ root -> do
2015-12-09 17:58:15 +03:00
ts_document_root_node_p document root
2016-02-14 05:27:27 +03:00
toTerm root
2015-12-23 17:49:55 +03:00
where toTerm node = do
name <- ts_node_p_name node document
2015-12-31 01:52:51 +03:00
name <- peekCString name
2015-12-23 17:49:55 +03:00
count <- ts_node_p_named_child_count node
2015-12-23 18:47:15 +03:00
children <- mapM ( alloca . getChild node ) $ take ( fromIntegral count ) [ 0 .. ]
2015-12-24 01:54:28 +03:00
-- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after we’ ve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it.
2015-12-24 01:39:13 +03:00
range <- return $! Range { start = fromIntegral $ ts_node_p_start_char node , end = fromIntegral $ ts_node_p_end_char node }
2015-12-09 17:58:15 +03:00
2016-02-14 05:27:27 +03:00
return $! constructor contents range name children
2015-12-23 18:47:15 +03:00
getChild node n out = do
2015-12-23 17:49:55 +03:00
_ <- ts_node_p_named_child node n out
2015-12-23 18:47:15 +03:00
toTerm out