1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 14:21:31 +03:00
semantic/src/TreeSitter.hs

67 lines
2.7 KiB
Haskell
Raw Normal View History

module TreeSitter where
import Prologue hiding (Constructor)
import Data.String
2016-02-09 22:34:23 +03:00
import Category
2016-06-03 06:06:09 +03:00
import Info
2016-02-11 01:30:14 +03:00
import Language
import Parser
2016-02-11 01:30:14 +03:00
import Range
2015-12-24 08:20:47 +03:00
import Source
import Foreign
import Foreign.C.String
import Text.Parser.TreeSitter hiding (Language(..))
import qualified Text.Parser.TreeSitter as TS
2016-02-11 02:07:27 +03:00
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
treeSitterParser :: Language -> Ptr TS.Language -> Parser
2016-02-11 02:07:27 +03:00
treeSitterParser language grammar contents = do
document <- ts_document_make
ts_document_set_language document grammar
withCString (toString contents) (\source -> do
2016-02-11 02:07:27 +03:00
ts_document_set_input_string document source
ts_document_parse document
2016-06-03 06:06:09 +03:00
term <- documentToTerm language document contents
2016-02-11 02:07:27 +03:00
ts_document_free document
pure term)
2016-02-11 01:59:48 +03:00
-- Given a language and a node name, return the correct categories.
categoriesForLanguage :: Language -> String -> Category
2016-02-11 01:59:48 +03:00
categoriesForLanguage language name = case (language, name) of
(JavaScript, "object") -> DictionaryLiteral
(JavaScript, "rel_op") -> BinaryOperator -- relational operator, e.g. >, <, <=, >=, ==, !=
(Ruby, "hash") -> DictionaryLiteral
2016-02-11 01:59:48 +03:00
_ -> defaultCategoryForNodeName name
-- | Given a node name from TreeSitter, return the correct categories.
defaultCategoryForNodeName :: String -> Category
2016-02-11 01:57:01 +03:00
defaultCategoryForNodeName name = case name of
"function_call" -> FunctionCall
"pair" -> Pair
"string" -> StringLiteral
"integer" -> IntegerLiteral
"symbol" -> SymbolLiteral
"array" -> ArrayLiteral
_ -> (Other name)
2016-02-09 22:34:23 +03:00
2016-06-03 06:06:09 +03:00
-- | Return a parser for a tree sitter language & document.
documentToTerm :: Language -> Ptr Document -> Parser
documentToTerm language document contents = alloca $ \ root -> do
ts_document_root_node_p document root
toTerm root
where toTerm node = do
name <- ts_node_p_name node document
2015-12-31 01:52:51 +03:00
name <- peekCString name
count <- ts_node_p_named_child_count node
2016-05-26 21:56:38 +03:00
children <- traverse (alloca . getChild node) $ take (fromIntegral count) [0..]
2015-12-24 01:54:28 +03:00
-- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after weve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it.
2016-05-26 21:56:38 +03:00
range <- pure $! Range { start = fromIntegral $ ts_node_p_start_char node, end = fromIntegral $ ts_node_p_end_char node }
let size' = 1 + sum (size . extract <$> children)
let info = Info range (categoriesForLanguage language name) size' size'
2016-06-03 06:06:09 +03:00
pure $! termConstructor contents info children
2015-12-23 18:47:15 +03:00
getChild node n out = do
_ <- ts_node_p_named_child node n out
2015-12-23 18:47:15 +03:00
toTerm out