1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 14:21:31 +03:00
semantic/src/TreeSitter.hs
2016-06-03 18:38:04 -04:00

69 lines
2.7 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module TreeSitter where
import Prologue hiding (Constructor)
import Data.String
import Category
import Info
import Language
import Parser
import Range
import Source
import Foreign
import Foreign.C.String
import Text.Parser.TreeSitter hiding (Language(..))
import qualified Text.Parser.TreeSitter as TS
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
treeSitterParser :: Language -> Ptr TS.Language -> Parser
treeSitterParser language grammar contents = do
document <- ts_document_make
ts_document_set_language document grammar
withCString (toString contents) (\source -> do
ts_document_set_input_string document source
ts_document_parse document
term <- documentToTerm language document contents
ts_document_free document
pure term)
-- Given a language and a node name, return the correct categories.
categoriesForLanguage :: Language -> String -> Category
categoriesForLanguage language name = case (language, name) of
(JavaScript, "object") -> DictionaryLiteral
(JavaScript, "rel_op") -> BinaryOperator -- relational operator, e.g. >, <, <=, >=, ==, !=
(Ruby, "hash") -> DictionaryLiteral
_ -> defaultCategoryForNodeName name
-- | Given a node name from TreeSitter, return the correct categories.
defaultCategoryForNodeName :: String -> Category
defaultCategoryForNodeName name = case name of
"program" -> Program
"ERROR" -> Error
"function_call" -> FunctionCall
"pair" -> Pair
"string" -> StringLiteral
"integer" -> IntegerLiteral
"symbol" -> SymbolLiteral
"array" -> ArrayLiteral
_ -> (Other name)
-- | Return a parser for a tree sitter language & document.
documentToTerm :: Language -> Ptr Document -> Parser
documentToTerm language document contents = alloca $ \ root -> do
ts_document_root_node_p document root
toTerm root
where toTerm node = do
name <- ts_node_p_name node document
name <- peekCString name
count <- ts_node_p_named_child_count node
children <- traverse (alloca . getChild node) $ take (fromIntegral count) [0..]
-- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after weve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it.
range <- pure $! Range { start = fromIntegral $ ts_node_p_start_char node, end = fromIntegral $ ts_node_p_end_char node }
let size' = 1 + sum (size . extract <$> children)
let info = Info range (categoriesForLanguage language name) size' size'
pure $! termConstructor contents info children
getChild node n out = do
_ <- ts_node_p_named_child node n out
toTerm out