1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 14:21:31 +03:00
semantic/src/TreeSitter.hs

64 lines
2.7 KiB
Haskell
Raw Normal View History

2016-07-14 23:35:20 +03:00
{-# LANGUAGE DataKinds #-}
module TreeSitter (treeSitterParser) where
import Prologue hiding (Constructor)
import Category
import qualified Control.Exception as Exception
import Data.Record
import Language
import qualified Language.JavaScript as JS
import qualified Language.C as C
import Parser
import Range
import Source
import qualified Syntax
import Foreign
import Foreign.C.String
import Text.Parser.TreeSitter hiding (Language(..))
import qualified Text.Parser.TreeSitter as TS
import SourceSpan
import Info
2016-02-11 02:07:27 +03:00
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
--
-- The parser marshals the blob's source into a C string, creates a fresh
-- TreeSitter document configured with @grammar@, parses the string, and
-- converts the resulting tree into a term with 'documentToTerm'.
--
-- The document is acquired and released with 'Exception.bracket', so
-- 'ts_document_free' runs even when parsing or term construction throws;
-- previously the document was only freed on the success path.
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
treeSitterParser language grammar blob =
  -- The marshalled C string is only valid inside 'withCString', so the
  -- document's whole lifetime (including its release) is kept within it.
  withCString (toString $ source blob) $ \ sourceString ->
    Exception.bracket ts_document_make ts_document_free $ \ document -> do
      ts_document_set_language document grammar
      ts_document_set_input_string document sourceString
      ts_document_parse document
      documentToTerm language document blob
2016-06-03 06:06:09 +03:00
-- | Return a parser for a tree sitter language & document.
--
-- Starting at the document's root node, each node is turned into a term by
-- reading its name, recursively converting its named children (dropping any
-- whose category is 'Empty'), and handing the pieces to the term constructor
-- selected for @language@.
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
documentToTerm language document SourceBlob{..} = alloca $ \ rootNode -> do
  ts_document_root_node_p document rootNode
  nodeToTerm rootNode
  where
    -- Build the term for a single node, recursing into its named children.
    nodeToTerm node = do
      nodeName <- ts_node_p_name node document >>= peekCString
      childCount <- ts_node_p_named_child_count node
      childTerms <- traverse (alloca . childToTerm node) (take (fromIntegral childCount) [0..])
      let namedChildren = filter hasContent childTerms
          range = Range
            { start = fromIntegral $ ts_node_p_start_char node
            , end = fromIntegral $ ts_node_p_end_char node
            }
          sourceSpan = SourceSpan
            { spanName = toS path
            , spanStart = SourcePos (fromIntegral $! ts_node_p_start_point_row node) (fromIntegral $! ts_node_p_start_point_column node)
            , spanEnd = SourcePos (fromIntegral $! ts_node_p_end_point_row node) (fromIntegral $! ts_node_p_end_point_column node)
            }
      -- Note: The strict application here is semantically important.
      -- Without it, we may not evaluate the range until after we've exited
      -- the scope that `node` was allocated within, meaning `alloca` will
      -- free it & other stack data may overwrite it.
      range `seq` buildTerm source (pure $! sourceSpan) (toS nodeName) range namedChildren

    -- Fetch the nth named child of `node` into `out` and convert it to a term.
    childToTerm node n out = ts_node_p_named_child node n out >> nodeToTerm out
    {-# INLINE childToTerm #-}

    -- Pick the language-specific term constructor, falling back to the
    -- generic one for every other language.
    buildTerm = case language of
      JavaScript -> JS.termConstructor
      C -> C.termConstructor
      _ -> Language.termConstructor

    -- Children whose category is 'Empty' are discarded.
    hasContent child = category (extract child) /= Empty