2016-07-14 23:35:20 +03:00
|
|
|
|
{-# LANGUAGE DataKinds #-}
|
2016-08-25 22:42:30 +03:00
|
|
|
|
module TreeSitter (treeSitterParser) where
|
2015-12-09 17:58:15 +03:00
|
|
|
|
|
2016-05-26 19:58:04 +03:00
|
|
|
|
import Prologue hiding (Constructor)
|
2016-02-09 22:34:23 +03:00
|
|
|
|
import Category
|
2016-08-16 00:25:56 +03:00
|
|
|
|
import Data.Record
|
2016-02-11 01:30:14 +03:00
|
|
|
|
import Language
|
2016-09-07 22:11:55 +03:00
|
|
|
|
import qualified Language.C as C
|
2016-10-17 22:51:53 +03:00
|
|
|
|
import qualified Language.Go as Go
|
2016-11-02 22:25:50 +03:00
|
|
|
|
import qualified Language.JavaScript as JS
|
2016-10-19 18:22:10 +03:00
|
|
|
|
import qualified Language.Ruby as Ruby
|
2015-12-17 00:24:23 +03:00
|
|
|
|
import Parser
|
2016-08-02 19:08:26 +03:00
|
|
|
|
import Range
|
2015-12-24 08:20:47 +03:00
|
|
|
|
import Source
|
2016-07-26 22:52:37 +03:00
|
|
|
|
import qualified Syntax
|
2015-12-09 17:58:15 +03:00
|
|
|
|
import Foreign
|
2016-02-28 03:34:10 +03:00
|
|
|
|
import Foreign.C.String
|
|
|
|
|
import Text.Parser.TreeSitter hiding (Language(..))
|
|
|
|
|
import qualified Text.Parser.TreeSitter as TS
|
2016-07-28 01:11:55 +03:00
|
|
|
|
import SourceSpan
|
2016-08-23 00:56:48 +03:00
|
|
|
|
import Info
|
2015-12-09 17:58:15 +03:00
|
|
|
|
|
2016-02-11 02:07:27 +03:00
|
|
|
|
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
|
2016-10-06 00:27:45 +03:00
|
|
|
|
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
|
2016-07-28 01:11:55 +03:00
|
|
|
|
treeSitterParser language grammar blob = do
|
2016-10-06 21:07:40 +03:00
|
|
|
|
document <- ts_document_new
|
2016-02-11 02:07:27 +03:00
|
|
|
|
ts_document_set_language document grammar
|
2016-07-28 01:11:55 +03:00
|
|
|
|
withCString (toString $ source blob) (\source -> do
|
2016-02-11 02:07:27 +03:00
|
|
|
|
ts_document_set_input_string document source
|
|
|
|
|
ts_document_parse document
|
2016-07-28 01:11:55 +03:00
|
|
|
|
term <- documentToTerm language document blob
|
2016-02-11 02:07:27 +03:00
|
|
|
|
ts_document_free document
|
2016-05-26 19:58:04 +03:00
|
|
|
|
pure term)
|
2016-02-10 22:30:32 +03:00
|
|
|
|
|
2016-06-03 06:06:09 +03:00
|
|
|
|
-- | Return a parser for a tree sitter language & document.
|
2016-10-06 00:27:45 +03:00
|
|
|
|
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
|
2016-09-08 16:40:00 +03:00
|
|
|
|
documentToTerm language document SourceBlob{..} = alloca $ \ root -> do
|
2015-12-09 17:58:15 +03:00
|
|
|
|
ts_document_root_node_p document root
|
2016-02-14 05:27:27 +03:00
|
|
|
|
toTerm root
|
2015-12-23 17:49:55 +03:00
|
|
|
|
where toTerm node = do
|
|
|
|
|
name <- ts_node_p_name node document
|
2015-12-31 01:52:51 +03:00
|
|
|
|
name <- peekCString name
|
2015-12-23 17:49:55 +03:00
|
|
|
|
count <- ts_node_p_named_child_count node
|
2016-09-08 16:42:16 +03:00
|
|
|
|
children <- filter isNonEmpty <$> traverse (alloca . getChild node) (take (fromIntegral count) [0..])
|
2015-12-09 17:58:15 +03:00
|
|
|
|
|
2016-08-16 00:12:15 +03:00
|
|
|
|
let range = Range { start = fromIntegral $ ts_node_p_start_char node, end = fromIntegral $ ts_node_p_end_char node }
|
2015-12-09 17:58:15 +03:00
|
|
|
|
|
2016-10-07 01:31:06 +03:00
|
|
|
|
let startPos = SourcePos (1 + (fromIntegral $! ts_node_p_start_point_row node)) (1 + (fromIntegral $! ts_node_p_start_point_column node))
|
|
|
|
|
let endPos = SourcePos (1 + (fromIntegral $! ts_node_p_end_point_row node)) (1 + (fromIntegral $! ts_node_p_end_point_column node))
|
2016-10-11 22:09:24 +03:00
|
|
|
|
let sourceSpan = SourceSpan { spanStart = startPos , spanEnd = endPos }
|
2015-12-09 17:58:15 +03:00
|
|
|
|
|
2016-09-10 00:23:19 +03:00
|
|
|
|
-- Note: The strict application here is semantically important.
|
|
|
|
|
-- Without it, we may not evaluate the range until after we’ve exited
|
|
|
|
|
-- the scope that `node` was allocated within, meaning `alloca` will
|
|
|
|
|
-- free it & other stack data may overwrite it.
|
2016-09-08 16:42:16 +03:00
|
|
|
|
range `seq` termConstructor source (pure $! sourceSpan) (toS name) range children
|
2016-08-16 00:25:56 +03:00
|
|
|
|
getChild node n out = ts_node_p_named_child node n out >> toTerm out
|
2016-08-16 00:04:45 +03:00
|
|
|
|
{-# INLINE getChild #-}
|
2016-09-07 20:00:59 +03:00
|
|
|
|
termConstructor = case language of
|
|
|
|
|
JavaScript -> JS.termConstructor
|
2016-09-07 22:11:55 +03:00
|
|
|
|
C -> C.termConstructor
|
2016-10-17 22:51:53 +03:00
|
|
|
|
Go -> Go.termConstructor
|
2016-10-19 18:22:10 +03:00
|
|
|
|
Ruby -> Ruby.termConstructor
|
2016-09-07 22:14:20 +03:00
|
|
|
|
_ -> Language.termConstructor
|
2016-09-08 16:42:16 +03:00
|
|
|
|
isNonEmpty child = category (extract child) /= Empty
|