2016-07-14 23:35:20 +03:00
{- # LANGUAGE DataKinds # -}
2016-08-25 22:42:30 +03:00
module TreeSitter ( treeSitterParser ) where
2015-12-09 17:58:15 +03:00
2016-05-26 19:58:04 +03:00
import Prologue hiding ( Constructor )
2016-08-16 00:25:56 +03:00
import Control.Monad
2016-02-09 22:34:23 +03:00
import Category
2016-08-16 00:25:56 +03:00
import Data.Record
2016-02-11 01:30:14 +03:00
import Language
2016-09-07 18:57:24 +03:00
import qualified Language.JavaScript as JS
2016-09-07 22:11:55 +03:00
import qualified Language.C as C
2015-12-17 00:24:23 +03:00
import Parser
2016-08-02 19:08:26 +03:00
import Range
2015-12-24 08:20:47 +03:00
import Source
2016-07-26 22:52:37 +03:00
import qualified Syntax
2015-12-09 17:58:15 +03:00
import Foreign
2016-02-28 03:34:10 +03:00
import Foreign.C.String
import Text.Parser.TreeSitter hiding ( Language ( .. ) )
import qualified Text.Parser.TreeSitter as TS
2016-07-28 01:11:55 +03:00
import SourceSpan
2016-08-23 00:56:48 +03:00
import Info
2015-12-09 17:58:15 +03:00
2016-02-11 02:07:27 +03:00
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
2016-07-26 22:52:37 +03:00
treeSitterParser :: Language -> Ptr TS . Language -> Parser ( Syntax . Syntax Text ) ( Record '[Range, Category] )
2016-07-28 01:11:55 +03:00
treeSitterParser language grammar blob = do
2016-02-11 02:07:27 +03:00
document <- ts_document_make
ts_document_set_language document grammar
2016-07-28 01:11:55 +03:00
withCString ( toString $ source blob ) ( \ source -> do
2016-02-11 02:07:27 +03:00
ts_document_set_input_string document source
ts_document_parse document
2016-07-28 01:11:55 +03:00
term <- documentToTerm language document blob
2016-02-11 02:07:27 +03:00
ts_document_free document
2016-05-26 19:58:04 +03:00
pure term )
2016-02-10 22:30:32 +03:00
2016-06-03 06:06:09 +03:00
-- | Return a parser for a tree sitter language & document.
2016-07-26 22:52:37 +03:00
documentToTerm :: Language -> Ptr Document -> Parser ( Syntax . Syntax Text ) ( Record '[Range, Category] )
2016-07-28 01:11:55 +03:00
documentToTerm language document blob = alloca $ \ root -> do
2015-12-09 17:58:15 +03:00
ts_document_root_node_p document root
2016-02-14 05:27:27 +03:00
toTerm root
2015-12-23 17:49:55 +03:00
where toTerm node = do
name <- ts_node_p_name node document
2015-12-31 01:52:51 +03:00
name <- peekCString name
2015-12-23 17:49:55 +03:00
count <- ts_node_p_named_child_count node
2016-05-26 21:56:38 +03:00
children <- traverse ( alloca . getChild node ) $ take ( fromIntegral count ) [ 0 .. ]
2015-12-09 17:58:15 +03:00
2016-08-16 00:12:15 +03:00
let range = Range { start = fromIntegral $ ts_node_p_start_char node , end = fromIntegral $ ts_node_p_end_char node }
2015-12-09 17:58:15 +03:00
2016-08-16 00:04:38 +03:00
let sourceSpan = SourceSpan { spanName = toS ( path blob )
2016-08-16 18:35:12 +03:00
, spanStart = SourcePos ( fromIntegral $! ts_node_p_start_point_row node ) ( fromIntegral $! ts_node_p_start_point_column node )
, spanEnd = SourcePos ( fromIntegral $! ts_node_p_end_point_row node ) ( fromIntegral $! ts_node_p_end_point_column node ) }
2015-12-09 17:58:15 +03:00
2016-08-16 00:12:15 +03:00
-- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after we’ ve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it.
2016-09-07 20:00:59 +03:00
range ` seq ` termConstructor ( source blob ) ( sourceSpan ` seq ` pure sourceSpan ) ( toS name ) range ( filter ( \ child -> category ( extract child ) /= Empty ) children )
2016-08-16 00:25:56 +03:00
getChild node n out = ts_node_p_named_child node n out >> toTerm out
2016-08-16 00:04:45 +03:00
{- # INLINE getChild # -}
2016-09-07 20:00:59 +03:00
termConstructor = case language of
JavaScript -> JS . termConstructor
2016-09-07 22:11:55 +03:00
C -> C . termConstructor
2016-09-07 22:14:20 +03:00
_ -> Language . termConstructor