2016-07-14 23:35:20 +03:00
|
|
|
{-# LANGUAGE DataKinds #-}
|
2017-01-20 21:04:36 +03:00
|
|
|
module TreeSitter
|
|
|
|
( treeSitterParser
|
|
|
|
, defaultTermAssignment
|
|
|
|
) where
|
2015-12-09 17:58:15 +03:00
|
|
|
|
2016-05-26 19:58:04 +03:00
|
|
|
import Prologue hiding (Constructor)
|
2016-02-09 22:34:23 +03:00
|
|
|
import Category
|
2016-08-16 00:25:56 +03:00
|
|
|
import Data.Record
|
2016-02-11 01:30:14 +03:00
|
|
|
import Language
|
2016-09-07 22:11:55 +03:00
|
|
|
import qualified Language.C as C
|
2016-10-17 22:51:53 +03:00
|
|
|
import qualified Language.Go as Go
|
2016-11-02 22:25:50 +03:00
|
|
|
import qualified Language.JavaScript as JS
|
2016-10-19 18:22:10 +03:00
|
|
|
import qualified Language.Ruby as Ruby
|
2015-12-17 00:24:23 +03:00
|
|
|
import Parser
|
2016-08-02 19:08:26 +03:00
|
|
|
import Range
|
2015-12-24 08:20:47 +03:00
|
|
|
import Source
|
2016-07-26 22:52:37 +03:00
|
|
|
import qualified Syntax
|
2015-12-09 17:58:15 +03:00
|
|
|
import Foreign
|
2017-02-10 19:02:38 +03:00
|
|
|
import Foreign.C.String (peekCString)
|
2017-03-27 22:48:24 +03:00
|
|
|
import Foreign.Marshal.Array (allocaArray)
|
2017-02-10 19:02:38 +03:00
|
|
|
import Data.Text.Foreign (withCStringLen)
|
2017-01-20 21:03:52 +03:00
|
|
|
import qualified Syntax as S
|
2017-01-19 23:18:02 +03:00
|
|
|
import Term
|
2016-02-28 03:34:10 +03:00
|
|
|
import Text.Parser.TreeSitter hiding (Language(..))
|
|
|
|
import qualified Text.Parser.TreeSitter as TS
|
2016-07-28 01:11:55 +03:00
|
|
|
import SourceSpan
|
2016-08-23 00:56:48 +03:00
|
|
|
import Info
|
2015-12-09 17:58:15 +03:00
|
|
|
|
2016-02-11 02:07:27 +03:00
|
|
|
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
|
2016-10-06 00:27:45 +03:00
|
|
|
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
|
2016-07-28 01:11:55 +03:00
|
|
|
treeSitterParser language grammar blob = do
|
2016-10-06 21:07:40 +03:00
|
|
|
document <- ts_document_new
|
2016-02-11 02:07:27 +03:00
|
|
|
ts_document_set_language document grammar
|
2017-02-10 19:02:38 +03:00
|
|
|
withCStringLen (toText (source blob)) $ \ (source, len) -> do
|
2017-02-10 17:23:51 +03:00
|
|
|
ts_document_set_input_string_with_length document source len
|
|
|
|
ts_document_parse document
|
|
|
|
term <- documentToTerm language document blob
|
|
|
|
ts_document_free document
|
|
|
|
pure term
|
|
|
|
|
2016-02-10 22:30:32 +03:00
|
|
|
|
2016-06-03 06:06:09 +03:00
|
|
|
-- | Return a parser for a tree sitter language & document.
|
2016-10-06 00:27:45 +03:00
|
|
|
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
|
2017-03-27 22:54:34 +03:00
|
|
|
documentToTerm language document SourceBlob{..} = do
|
|
|
|
root <- alloca (\ rootPtr -> do
|
|
|
|
ts_document_root_node_p document rootPtr
|
|
|
|
peek rootPtr)
|
2017-03-27 22:59:48 +03:00
|
|
|
toTerm root source
|
|
|
|
where toTerm :: Node -> Source -> IO (Term (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan]))
|
|
|
|
toTerm node source = do
|
2017-03-27 22:48:24 +03:00
|
|
|
name <- peekCString (nodeType node)
|
2017-03-27 22:59:48 +03:00
|
|
|
let range = nodeRange node
|
2017-02-13 19:41:57 +03:00
|
|
|
|
2017-03-27 22:54:34 +03:00
|
|
|
let childToTerm childNode =
|
|
|
|
let childRange = nodeRange childNode in
|
2017-03-27 22:59:48 +03:00
|
|
|
toTerm childNode (slice (offsetRange childRange (negate (start range))) source)
|
2017-03-27 22:54:34 +03:00
|
|
|
let getChildren count copy = do
|
|
|
|
nodes <- allocaArray count $ \ childNodesPtr -> do
|
|
|
|
_ <- with node (\ nodePtr -> copy document nodePtr childNodesPtr (fromIntegral count))
|
|
|
|
peekArray count childNodesPtr
|
2017-03-27 23:02:25 +03:00
|
|
|
children <- traverse childToTerm nodes
|
|
|
|
return $! filter isNonEmpty children
|
2017-02-13 19:41:57 +03:00
|
|
|
|
2017-03-27 23:03:35 +03:00
|
|
|
children <- getChildren (fromIntegral (nodeNamedChildCount node)) ts_node_copy_named_child_nodes
|
|
|
|
let allChildren = getChildren (fromIntegral (nodeChildCount node)) ts_node_copy_child_nodes
|
2016-12-06 22:21:31 +03:00
|
|
|
|
2017-03-27 23:04:43 +03:00
|
|
|
assignTerm language source (range :. categoryForLanguageProductionName language (toS name) :. nodeSpan node :. Nil) children allChildren
|
2017-01-19 20:26:20 +03:00
|
|
|
|
2017-02-13 19:41:10 +03:00
|
|
|
isNonEmpty :: HasField fields Category => SyntaxTerm Text fields -> Bool
|
|
|
|
isNonEmpty = (/= Empty) . category . extract
|
|
|
|
|
2017-03-27 22:48:24 +03:00
|
|
|
nodeRange :: Node -> Range
|
|
|
|
nodeRange Node{..} = Range (fromIntegral nodeStartByte) (fromIntegral nodeEndByte)
|
|
|
|
|
|
|
|
nodeSpan :: Node -> SourceSpan
|
|
|
|
nodeSpan Node{..} = nodeStartPoint `seq` nodeEndPoint `seq` SourceSpan (pointPos nodeStartPoint) (pointPos nodeEndPoint)
|
|
|
|
where pointPos Point{..} = pointRow `seq` pointColumn `seq` SourcePos (1 + fromIntegral pointRow) (1 + fromIntegral pointColumn)
|
2017-02-10 20:24:10 +03:00
|
|
|
|
2017-02-10 19:21:24 +03:00
|
|
|
assignTerm :: Language -> Source -> Record '[Range, Category, SourceSpan] -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO (SyntaxTerm Text '[ Range, Category, SourceSpan ])
|
2017-01-20 23:03:40 +03:00
|
|
|
assignTerm language source annotation children allChildren =
|
2017-01-24 00:04:31 +03:00
|
|
|
cofree . (annotation :<) <$> case assignTermByLanguage language source (category annotation) children of
|
2017-01-20 22:54:03 +03:00
|
|
|
Just a -> pure a
|
2017-01-20 22:55:43 +03:00
|
|
|
_ -> defaultTermAssignment source (category annotation) children allChildren
|
2017-02-10 19:21:24 +03:00
|
|
|
where assignTermByLanguage :: Language -> Source -> Category -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> Maybe (S.Syntax Text (SyntaxTerm Text '[ Range, Category, SourceSpan ]))
|
2017-01-20 21:37:20 +03:00
|
|
|
assignTermByLanguage = \case
|
2017-01-20 22:09:44 +03:00
|
|
|
JavaScript -> JS.termAssignment
|
2017-01-20 22:12:56 +03:00
|
|
|
C -> C.termAssignment
|
2017-01-24 00:01:42 +03:00
|
|
|
Language.Go -> Go.termAssignment
|
2017-01-20 22:15:09 +03:00
|
|
|
Ruby -> Ruby.termAssignment
|
2017-01-20 23:03:40 +03:00
|
|
|
_ -> \ _ _ _ -> Nothing
|
2017-01-19 23:18:02 +03:00
|
|
|
|
2017-02-10 19:21:24 +03:00
|
|
|
defaultTermAssignment :: Source -> Category -> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -> IO [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -> IO (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan]))
|
2017-01-20 22:59:49 +03:00
|
|
|
defaultTermAssignment source category children allChildren
|
|
|
|
| category `elem` operatorCategories = S.Operator <$> allChildren
|
|
|
|
| otherwise = pure $! case (category, children) of
|
2017-01-25 11:09:53 +03:00
|
|
|
(ParseError, children) -> S.ParseError children
|
2017-01-20 23:20:36 +03:00
|
|
|
|
2017-01-20 22:59:49 +03:00
|
|
|
(Comment, _) -> S.Comment (toText source)
|
2017-01-21 00:49:45 +03:00
|
|
|
|
2017-01-23 22:07:47 +03:00
|
|
|
(Pair, [key, value]) -> S.Pair key value
|
|
|
|
|
2017-01-21 00:49:45 +03:00
|
|
|
-- Control flow statements
|
2017-01-20 23:20:36 +03:00
|
|
|
(If, condition : body) -> S.If condition body
|
2017-01-23 23:06:05 +03:00
|
|
|
(Switch, _) -> uncurry S.Switch (Prologue.break ((== Case) . Info.category . extract) children)
|
2017-01-23 23:12:38 +03:00
|
|
|
(Case, expr : body) -> S.Case expr body
|
|
|
|
(While, expr : rest) -> S.While expr rest
|
2017-01-21 00:49:45 +03:00
|
|
|
|
|
|
|
-- Statements
|
2017-01-21 00:39:27 +03:00
|
|
|
(Return, _) -> S.Return children
|
2017-01-25 10:58:04 +03:00
|
|
|
(Yield, _) -> S.Yield children
|
2017-01-25 11:02:03 +03:00
|
|
|
(Throw, [expr]) -> S.Throw expr
|
2017-01-23 22:09:07 +03:00
|
|
|
(Break, [label]) -> S.Break (Just label)
|
|
|
|
(Break, []) -> S.Break Nothing
|
|
|
|
(Continue, [label]) -> S.Continue (Just label)
|
|
|
|
(Continue, []) -> S.Continue Nothing
|
2017-01-20 23:20:36 +03:00
|
|
|
|
2017-01-20 22:59:49 +03:00
|
|
|
(_, []) -> S.Leaf (toText source)
|
|
|
|
(_, children) -> S.Indexed children
|
|
|
|
where operatorCategories =
|
|
|
|
[ Operator
|
|
|
|
, Binary
|
|
|
|
, Unary
|
|
|
|
, RangeExpression
|
|
|
|
, ScopeOperator
|
|
|
|
, BooleanOperator
|
|
|
|
, MathOperator
|
|
|
|
, RelationalOperator
|
|
|
|
, BitwiseOperator
|
|
|
|
]
|
|
|
|
|
2017-01-20 21:03:52 +03:00
|
|
|
|
2017-01-19 20:26:20 +03:00
|
|
|
categoryForLanguageProductionName :: Language -> Text -> Category
|
2017-01-19 23:09:22 +03:00
|
|
|
categoryForLanguageProductionName = withDefaults . \case
|
2017-01-19 20:26:20 +03:00
|
|
|
JavaScript -> JS.categoryForJavaScriptProductionName
|
|
|
|
C -> C.categoryForCProductionName
|
|
|
|
Ruby -> Ruby.categoryForRubyName
|
|
|
|
Language.Go -> Go.categoryForGoName
|
|
|
|
_ -> Other
|
2017-01-19 23:09:22 +03:00
|
|
|
where withDefaults productionMap = \case
|
2017-01-25 11:08:06 +03:00
|
|
|
"ERROR" -> ParseError
|
2017-01-19 23:09:22 +03:00
|
|
|
s -> productionMap s
|