2017-06-07 22:19:32 +03:00
|
|
|
{-# LANGUAGE DataKinds, ScopedTypeVariables, TypeOperators #-}
|
2017-01-20 21:04:36 +03:00
|
|
|
module TreeSitter
|
|
|
|
( treeSitterParser
|
2017-05-19 20:01:28 +03:00
|
|
|
, parseToAST
|
2017-01-20 21:04:36 +03:00
|
|
|
) where
|
2015-12-09 17:58:15 +03:00
|
|
|
|
2016-02-09 22:34:23 +03:00
|
|
|
import Category
|
2017-07-28 21:37:02 +03:00
|
|
|
import Control.Comonad (extract)
|
2017-08-11 16:25:19 +03:00
|
|
|
import Control.Comonad.Cofree (unwrap)
|
2017-07-28 21:37:02 +03:00
|
|
|
import Control.Exception
|
|
|
|
import Control.Monad ((<=<))
|
2017-07-23 21:56:44 +03:00
|
|
|
import Data.Blob
|
2017-06-24 17:55:07 +03:00
|
|
|
import Data.ByteString.Unsafe (unsafeUseAsCStringLen)
|
2017-08-11 16:25:19 +03:00
|
|
|
import Data.Foldable (toList)
|
2017-04-12 22:03:35 +03:00
|
|
|
import Data.Functor.Foldable hiding (Nil)
|
2017-06-24 16:46:28 +03:00
|
|
|
import Data.Range
|
2016-08-16 00:25:56 +03:00
|
|
|
import Data.Record
|
2017-06-24 16:59:41 +03:00
|
|
|
import Data.Source
|
2017-06-24 16:41:51 +03:00
|
|
|
import Data.Span
|
2017-04-12 21:31:55 +03:00
|
|
|
import qualified Data.Syntax.Assignment as A
|
2017-07-28 20:57:58 +03:00
|
|
|
import Data.Text (Text, pack)
|
2016-02-11 01:30:14 +03:00
|
|
|
import Language
|
2016-10-17 22:51:53 +03:00
|
|
|
import qualified Language.Go as Go
|
2017-03-16 00:32:31 +03:00
|
|
|
import qualified Language.TypeScript as TS
|
2016-10-19 18:22:10 +03:00
|
|
|
import qualified Language.Ruby as Ruby
|
2015-12-09 17:58:15 +03:00
|
|
|
import Foreign
|
2017-02-10 19:02:38 +03:00
|
|
|
import Foreign.C.String (peekCString)
|
2017-03-27 22:48:24 +03:00
|
|
|
import Foreign.Marshal.Array (allocaArray)
|
2017-01-20 21:03:52 +03:00
|
|
|
import qualified Syntax as S
|
2017-01-19 23:18:02 +03:00
|
|
|
import Term
|
2017-08-11 15:43:53 +03:00
|
|
|
import qualified TreeSitter.Document as TS
|
|
|
|
import qualified TreeSitter.Node as TS
|
|
|
|
import qualified TreeSitter.Language as TS
|
2017-08-11 15:02:30 +03:00
|
|
|
import qualified TreeSitter.Go as TS
|
|
|
|
import qualified TreeSitter.Ruby as TS
|
|
|
|
import qualified TreeSitter.TypeScript as TS
|
2016-08-23 00:56:48 +03:00
|
|
|
import Info
|
2015-12-09 17:58:15 +03:00
|
|
|
|
2016-02-11 02:07:27 +03:00
|
|
|
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
|
2017-07-23 22:56:08 +03:00
|
|
|
treeSitterParser :: Ptr TS.Language -> Blob -> IO (SyntaxTerm DefaultFields)
|
2017-08-11 15:43:53 +03:00
|
|
|
treeSitterParser language blob = bracket TS.ts_document_new TS.ts_document_free $ \ document -> do
|
|
|
|
TS.ts_document_set_language document language
|
2017-07-23 21:56:44 +03:00
|
|
|
unsafeUseAsCStringLen (sourceBytes (blobSource blob)) $ \ (sourceBytes, len) -> do
|
2017-08-11 15:43:53 +03:00
|
|
|
TS.ts_document_set_input_string_with_length document sourceBytes len
|
|
|
|
TS.ts_document_parse_halt_on_error document
|
2017-07-23 21:56:44 +03:00
|
|
|
term <- documentToTerm language document blob
|
2017-02-10 17:23:51 +03:00
|
|
|
pure term
|
|
|
|
|
2016-02-10 22:30:32 +03:00
|
|
|
|
2017-05-19 20:06:00 +03:00
|
|
|
-- | Parse 'Source' with the given 'TS.Language' and return its AST.
|
2017-07-23 22:06:21 +03:00
|
|
|
parseToAST :: (Bounded grammar, Enum grammar) => Ptr TS.Language -> Blob -> IO (A.AST grammar)
|
2017-08-11 15:43:53 +03:00
|
|
|
parseToAST language Blob{..} = bracket TS.ts_document_new TS.ts_document_free $ \ document -> do
|
|
|
|
TS.ts_document_set_language document language
|
2017-07-23 22:06:21 +03:00
|
|
|
root <- unsafeUseAsCStringLen (sourceBytes blobSource) $ \ (source, len) -> do
|
2017-08-11 15:43:53 +03:00
|
|
|
TS.ts_document_set_input_string_with_length document source len
|
|
|
|
TS.ts_document_parse_halt_on_error document
|
2017-04-12 21:31:55 +03:00
|
|
|
alloca (\ rootPtr -> do
|
2017-08-11 15:43:53 +03:00
|
|
|
TS.ts_document_root_node_p document rootPtr
|
2017-04-12 21:31:55 +03:00
|
|
|
peek rootPtr)
|
|
|
|
|
2017-05-19 20:02:37 +03:00
|
|
|
anaM toAST root
|
2017-05-02 21:55:05 +03:00
|
|
|
|
2017-08-11 15:52:25 +03:00
|
|
|
toAST :: forall grammar . (Bounded grammar, Enum grammar) => TS.Node -> IO (Base (A.AST grammar) TS.Node)
|
2017-08-11 15:43:53 +03:00
|
|
|
toAST node@TS.Node{..} = do
|
2017-05-15 22:34:53 +03:00
|
|
|
let count = fromIntegral nodeChildCount
|
|
|
|
children <- allocaArray count $ \ childNodesPtr -> do
|
2017-08-11 15:43:53 +03:00
|
|
|
_ <- with nodeTSNode (\ nodePtr -> TS.ts_node_copy_child_nodes nullPtr nodePtr childNodesPtr (fromIntegral count))
|
2017-05-15 22:34:53 +03:00
|
|
|
peekArray count childNodesPtr
|
2017-07-14 21:43:36 +03:00
|
|
|
pure $! A.Node (toEnum (min (fromIntegral nodeSymbol) (fromEnum (maxBound :: grammar)))) (nodeRange node) (nodeSpan node) :< children
|
2017-05-15 22:34:53 +03:00
|
|
|
|
2017-05-15 22:34:45 +03:00
|
|
|
anaM :: (Corecursive t, Monad m, Traversable (Base t)) => (a -> m (Base t a)) -> a -> m t
|
|
|
|
anaM g = a where a = pure . embed <=< traverse a <=< g
|
|
|
|
|
|
|
|
|
2016-06-03 06:06:09 +03:00
|
|
|
-- | Return a parser for a tree sitter language & document.
|
2017-08-11 15:52:25 +03:00
|
|
|
documentToTerm :: Ptr TS.Language -> Ptr TS.Document -> Blob -> IO (SyntaxTerm DefaultFields)
|
2017-07-23 21:59:34 +03:00
|
|
|
documentToTerm language document Blob{..} = do
|
2017-03-27 22:54:34 +03:00
|
|
|
root <- alloca (\ rootPtr -> do
|
2017-08-11 15:43:53 +03:00
|
|
|
TS.ts_document_root_node_p document rootPtr
|
2017-03-27 22:54:34 +03:00
|
|
|
peek rootPtr)
|
2017-07-23 21:59:34 +03:00
|
|
|
toTerm root
|
2017-08-11 15:52:25 +03:00
|
|
|
where toTerm :: TS.Node -> IO (SyntaxTerm DefaultFields)
|
2017-08-11 15:43:53 +03:00
|
|
|
toTerm node@TS.Node{..} = do
|
|
|
|
name <- peekCString nodeType
|
2017-02-13 19:41:57 +03:00
|
|
|
|
2017-08-11 15:43:53 +03:00
|
|
|
children <- getChildren (fromIntegral nodeNamedChildCount) copyNamed
|
|
|
|
let allChildren = getChildren (fromIntegral nodeChildCount) copyAll
|
2017-02-13 19:41:57 +03:00
|
|
|
|
2017-07-23 22:08:53 +03:00
|
|
|
let source = slice (nodeRange node) blobSource
|
2017-07-28 20:57:58 +03:00
|
|
|
assignTerm language source (range :. categoryForLanguageProductionName language (pack name) :. nodeSpan node :. Nil) children allChildren
|
2017-03-27 23:17:10 +03:00
|
|
|
where getChildren count copy = do
|
|
|
|
nodes <- allocaArray count $ \ childNodesPtr -> do
|
2017-08-11 15:43:53 +03:00
|
|
|
_ <- with nodeTSNode (\ nodePtr -> copy nodePtr childNodesPtr (fromIntegral count))
|
2017-03-27 23:17:10 +03:00
|
|
|
peekArray count childNodesPtr
|
2017-07-23 21:59:34 +03:00
|
|
|
children <- traverse toTerm nodes
|
2017-03-27 23:17:10 +03:00
|
|
|
return $! filter isNonEmpty children
|
|
|
|
range = nodeRange node
|
2017-08-11 15:43:53 +03:00
|
|
|
copyNamed = TS.ts_node_copy_named_child_nodes document
|
|
|
|
copyAll = TS.ts_node_copy_child_nodes document
|
2015-12-09 17:58:15 +03:00
|
|
|
|
2017-07-23 22:56:08 +03:00
|
|
|
isNonEmpty :: HasField fields Category => SyntaxTerm fields -> Bool
|
2017-02-13 19:41:10 +03:00
|
|
|
isNonEmpty = (/= Empty) . category . extract
|
|
|
|
|
2017-08-11 15:43:53 +03:00
|
|
|
nodeRange :: TS.Node -> Range
|
|
|
|
nodeRange TS.Node{..} = Range (fromIntegral nodeStartByte) (fromIntegral nodeEndByte)
|
2017-03-27 22:48:24 +03:00
|
|
|
|
2017-08-11 15:43:53 +03:00
|
|
|
nodeSpan :: TS.Node -> Span
|
|
|
|
nodeSpan TS.Node{..} = nodeStartPoint `seq` nodeEndPoint `seq` Span (pointPos nodeStartPoint) (pointPos nodeEndPoint)
|
|
|
|
where pointPos TS.TSPoint{..} = pointRow `seq` pointColumn `seq` Pos (1 + fromIntegral pointRow) (1 + fromIntegral pointColumn)
|
2017-02-10 20:24:10 +03:00
|
|
|
|
2017-07-23 22:56:08 +03:00
|
|
|
assignTerm :: Ptr TS.Language -> Source -> Record DefaultFields -> [ SyntaxTerm DefaultFields ] -> IO [ SyntaxTerm DefaultFields ] -> IO (SyntaxTerm DefaultFields)
|
2017-01-20 23:03:40 +03:00
|
|
|
assignTerm language source annotation children allChildren =
|
2017-08-11 16:18:08 +03:00
|
|
|
case assignTermByLanguage source (category annotation) children of
|
|
|
|
Just a -> pure (cofree (annotation :< a))
|
|
|
|
_ -> defaultTermAssignment source annotation children allChildren
|
2017-07-23 22:56:08 +03:00
|
|
|
where assignTermByLanguage :: Source -> Category -> [ SyntaxTerm DefaultFields ] -> Maybe (S.Syntax (SyntaxTerm DefaultFields))
|
2017-07-23 21:56:44 +03:00
|
|
|
assignTermByLanguage = case languageForTSLanguage language of
|
|
|
|
Just Language.Go -> Go.termAssignment
|
|
|
|
Just Ruby -> Ruby.termAssignment
|
|
|
|
Just TypeScript -> TS.termAssignment
|
2017-01-20 23:03:40 +03:00
|
|
|
_ -> \ _ _ _ -> Nothing
|
2017-01-19 23:18:02 +03:00
|
|
|
|
2017-08-11 16:18:08 +03:00
|
|
|
defaultTermAssignment :: Source -> Record DefaultFields -> [ SyntaxTerm DefaultFields ] -> IO [ SyntaxTerm DefaultFields ] -> IO (SyntaxTerm DefaultFields)
|
|
|
|
defaultTermAssignment source annotation children allChildren
|
|
|
|
| category annotation `elem` operatorCategories = cofree . (annotation :<) . S.Operator <$> allChildren
|
2017-08-11 16:29:01 +03:00
|
|
|
| otherwise = case (category annotation, children) of
|
2017-08-11 16:20:12 +03:00
|
|
|
(ParseError, children) -> toTerm $ S.ParseError children
|
2017-01-20 23:20:36 +03:00
|
|
|
|
2017-08-11 16:20:12 +03:00
|
|
|
(Comment, _) -> toTerm $ S.Comment (toText source)
|
2017-01-21 00:49:45 +03:00
|
|
|
|
2017-08-11 16:20:12 +03:00
|
|
|
(Pair, [key, value]) -> toTerm $ S.Pair key value
|
2017-01-23 22:07:47 +03:00
|
|
|
|
2017-01-21 00:49:45 +03:00
|
|
|
-- Control flow statements
|
2017-08-11 16:20:12 +03:00
|
|
|
(If, condition : body) -> toTerm $ S.If condition body
|
2017-08-11 16:25:19 +03:00
|
|
|
(Switch, _) -> let (subject, body) = break ((== Other "switch_body") . Info.category . extract) children in toTerm $ S.Switch subject (body >>= toList . unwrap)
|
2017-08-11 16:20:12 +03:00
|
|
|
(Case, expr : body) -> toTerm $ S.Case expr body
|
|
|
|
(While, expr : rest) -> toTerm $ S.While expr rest
|
2017-01-21 00:49:45 +03:00
|
|
|
|
|
|
|
-- Statements
|
2017-08-11 16:20:12 +03:00
|
|
|
(Return, _) -> toTerm $ S.Return children
|
|
|
|
(Yield, _) -> toTerm $ S.Yield children
|
|
|
|
(Throw, [expr]) -> toTerm $ S.Throw expr
|
|
|
|
(Break, [label]) -> toTerm $ S.Break (Just label)
|
|
|
|
(Break, []) -> toTerm $ S.Break Nothing
|
|
|
|
(Continue, [label]) -> toTerm $ S.Continue (Just label)
|
|
|
|
(Continue, []) -> toTerm $ S.Continue Nothing
|
|
|
|
|
2017-08-11 16:29:01 +03:00
|
|
|
(ParenthesizedExpression, [child]) -> pure child
|
2017-08-11 16:20:12 +03:00
|
|
|
|
2017-08-11 16:40:18 +03:00
|
|
|
(Other "unary_expression", _) -> do
|
|
|
|
cs <- allChildren
|
|
|
|
let c = case category . extract <$> cs of
|
2017-08-11 16:53:08 +03:00
|
|
|
[Other s, _]
|
|
|
|
| s `elem` ["-", "+", "++", "--"] -> MathOperator
|
|
|
|
| s == "~" -> BitwiseOperator
|
|
|
|
| s == "!" -> BooleanOperator
|
|
|
|
[_, Other t]
|
|
|
|
| t `elem` ["--", "++"] -> MathOperator
|
2017-08-11 16:40:18 +03:00
|
|
|
_ -> Operator
|
|
|
|
pure (cofree ((setCategory annotation c) :< S.Operator cs))
|
|
|
|
|
|
|
|
(Other "binary_expression", _) -> do
|
|
|
|
cs <- allChildren
|
|
|
|
let c = case category . extract <$> cs of
|
|
|
|
[_, Other s, _]
|
2017-08-11 16:53:08 +03:00
|
|
|
| s `elem` ["<=", "<", ">=", ">", "==", "===", "!=", "!=="] -> RelationalOperator
|
2017-08-11 16:40:18 +03:00
|
|
|
| s `elem` ["*", "+", "-", "/", "%"] -> MathOperator
|
2017-08-11 16:46:08 +03:00
|
|
|
| s `elem` ["&&", "||"] -> BooleanOperator
|
2017-08-11 16:53:08 +03:00
|
|
|
| s `elem` [">>", ">>=", ">>>", ">>>=", "<<", "<<=", "&", "^", "|"] -> BitwiseOperator
|
2017-08-11 16:40:18 +03:00
|
|
|
_ -> Operator
|
|
|
|
pure (cofree ((setCategory annotation c) :< S.Operator cs))
|
|
|
|
|
2017-08-11 16:20:12 +03:00
|
|
|
(_, []) -> toTerm $ S.Leaf (toText source)
|
|
|
|
(_, children) -> toTerm $ S.Indexed children
|
2017-01-20 22:59:49 +03:00
|
|
|
where operatorCategories =
|
|
|
|
[ Operator
|
|
|
|
, Binary
|
|
|
|
, Unary
|
|
|
|
, RangeExpression
|
|
|
|
, ScopeOperator
|
|
|
|
, BooleanOperator
|
|
|
|
, MathOperator
|
|
|
|
, RelationalOperator
|
|
|
|
, BitwiseOperator
|
|
|
|
]
|
2017-08-11 16:29:01 +03:00
|
|
|
toTerm = pure . cofree . (annotation :<)
|
2017-01-20 22:59:49 +03:00
|
|
|
|
2017-01-20 21:03:52 +03:00
|
|
|
|
2017-07-23 21:56:44 +03:00
|
|
|
categoryForLanguageProductionName :: Ptr TS.Language -> Text -> Category
|
2017-03-28 22:32:45 +03:00
|
|
|
categoryForLanguageProductionName = withDefaults . byLanguage
|
2017-03-29 17:17:53 +03:00
|
|
|
where
|
|
|
|
withDefaults productionMap name = case name of
|
|
|
|
"ERROR" -> ParseError
|
|
|
|
s -> productionMap s
|
2017-03-31 22:22:26 +03:00
|
|
|
|
2017-07-23 21:56:44 +03:00
|
|
|
byLanguage language = case languageForTSLanguage language of
|
|
|
|
Just Ruby -> Ruby.categoryForRubyName
|
|
|
|
Just Language.Go -> Go.categoryForGoName
|
|
|
|
Just TypeScript -> TS.categoryForTypeScriptName
|
2017-03-29 17:17:53 +03:00
|
|
|
_ -> Other
|
2017-07-23 21:56:18 +03:00
|
|
|
|
|
|
|
|
|
|
|
languageForTSLanguage :: Ptr TS.Language -> Maybe Language
|
2017-07-23 22:03:02 +03:00
|
|
|
languageForTSLanguage = flip lookup
|
2017-07-28 00:14:23 +03:00
|
|
|
[ (TS.tree_sitter_go, Language.Go)
|
2017-07-23 22:03:02 +03:00
|
|
|
, (TS.tree_sitter_ruby, Ruby)
|
|
|
|
, (TS.tree_sitter_typescript, TypeScript)
|
|
|
|
]
|