1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 22:31:36 +03:00
semantic/src/TreeSitter.hs

137 lines
6.1 KiB
Haskell
Raw Normal View History

2016-07-14 23:35:20 +03:00
{-# LANGUAGE DataKinds #-}
2017-01-20 21:04:36 +03:00
module TreeSitter
( treeSitterParser
, defaultTermAssignment
) where
import Prologue hiding (Constructor)
2016-02-09 22:34:23 +03:00
import Category
2016-08-16 00:25:56 +03:00
import Data.Record
2016-02-11 01:30:14 +03:00
import Language
import qualified Language.C as C
import qualified Language.Go as Go
import qualified Language.JavaScript as JS
2016-10-19 18:22:10 +03:00
import qualified Language.Ruby as Ruby
import Parser
2016-08-02 19:08:26 +03:00
import Range
2015-12-24 08:20:47 +03:00
import Source
import qualified Syntax
import Foreign
2017-02-10 19:02:38 +03:00
import Foreign.C.String (peekCString)
import Data.Text.Foreign (withCStringLen)
2017-01-20 21:03:52 +03:00
import qualified Syntax as S
import Term
import Text.Parser.TreeSitter hiding (Language(..))
import qualified Text.Parser.TreeSitter as TS
2016-07-28 01:11:55 +03:00
import SourceSpan
2016-08-23 00:56:48 +03:00
import Info
2016-02-11 02:07:27 +03:00
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
2016-10-06 00:27:45 +03:00
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
2016-07-28 01:11:55 +03:00
treeSitterParser language grammar blob = do
document <- ts_document_new
2016-02-11 02:07:27 +03:00
ts_document_set_language document grammar
2017-02-10 19:02:38 +03:00
withCStringLen (toText (source blob)) $ \ (source, len) -> do
ts_document_set_input_string_with_length document source len
ts_document_parse document
term <- documentToTerm language document blob
ts_document_free document
pure term
2016-06-03 06:06:09 +03:00
-- | Return a parser for a tree sitter language & document.
2016-10-06 00:27:45 +03:00
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
documentToTerm language document SourceBlob{..} = alloca $ \ root -> do
ts_document_root_node_p document root
2017-02-10 20:26:02 +03:00
toTerm root source
where toTerm node source = do
name <- ts_node_p_name node document
2015-12-31 01:52:51 +03:00
name <- peekCString name
count <- ts_node_p_named_child_count node
let range = nodeRange node
2017-02-10 20:26:02 +03:00
children <- filter isNonEmpty <$> traverse (alloca . getChild (start range) node) (take (fromIntegral count) [0..])
2016-10-07 01:31:06 +03:00
let startPos = SourcePos (1 + (fromIntegral $! ts_node_p_start_point_row node)) (1 + (fromIntegral $! ts_node_p_start_point_column node))
let endPos = SourcePos (1 + (fromIntegral $! ts_node_p_end_point_row node)) (1 + (fromIntegral $! ts_node_p_end_point_column node))
2016-10-11 22:09:24 +03:00
let sourceSpan = SourceSpan { spanStart = startPos , spanEnd = endPos }
allChildrenCount <- ts_node_p_child_count node
2017-02-10 20:26:02 +03:00
let allChildren = filter isNonEmpty <$> traverse (alloca . getUnnamedChild (start range) node) (take (fromIntegral allChildrenCount) [0..])
2016-09-10 00:23:19 +03:00
-- Note: The strict application here is semantically important.
-- Without it, we may not evaluate the value until after weve exited
2016-09-10 00:23:19 +03:00
-- the scope that `node` was allocated within, meaning `alloca` will
-- free it & other stack data may overwrite it.
2017-02-10 20:26:02 +03:00
range `seq` sourceSpan `seq` assignTerm language source (range :. categoryForLanguageProductionName language (toS name) :. sourceSpan :. Nil) children allChildren
getChild start node n out = ts_node_p_named_child node n out >> toTerm out (slice (offsetRange (nodeRange node) (negate start)) source)
{-# INLINE getChild #-}
2017-02-10 20:26:02 +03:00
getUnnamedChild start node n out = ts_node_p_child node n out >> toTerm out (slice (offsetRange (nodeRange node) (negate start)) source)
{-# INLINE getUnnamedChild #-}
isNonEmpty child = category (extract child) /= Empty
nodeRange node = Range { start = fromIntegral $ ts_node_p_start_char node, end = fromIntegral $ ts_node_p_end_char node }
assignTerm :: Language -> Source -> Record '[Range, Category, SourceSpan] -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO (SyntaxTerm Text '[ Range, Category, SourceSpan ])
2017-01-20 23:03:40 +03:00
assignTerm language source annotation children allChildren =
cofree . (annotation :<) <$> case assignTermByLanguage language source (category annotation) children of
2017-01-20 22:54:03 +03:00
Just a -> pure a
_ -> defaultTermAssignment source (category annotation) children allChildren
where assignTermByLanguage :: Language -> Source -> Category -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> Maybe (S.Syntax Text (SyntaxTerm Text '[ Range, Category, SourceSpan ]))
assignTermByLanguage = \case
JavaScript -> JS.termAssignment
2017-01-20 22:12:56 +03:00
C -> C.termAssignment
2017-01-24 00:01:42 +03:00
Language.Go -> Go.termAssignment
2017-01-20 22:15:09 +03:00
Ruby -> Ruby.termAssignment
2017-01-20 23:03:40 +03:00
_ -> \ _ _ _ -> Nothing
defaultTermAssignment :: Source -> Category -> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -> IO [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -> IO (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan]))
defaultTermAssignment source category children allChildren
| category `elem` operatorCategories = S.Operator <$> allChildren
| otherwise = pure $! case (category, children) of
2017-01-25 11:09:53 +03:00
(ParseError, children) -> S.ParseError children
(Comment, _) -> S.Comment (toText source)
2017-01-21 00:49:45 +03:00
(Pair, [key, value]) -> S.Pair key value
2017-01-21 00:49:45 +03:00
-- Control flow statements
(If, condition : body) -> S.If condition body
(Switch, _) -> uncurry S.Switch (Prologue.break ((== Case) . Info.category . extract) children)
2017-01-23 23:12:38 +03:00
(Case, expr : body) -> S.Case expr body
(While, expr : rest) -> S.While expr rest
2017-01-21 00:49:45 +03:00
-- Statements
(Return, _) -> S.Return children
(Yield, _) -> S.Yield children
(Throw, [expr]) -> S.Throw expr
(Break, [label]) -> S.Break (Just label)
(Break, []) -> S.Break Nothing
(Continue, [label]) -> S.Continue (Just label)
(Continue, []) -> S.Continue Nothing
(_, []) -> S.Leaf (toText source)
(_, children) -> S.Indexed children
where operatorCategories =
[ Operator
, Binary
, Unary
, RangeExpression
, ScopeOperator
, BooleanOperator
, MathOperator
, RelationalOperator
, BitwiseOperator
]
2017-01-20 21:03:52 +03:00
categoryForLanguageProductionName :: Language -> Text -> Category
categoryForLanguageProductionName = withDefaults . \case
JavaScript -> JS.categoryForJavaScriptProductionName
C -> C.categoryForCProductionName
Ruby -> Ruby.categoryForRubyName
Language.Go -> Go.categoryForGoName
_ -> Other
where withDefaults productionMap = \case
2017-01-25 11:08:06 +03:00
"ERROR" -> ParseError
s -> productionMap s