1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 06:11:49 +03:00
semantic/src/TreeSitter.hs

183 lines
7.5 KiB
Haskell
Raw Normal View History

2017-06-07 22:19:32 +03:00
{-# LANGUAGE DataKinds, ScopedTypeVariables, TypeOperators #-}
2017-01-20 21:04:36 +03:00
module TreeSitter
( treeSitterParser
, parseToAST
2017-01-20 21:04:36 +03:00
, defaultTermAssignment
) where
import Prologue hiding (Constructor)
2016-02-09 22:34:23 +03:00
import Category
import Data.Blob
import Data.ByteString.Unsafe (unsafeUseAsCStringLen)
import Data.Functor.Foldable hiding (Nil)
2017-06-24 16:46:28 +03:00
import Data.Range
2016-08-16 00:25:56 +03:00
import Data.Record
import Data.Source
import Data.Span
import qualified Data.Syntax.Assignment as A
2016-02-11 01:30:14 +03:00
import Language
import qualified Language.C as C
import qualified Language.Go as Go
import qualified Language.TypeScript as TS
2016-10-19 18:22:10 +03:00
import qualified Language.Ruby as Ruby
import Foreign
2017-02-10 19:02:38 +03:00
import Foreign.C.String (peekCString)
2017-03-27 22:48:24 +03:00
import Foreign.Marshal.Array (allocaArray)
2017-01-20 21:03:52 +03:00
import qualified Syntax as S
import Term
import Text.Parser.TreeSitter hiding (Language(..))
import qualified Text.Parser.TreeSitter as TS
import qualified Text.Parser.TreeSitter.C as TS
import qualified Text.Parser.TreeSitter.Go as TS
import qualified Text.Parser.TreeSitter.Ruby as TS
import qualified Text.Parser.TreeSitter.TypeScript as TS
2016-08-23 00:56:48 +03:00
import Info
2016-02-11 02:07:27 +03:00
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
treeSitterParser :: Ptr TS.Language -> Blob -> IO (SyntaxTerm DefaultFields)
treeSitterParser language blob = bracket ts_document_new ts_document_free $ \ document -> do
ts_document_set_language document language
unsafeUseAsCStringLen (sourceBytes (blobSource blob)) $ \ (sourceBytes, len) -> do
2017-06-24 17:54:03 +03:00
ts_document_set_input_string_with_length document sourceBytes len
ts_document_parse_halt_on_error document
term <- documentToTerm language document blob
pure term
-- | Parse 'Source' with the given 'TS.Language' and return its AST.
2017-07-23 22:06:21 +03:00
parseToAST :: (Bounded grammar, Enum grammar) => Ptr TS.Language -> Blob -> IO (A.AST grammar)
parseToAST language Blob{..} = bracket ts_document_new ts_document_free $ \ document -> do
ts_document_set_language document language
2017-07-23 22:06:21 +03:00
root <- unsafeUseAsCStringLen (sourceBytes blobSource) $ \ (source, len) -> do
ts_document_set_input_string_with_length document source len
ts_document_parse_halt_on_error document
alloca (\ rootPtr -> do
ts_document_root_node_p document rootPtr
peek rootPtr)
anaM toAST root
toAST :: forall grammar . (Bounded grammar, Enum grammar) => Node -> IO (Base (A.AST grammar) Node)
2017-05-15 22:34:53 +03:00
toAST node@Node{..} = do
let count = fromIntegral nodeChildCount
children <- allocaArray count $ \ childNodesPtr -> do
_ <- with nodeTSNode (\ nodePtr -> ts_node_copy_child_nodes nullPtr nodePtr childNodesPtr (fromIntegral count))
peekArray count childNodesPtr
pure $! A.Node (toEnum (min (fromIntegral nodeSymbol) (fromEnum (maxBound :: grammar)))) (nodeRange node) (nodeSpan node) :< children
2017-05-15 22:34:53 +03:00
2017-05-15 22:34:45 +03:00
anaM :: (Corecursive t, Monad m, Traversable (Base t)) => (a -> m (Base t a)) -> a -> m t
anaM g = a where a = pure . embed <=< traverse a <=< g
2016-06-03 06:06:09 +03:00
-- | Return a parser for a tree sitter language & document.
documentToTerm :: Ptr TS.Language -> Ptr Document -> Blob -> IO (SyntaxTerm DefaultFields)
documentToTerm language document Blob{..} = do
2017-03-27 22:54:34 +03:00
root <- alloca (\ rootPtr -> do
ts_document_root_node_p document rootPtr
peek rootPtr)
toTerm root
where toTerm :: Node -> IO (SyntaxTerm DefaultFields)
toTerm node = do
2017-03-27 22:48:24 +03:00
name <- peekCString (nodeType node)
children <- getChildren (fromIntegral (nodeNamedChildCount node)) copyNamed
let allChildren = getChildren (fromIntegral (nodeChildCount node)) copyAll
let source = slice (nodeRange node) blobSource
2017-03-27 23:04:43 +03:00
assignTerm language source (range :. categoryForLanguageProductionName language (toS name) :. nodeSpan node :. Nil) children allChildren
where getChildren count copy = do
nodes <- allocaArray count $ \ childNodesPtr -> do
_ <- with (nodeTSNode node) (\ nodePtr -> copy nodePtr childNodesPtr (fromIntegral count))
peekArray count childNodesPtr
children <- traverse toTerm nodes
return $! filter isNonEmpty children
range = nodeRange node
copyNamed = ts_node_copy_named_child_nodes document
copyAll = ts_node_copy_child_nodes document
isNonEmpty :: HasField fields Category => SyntaxTerm fields -> Bool
2017-02-13 19:41:10 +03:00
isNonEmpty = (/= Empty) . category . extract
2017-03-27 22:48:24 +03:00
nodeRange :: Node -> Range
nodeRange Node{..} = Range (fromIntegral nodeStartByte) (fromIntegral nodeEndByte)
nodeSpan :: Node -> Span
nodeSpan Node{..} = nodeStartPoint `seq` nodeEndPoint `seq` Span (pointPos nodeStartPoint) (pointPos nodeEndPoint)
where pointPos TSPoint{..} = pointRow `seq` pointColumn `seq` Pos (1 + fromIntegral pointRow) (1 + fromIntegral pointColumn)
assignTerm :: Ptr TS.Language -> Source -> Record DefaultFields -> [ SyntaxTerm DefaultFields ] -> IO [ SyntaxTerm DefaultFields ] -> IO (SyntaxTerm DefaultFields)
2017-01-20 23:03:40 +03:00
assignTerm language source annotation children allChildren =
cofree . (annotation :<) <$> case assignTermByLanguage source (category annotation) children of
2017-01-20 22:54:03 +03:00
Just a -> pure a
_ -> defaultTermAssignment source (category annotation) children allChildren
where assignTermByLanguage :: Source -> Category -> [ SyntaxTerm DefaultFields ] -> Maybe (S.Syntax (SyntaxTerm DefaultFields))
assignTermByLanguage = case languageForTSLanguage language of
Just C -> C.termAssignment
Just Language.Go -> Go.termAssignment
Just Ruby -> Ruby.termAssignment
Just TypeScript -> TS.termAssignment
2017-01-20 23:03:40 +03:00
_ -> \ _ _ _ -> Nothing
defaultTermAssignment :: Source -> Category -> [ SyntaxTerm DefaultFields ] -> IO [ SyntaxTerm DefaultFields ] -> IO (S.Syntax (SyntaxTerm DefaultFields))
defaultTermAssignment source category children allChildren
| category `elem` operatorCategories = S.Operator <$> allChildren
| otherwise = pure $! case (category, children) of
2017-01-25 11:09:53 +03:00
(ParseError, children) -> S.ParseError children
(Comment, _) -> S.Comment (toText source)
2017-01-21 00:49:45 +03:00
(Pair, [key, value]) -> S.Pair key value
2017-01-21 00:49:45 +03:00
-- Control flow statements
(If, condition : body) -> S.If condition body
(Switch, _) -> uncurry S.Switch (Prologue.break ((== Case) . Info.category . extract) children)
2017-01-23 23:12:38 +03:00
(Case, expr : body) -> S.Case expr body
(While, expr : rest) -> S.While expr rest
2017-01-21 00:49:45 +03:00
-- Statements
(Return, _) -> S.Return children
(Yield, _) -> S.Yield children
(Throw, [expr]) -> S.Throw expr
(Break, [label]) -> S.Break (Just label)
(Break, []) -> S.Break Nothing
(Continue, [label]) -> S.Continue (Just label)
(Continue, []) -> S.Continue Nothing
(_, []) -> S.Leaf (toText source)
(_, children) -> S.Indexed children
where operatorCategories =
[ Operator
, Binary
, Unary
, RangeExpression
, ScopeOperator
, BooleanOperator
, MathOperator
, RelationalOperator
, BitwiseOperator
]
2017-01-20 21:03:52 +03:00
categoryForLanguageProductionName :: Ptr TS.Language -> Text -> Category
2017-03-28 22:32:45 +03:00
categoryForLanguageProductionName = withDefaults . byLanguage
where
withDefaults productionMap name = case name of
"ERROR" -> ParseError
s -> productionMap s
byLanguage language = case languageForTSLanguage language of
Just C -> C.categoryForCProductionName
Just Ruby -> Ruby.categoryForRubyName
Just Language.Go -> Go.categoryForGoName
Just TypeScript -> TS.categoryForTypeScriptName
_ -> Other
languageForTSLanguage :: Ptr TS.Language -> Maybe Language
2017-07-23 22:03:02 +03:00
languageForTSLanguage = flip lookup
[ (TS.tree_sitter_c, C)
, (TS.tree_sitter_go, Language.Go)
, (TS.tree_sitter_ruby, Ruby)
, (TS.tree_sitter_typescript, TypeScript)
]