1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 06:11:49 +03:00
semantic/src/TreeSitter.hs

181 lines
7.4 KiB
Haskell
Raw Normal View History

2017-06-07 22:19:32 +03:00
{-# LANGUAGE DataKinds, ScopedTypeVariables, TypeOperators #-}
2017-01-20 21:04:36 +03:00
module TreeSitter
( treeSitterParser
, parseToAST
2017-01-20 21:04:36 +03:00
, defaultTermAssignment
) where
2016-02-09 22:34:23 +03:00
import Category
2017-07-28 21:37:02 +03:00
import Control.Comonad (extract)
import Control.Exception
import Control.Monad ((<=<))
import Data.Blob
import Data.ByteString.Unsafe (unsafeUseAsCStringLen)
import Data.Functor.Foldable hiding (Nil)
2017-06-24 16:46:28 +03:00
import Data.Range
2016-08-16 00:25:56 +03:00
import Data.Record
import Data.Source
import Data.Span
import qualified Data.Syntax.Assignment as A
2017-07-28 20:57:58 +03:00
import Data.Text (Text, pack)
2016-02-11 01:30:14 +03:00
import Language
import qualified Language.Go as Go
import qualified Language.TypeScript as TS
2016-10-19 18:22:10 +03:00
import qualified Language.Ruby as Ruby
import Foreign
2017-02-10 19:02:38 +03:00
import Foreign.C.String (peekCString)
2017-03-27 22:48:24 +03:00
import Foreign.Marshal.Array (allocaArray)
2017-01-20 21:03:52 +03:00
import qualified Syntax as S
import Term
2017-08-11 15:02:30 +03:00
import TreeSitter hiding (Language(..))
import qualified TreeSitter as TS
import qualified TreeSitter.Go as TS
import qualified TreeSitter.Ruby as TS
import qualified TreeSitter.TypeScript as TS
2016-08-23 00:56:48 +03:00
import Info
2016-02-11 02:07:27 +03:00
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
treeSitterParser :: Ptr TS.Language -> Blob -> IO (SyntaxTerm DefaultFields)
treeSitterParser language blob = bracket ts_document_new ts_document_free $ \ document -> do
ts_document_set_language document language
unsafeUseAsCStringLen (sourceBytes (blobSource blob)) $ \ (sourceBytes, len) -> do
2017-06-24 17:54:03 +03:00
ts_document_set_input_string_with_length document sourceBytes len
ts_document_parse_halt_on_error document
term <- documentToTerm language document blob
pure term
-- | Parse 'Source' with the given 'TS.Language' and return its AST.
2017-07-23 22:06:21 +03:00
parseToAST :: (Bounded grammar, Enum grammar) => Ptr TS.Language -> Blob -> IO (A.AST grammar)
parseToAST language Blob{..} = bracket ts_document_new ts_document_free $ \ document -> do
ts_document_set_language document language
2017-07-23 22:06:21 +03:00
root <- unsafeUseAsCStringLen (sourceBytes blobSource) $ \ (source, len) -> do
ts_document_set_input_string_with_length document source len
ts_document_parse_halt_on_error document
alloca (\ rootPtr -> do
ts_document_root_node_p document rootPtr
peek rootPtr)
anaM toAST root
toAST :: forall grammar . (Bounded grammar, Enum grammar) => Node -> IO (Base (A.AST grammar) Node)
2017-05-15 22:34:53 +03:00
toAST node@Node{..} = do
let count = fromIntegral nodeChildCount
children <- allocaArray count $ \ childNodesPtr -> do
_ <- with nodeTSNode (\ nodePtr -> ts_node_copy_child_nodes nullPtr nodePtr childNodesPtr (fromIntegral count))
peekArray count childNodesPtr
pure $! A.Node (toEnum (min (fromIntegral nodeSymbol) (fromEnum (maxBound :: grammar)))) (nodeRange node) (nodeSpan node) :< children
2017-05-15 22:34:53 +03:00
2017-05-15 22:34:45 +03:00
anaM :: (Corecursive t, Monad m, Traversable (Base t)) => (a -> m (Base t a)) -> a -> m t
anaM g = a where a = pure . embed <=< traverse a <=< g
2016-06-03 06:06:09 +03:00
-- | Return a parser for a tree sitter language & document.
documentToTerm :: Ptr TS.Language -> Ptr Document -> Blob -> IO (SyntaxTerm DefaultFields)
documentToTerm language document Blob{..} = do
2017-03-27 22:54:34 +03:00
root <- alloca (\ rootPtr -> do
ts_document_root_node_p document rootPtr
peek rootPtr)
toTerm root
where toTerm :: Node -> IO (SyntaxTerm DefaultFields)
toTerm node = do
2017-03-27 22:48:24 +03:00
name <- peekCString (nodeType node)
children <- getChildren (fromIntegral (nodeNamedChildCount node)) copyNamed
let allChildren = getChildren (fromIntegral (nodeChildCount node)) copyAll
let source = slice (nodeRange node) blobSource
2017-07-28 20:57:58 +03:00
assignTerm language source (range :. categoryForLanguageProductionName language (pack name) :. nodeSpan node :. Nil) children allChildren
where getChildren count copy = do
nodes <- allocaArray count $ \ childNodesPtr -> do
_ <- with (nodeTSNode node) (\ nodePtr -> copy nodePtr childNodesPtr (fromIntegral count))
peekArray count childNodesPtr
children <- traverse toTerm nodes
return $! filter isNonEmpty children
range = nodeRange node
copyNamed = ts_node_copy_named_child_nodes document
copyAll = ts_node_copy_child_nodes document
isNonEmpty :: HasField fields Category => SyntaxTerm fields -> Bool
2017-02-13 19:41:10 +03:00
isNonEmpty = (/= Empty) . category . extract
2017-03-27 22:48:24 +03:00
nodeRange :: Node -> Range
nodeRange Node{..} = Range (fromIntegral nodeStartByte) (fromIntegral nodeEndByte)
nodeSpan :: Node -> Span
nodeSpan Node{..} = nodeStartPoint `seq` nodeEndPoint `seq` Span (pointPos nodeStartPoint) (pointPos nodeEndPoint)
where pointPos TSPoint{..} = pointRow `seq` pointColumn `seq` Pos (1 + fromIntegral pointRow) (1 + fromIntegral pointColumn)
assignTerm :: Ptr TS.Language -> Source -> Record DefaultFields -> [ SyntaxTerm DefaultFields ] -> IO [ SyntaxTerm DefaultFields ] -> IO (SyntaxTerm DefaultFields)
2017-01-20 23:03:40 +03:00
assignTerm language source annotation children allChildren =
cofree . (annotation :<) <$> case assignTermByLanguage source (category annotation) children of
2017-01-20 22:54:03 +03:00
Just a -> pure a
_ -> defaultTermAssignment source (category annotation) children allChildren
where assignTermByLanguage :: Source -> Category -> [ SyntaxTerm DefaultFields ] -> Maybe (S.Syntax (SyntaxTerm DefaultFields))
assignTermByLanguage = case languageForTSLanguage language of
Just Language.Go -> Go.termAssignment
Just Ruby -> Ruby.termAssignment
Just TypeScript -> TS.termAssignment
2017-01-20 23:03:40 +03:00
_ -> \ _ _ _ -> Nothing
defaultTermAssignment :: Source -> Category -> [ SyntaxTerm DefaultFields ] -> IO [ SyntaxTerm DefaultFields ] -> IO (S.Syntax (SyntaxTerm DefaultFields))
defaultTermAssignment source category children allChildren
| category `elem` operatorCategories = S.Operator <$> allChildren
| otherwise = pure $! case (category, children) of
2017-01-25 11:09:53 +03:00
(ParseError, children) -> S.ParseError children
(Comment, _) -> S.Comment (toText source)
2017-01-21 00:49:45 +03:00
(Pair, [key, value]) -> S.Pair key value
2017-01-21 00:49:45 +03:00
-- Control flow statements
(If, condition : body) -> S.If condition body
2017-07-28 21:37:02 +03:00
(Switch, _) -> uncurry S.Switch (break ((== Case) . Info.category . extract) children)
2017-01-23 23:12:38 +03:00
(Case, expr : body) -> S.Case expr body
(While, expr : rest) -> S.While expr rest
2017-01-21 00:49:45 +03:00
-- Statements
(Return, _) -> S.Return children
(Yield, _) -> S.Yield children
(Throw, [expr]) -> S.Throw expr
(Break, [label]) -> S.Break (Just label)
(Break, []) -> S.Break Nothing
(Continue, [label]) -> S.Continue (Just label)
(Continue, []) -> S.Continue Nothing
(_, []) -> S.Leaf (toText source)
(_, children) -> S.Indexed children
where operatorCategories =
[ Operator
, Binary
, Unary
, RangeExpression
, ScopeOperator
, BooleanOperator
, MathOperator
, RelationalOperator
, BitwiseOperator
]
2017-01-20 21:03:52 +03:00
categoryForLanguageProductionName :: Ptr TS.Language -> Text -> Category
2017-03-28 22:32:45 +03:00
categoryForLanguageProductionName = withDefaults . byLanguage
where
withDefaults productionMap name = case name of
"ERROR" -> ParseError
s -> productionMap s
byLanguage language = case languageForTSLanguage language of
Just Ruby -> Ruby.categoryForRubyName
Just Language.Go -> Go.categoryForGoName
Just TypeScript -> TS.categoryForTypeScriptName
_ -> Other
languageForTSLanguage :: Ptr TS.Language -> Maybe Language
2017-07-23 22:03:02 +03:00
languageForTSLanguage = flip lookup
2017-07-28 00:14:23 +03:00
[ (TS.tree_sitter_go, Language.Go)
2017-07-23 22:03:02 +03:00
, (TS.tree_sitter_ruby, Ruby)
, (TS.tree_sitter_typescript, TypeScript)
]