1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 06:11:49 +03:00
semantic/src/TreeSitter.hs

103 lines
4.8 KiB
Haskell
Raw Normal View History

module TreeSitter where
2016-02-09 22:34:23 +03:00
import Category
import Range
import Parser
2015-12-24 08:20:47 +03:00
import Source
2015-12-16 23:49:46 +03:00
import qualified Data.Set as Set
import Foreign
import Foreign.C
import Foreign.C.Types
2015-12-15 21:29:58 +03:00
import qualified Data.Text as T
import Foreign.CStorable
import qualified GHC.Generics as Generics
data TSLanguage = TsLanguage deriving (Show, Eq)
foreign import ccall "prototype/doubt-difftool/doubt-difftool-Bridging-Header.h ts_language_c" ts_language_c :: Ptr TSLanguage
foreign import ccall "prototype/doubt-difftool/doubt-difftool-Bridging-Header.h ts_language_javascript" ts_language_javascript :: Ptr TSLanguage
data TSDocument = TsDocument deriving (Show, Eq)
foreign import ccall "prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_make" ts_document_make :: IO (Ptr TSDocument)
foreign import ccall "prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_set_language" ts_document_set_language :: Ptr TSDocument -> Ptr TSLanguage -> IO ()
foreign import ccall "prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_set_input_string" ts_document_set_input_string :: Ptr TSDocument -> CString -> IO ()
foreign import ccall "prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_parse" ts_document_parse :: Ptr TSDocument -> IO ()
foreign import ccall "prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_free" ts_document_free :: Ptr TSDocument -> IO ()
data TSNode = TsNode { _data :: Ptr (), offset0 :: CSize, offset1 :: CSize, offset2 :: CSize }
deriving (Show, Eq, Generics.Generic)
instance CStorable TSNode
instance Storable TSNode where
alignment = cAlignment
sizeOf = cSizeOf
peek = cPeek
poke = cPoke
foreign import ccall "app/bridge.h ts_document_root_node_p" ts_document_root_node_p :: Ptr TSDocument -> Ptr TSNode -> IO ()
foreign import ccall "app/bridge.h ts_node_p_name" ts_node_p_name :: Ptr TSNode -> Ptr TSDocument -> IO CString
foreign import ccall "app/bridge.h ts_node_p_named_child_count" ts_node_p_named_child_count :: Ptr TSNode -> IO CSize
foreign import ccall "app/bridge.h ts_node_p_named_child" ts_node_p_named_child :: Ptr TSNode -> CSize -> Ptr TSNode -> IO CSize
2015-12-17 22:56:43 +03:00
foreign import ccall "app/bridge.h ts_node_p_start_char" ts_node_p_start_char :: Ptr TSNode -> CSize
foreign import ccall "app/bridge.h ts_node_p_end_char" ts_node_p_end_char :: Ptr TSNode -> CSize
-- | A programming language.
data Language =
C
| JavaScript
2016-02-03 21:24:17 +03:00
-- | Returns a Language based on the file extension (including the ".").
2015-12-18 22:48:52 +03:00
languageForType :: T.Text -> Maybe Language
languageForType mediaType = case mediaType of
".h" -> Just C
".c" -> Just C
".js" -> Just JavaScript
_ -> Nothing
-- | Returns the TreeSitter language for the given language.
grammarForLanguage :: Language -> Maybe (Ptr TSLanguage)
grammarForLanguage language = case language of
C -> Just ts_language_c
JavaScript -> Just ts_language_javascript
_ -> Nothing
-- | Returns a parser for the given TreeSitter language.
parserForGrammar :: Ptr TSLanguage -> Parser
parserForGrammar language contents = do
document <- ts_document_make
ts_document_set_language document language
withCString (toList contents) (\source -> do
ts_document_set_input_string document source
ts_document_parse document
term <- documentToTerm (termConstructor categoryForNodeName) document contents
ts_document_free document
return term)
-- | Returns a TreeSitter parser for the given language.
treeSitterParser :: Language -> Maybe Parser
treeSitterParser language = parserForGrammar <$> grammarForLanguage language
2016-02-09 22:34:23 +03:00
-- | Given a node name from TreeSitter, return the correct category.
categoryForNodeName :: String -> Set.Set Category
2016-02-09 22:34:23 +03:00
categoryForNodeName name = case name of
"function_call" -> Set.singleton FunctionCall
_ -> Set.singleton (Other name)
2016-02-09 22:34:23 +03:00
2016-02-03 21:24:17 +03:00
-- | Given a constructor and a tree sitter document, return a parser.
documentToTerm :: Constructor -> Ptr TSDocument -> Parser
2015-12-23 18:34:40 +03:00
documentToTerm constructor document contents = alloca $ \ root -> do
ts_document_root_node_p document root
(_, term) <- toTerm root
return term
where toTerm node = do
name <- ts_node_p_name node document
2015-12-31 01:52:51 +03:00
name <- peekCString name
count <- ts_node_p_named_child_count node
2015-12-23 18:47:15 +03:00
children <- mapM (alloca . getChild node) $ take (fromIntegral count) [0..]
2015-12-24 01:54:28 +03:00
-- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after weve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it.
2015-12-24 01:39:13 +03:00
range <- return $! Range { start = fromIntegral $ ts_node_p_start_char node, end = fromIntegral $ ts_node_p_end_char node }
return (name, constructor contents range name children)
2015-12-23 18:47:15 +03:00
getChild node n out = do
_ <- ts_node_p_named_child node n out
2015-12-23 18:47:15 +03:00
toTerm out