2015-12-09 17:58:15 +03:00
module TreeSitter where
2016-02-09 22:34:23 +03:00
import Category
2015-12-09 17:58:15 +03:00
import Range
2015-12-17 00:24:23 +03:00
import Parser
2015-12-24 08:20:47 +03:00
import Source
2015-12-16 23:49:46 +03:00
import qualified Data.Set as Set
2015-12-09 17:58:15 +03:00
import Foreign
import Foreign.C
import Foreign.C.Types
2015-12-15 21:29:58 +03:00
import qualified Data.Text as T
2015-12-23 18:33:52 +03:00
import Foreign.CStorable
2015-12-23 18:33:36 +03:00
import qualified GHC.Generics as Generics
2015-12-09 17:58:15 +03:00
data TSLanguage = TsLanguage deriving ( Show , Eq )
2015-12-09 18:11:30 +03:00
foreign import ccall " prototype/doubt-difftool/doubt-difftool-Bridging-Header.h ts_language_c " ts_language_c :: Ptr TSLanguage
foreign import ccall " prototype/doubt-difftool/doubt-difftool-Bridging-Header.h ts_language_javascript " ts_language_javascript :: Ptr TSLanguage
2015-12-09 17:58:15 +03:00
data TSDocument = TsDocument deriving ( Show , Eq )
foreign import ccall " prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_make " ts_document_make :: IO ( Ptr TSDocument )
foreign import ccall " prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_set_language " ts_document_set_language :: Ptr TSDocument -> Ptr TSLanguage -> IO ()
foreign import ccall " prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_set_input_string " ts_document_set_input_string :: Ptr TSDocument -> CString -> IO ()
foreign import ccall " prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_parse " ts_document_parse :: Ptr TSDocument -> IO ()
foreign import ccall " prototype/External/tree-sitter/include/tree_sitter/runtime.h ts_document_free " ts_document_free :: Ptr TSDocument -> IO ()
2015-12-23 17:45:49 +03:00
data TSNode = TsNode { _data :: Ptr () , offset0 :: CSize , offset1 :: CSize , offset2 :: CSize }
2015-12-23 18:33:36 +03:00
deriving ( Show , Eq , Generics . Generic )
2015-12-09 17:58:15 +03:00
2015-12-23 18:33:52 +03:00
instance CStorable TSNode
2015-12-09 17:58:15 +03:00
instance Storable TSNode where
2015-12-23 18:34:08 +03:00
alignment = cAlignment
sizeOf = cSizeOf
peek = cPeek
poke = cPoke
2015-12-09 17:58:15 +03:00
foreign import ccall " app/bridge.h ts_document_root_node_p " ts_document_root_node_p :: Ptr TSDocument -> Ptr TSNode -> IO ()
foreign import ccall " app/bridge.h ts_node_p_name " ts_node_p_name :: Ptr TSNode -> Ptr TSDocument -> IO CString
foreign import ccall " app/bridge.h ts_node_p_named_child_count " ts_node_p_named_child_count :: Ptr TSNode -> IO CSize
foreign import ccall " app/bridge.h ts_node_p_named_child " ts_node_p_named_child :: Ptr TSNode -> CSize -> Ptr TSNode -> IO CSize
2015-12-17 22:56:43 +03:00
foreign import ccall " app/bridge.h ts_node_p_start_char " ts_node_p_start_char :: Ptr TSNode -> CSize
foreign import ccall " app/bridge.h ts_node_p_end_char " ts_node_p_end_char :: Ptr TSNode -> CSize
2015-12-09 17:58:15 +03:00
2016-02-03 21:24:17 +03:00
-- | A language in the eyes of semantic-diff.
2015-12-17 02:08:51 +03:00
data Language = Language { getTsLanguage :: Ptr TSLanguage , getConstructor :: Constructor }
2015-12-09 17:58:15 +03:00
2016-02-03 21:24:17 +03:00
-- | Returns a Language based on the file extension (including the ".").
2015-12-18 22:48:52 +03:00
languageForType :: T . Text -> Maybe Language
2015-12-15 22:39:54 +03:00
languageForType mediaType = case mediaType of
2016-02-10 21:03:15 +03:00
" .h " -> Just . Language ts_language_c $ termConstructor categoryForNodeName
" .c " -> Just . Language ts_language_c $ termConstructor categoryForNodeName
" .js " -> Just . Language ts_language_javascript $ termConstructor categoryForNodeName
2015-12-15 22:39:54 +03:00
_ -> Nothing
2015-12-09 17:58:15 +03:00
2016-02-03 21:24:17 +03:00
-- | Returns a parser for the given language.
2016-02-10 22:02:51 +03:00
treeSitterParser :: Language -> Parser
treeSitterParser ( Language language constructor ) contents = do
2015-12-09 17:58:15 +03:00
document <- ts_document_make
ts_document_set_language document language
2015-12-29 23:13:57 +03:00
withCString ( toList contents ) ( \ source -> do
2015-12-09 17:58:15 +03:00
ts_document_set_input_string document source
ts_document_parse document
2015-12-17 00:06:06 +03:00
term <- documentToTerm constructor document contents
2015-12-09 17:58:15 +03:00
ts_document_free document
return term )
2016-02-09 22:34:23 +03:00
-- | Given a node name from TreeSitter, return the correct category.
2016-02-10 21:03:15 +03:00
categoryForNodeName :: String -> Set . Set Category
2016-02-09 22:34:23 +03:00
categoryForNodeName name = case name of
2016-02-10 21:03:15 +03:00
" function_call " -> Set . singleton FunctionCall
_ -> Set . singleton ( Other name )
2016-02-09 22:34:23 +03:00
2016-02-03 21:24:17 +03:00
-- | Given a constructor and a tree sitter document, return a parser.
2015-12-17 00:24:23 +03:00
documentToTerm :: Constructor -> Ptr TSDocument -> Parser
2015-12-23 18:34:40 +03:00
documentToTerm constructor document contents = alloca $ \ root -> do
2015-12-09 17:58:15 +03:00
ts_document_root_node_p document root
2015-12-23 17:49:55 +03:00
( _ , term ) <- toTerm root
return term
where toTerm node = do
name <- ts_node_p_name node document
2015-12-31 01:52:51 +03:00
name <- peekCString name
2015-12-23 17:49:55 +03:00
count <- ts_node_p_named_child_count node
2015-12-23 18:47:15 +03:00
children <- mapM ( alloca . getChild node ) $ take ( fromIntegral count ) [ 0 .. ]
2015-12-24 01:54:28 +03:00
-- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after we’ ve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it.
2015-12-24 01:39:13 +03:00
range <- return $! Range { start = fromIntegral $ ts_node_p_start_char node , end = fromIntegral $ ts_node_p_end_char node }
2015-12-09 17:58:15 +03:00
2016-02-10 21:03:15 +03:00
return ( name , constructor contents range name children )
2015-12-23 18:47:15 +03:00
getChild node n out = do
2015-12-23 17:49:55 +03:00
_ <- ts_node_p_named_child node n out
2015-12-23 18:47:15 +03:00
toTerm out