mirror of
https://github.com/github/semantic.git
synced 2024-11-24 08:54:07 +03:00
Merge pull request #343 from github/haskell-parser
Maintain modularity of parsing
This commit is contained in:
commit
721d701287
14
app/Main.hs
14
app/Main.hs
@ -8,7 +8,6 @@ import Syntax
|
||||
import Range
|
||||
import Split
|
||||
import Term
|
||||
import TreeSitter
|
||||
import Unified
|
||||
import Control.Comonad.Cofree
|
||||
import qualified Data.ByteString.Char8 as B1
|
||||
@ -36,7 +35,7 @@ main = do
|
||||
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
|
||||
aContents <- readFile sourceAPath
|
||||
bContents <- readFile sourceBPath
|
||||
(aTerm, bTerm) <- let parse = (parserForType . takeExtension) sourceAPath in do
|
||||
(aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do
|
||||
aTerm <- parse aContents
|
||||
bTerm <- parse bContents
|
||||
return (replaceLeavesWithWordBranches aContents aTerm, replaceLeavesWithWordBranches bContents bTerm)
|
||||
@ -57,20 +56,13 @@ main = do
|
||||
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
|
||||
write rendered h = B2.hPut h rendered
|
||||
|
||||
parserForType :: String -> P.Parser
|
||||
parserForType mediaType = maybe P.lineByLineParser parseTreeSitterFile $ case mediaType of
|
||||
".h" -> Just ts_language_c
|
||||
".c" -> Just ts_language_c
|
||||
".js" -> Just ts_language_javascript
|
||||
_ -> Nothing
|
||||
|
||||
replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info
|
||||
replaceLeavesWithWordBranches source term = replaceIn source 0 term
|
||||
where
|
||||
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of
|
||||
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf substring startIndex categories <$> ranges
|
||||
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
|
||||
Indexed i -> Indexed $ replaceIn substring (start range) <$> i
|
||||
Fixed f -> Fixed $ replaceIn substring (start range) <$> f
|
||||
Keyed k -> Keyed $ replaceIn substring (start range) <$> k
|
||||
_ -> syntax
|
||||
makeLeaf source startIndex categories (range, substring) = Info range categories :< Leaf substring
|
||||
makeLeaf categories (range, substring) = Info range categories :< Leaf substring
|
||||
|
@ -4,18 +4,21 @@ import Diff
|
||||
import Range
|
||||
import Syntax
|
||||
import Term
|
||||
import TreeSitter
|
||||
import Control.Comonad.Cofree
|
||||
import qualified Data.Set as Set
|
||||
|
||||
type Parser = String -> IO (Term String Info)
|
||||
|
||||
parserForType :: String -> Parser
|
||||
parserForType mediaType = maybe lineByLineParser parseTreeSitterFile $ languageForType mediaType
|
||||
|
||||
lineByLineParser :: Parser
|
||||
lineByLineParser input = return . root . Indexed $ case foldl annotateLeaves ([], 0) lines of
|
||||
(leaves, _) -> leaves
|
||||
where
|
||||
lines = Prelude.lines input
|
||||
root syntax = Info (Range 0 $ length input) Set.empty :< syntax
|
||||
leaf charIndex line = Info (Range charIndex $ charIndex + length line) Set.empty :< Leaf line
|
||||
root syntax = Info (Range 0 $ length input) mempty :< syntax
|
||||
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf line
|
||||
annotateLeaves (accum, charIndex) line =
|
||||
(accum ++ [ leaf charIndex line ]
|
||||
, charIndex + length line + 1)
|
@ -1,7 +1,6 @@
|
||||
module TreeSitter where
|
||||
|
||||
import Diff
|
||||
import Parser
|
||||
import Range
|
||||
import Syntax
|
||||
import Term
|
||||
@ -48,7 +47,14 @@ keyedProductions = fromList [ "object" ]
|
||||
fixedProductions :: Set String
|
||||
fixedProductions = fromList [ "pair", "rel_op", "math_op", "bool_op", "bitwise_op", "type_op", "math_assignment", "assignment", "subscript_access", "member_access", "new_expression", "function_call", "function", "ternary" ]
|
||||
|
||||
parseTreeSitterFile :: Ptr TSLanguage -> Parser
|
||||
languageForType :: String -> Maybe (Ptr TSLanguage)
|
||||
languageForType mediaType = case mediaType of
|
||||
".h" -> Just ts_language_c
|
||||
".c" -> Just ts_language_c
|
||||
".js" -> Just ts_language_javascript
|
||||
_ -> Nothing
|
||||
|
||||
parseTreeSitterFile :: Ptr TSLanguage -> String -> IO (Term String Info)
|
||||
parseTreeSitterFile language contents = do
|
||||
document <- ts_document_make
|
||||
ts_document_set_language document language
|
||||
@ -59,7 +65,7 @@ parseTreeSitterFile language contents = do
|
||||
ts_document_free document
|
||||
return term)
|
||||
|
||||
documentToTerm :: Ptr TSDocument -> Parser
|
||||
documentToTerm :: Ptr TSDocument -> String -> IO (Term String Info)
|
||||
documentToTerm document contents = alloca $ \root -> do
|
||||
ts_document_root_node_p document root
|
||||
snd <$> toTerm root where
|
||||
|
@ -19,7 +19,6 @@ library
|
||||
, Algorithm
|
||||
, Interpreter
|
||||
, OrderedMap
|
||||
, Parser
|
||||
, Patch
|
||||
, SES
|
||||
, Categorizable
|
||||
@ -42,6 +41,7 @@ executable semantic-diff-exe
|
||||
hs-source-dirs: app
|
||||
main-is: Main.hs
|
||||
other-modules: TreeSitter
|
||||
, Parser
|
||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||
build-depends: base
|
||||
, containers
|
||||
@ -59,6 +59,7 @@ executable semantic-diff-profile
|
||||
hs-source-dirs: app
|
||||
main-is: Main.hs
|
||||
other-modules: TreeSitter
|
||||
, Parser
|
||||
ghc-options: -O2
|
||||
-threaded
|
||||
-fprof-auto
|
||||
|
Loading…
Reference in New Issue
Block a user