mirror of
https://github.com/github/semantic.git
synced 2024-11-24 17:04:47 +03:00
Merge pull request #343 from github/haskell-parser
Maintain modularity of parsing
This commit is contained in:
commit
721d701287
14
app/Main.hs
14
app/Main.hs
@ -8,7 +8,6 @@ import Syntax
|
|||||||
import Range
|
import Range
|
||||||
import Split
|
import Split
|
||||||
import Term
|
import Term
|
||||||
import TreeSitter
|
|
||||||
import Unified
|
import Unified
|
||||||
import Control.Comonad.Cofree
|
import Control.Comonad.Cofree
|
||||||
import qualified Data.ByteString.Char8 as B1
|
import qualified Data.ByteString.Char8 as B1
|
||||||
@ -36,7 +35,7 @@ main = do
|
|||||||
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
|
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
|
||||||
aContents <- readFile sourceAPath
|
aContents <- readFile sourceAPath
|
||||||
bContents <- readFile sourceBPath
|
bContents <- readFile sourceBPath
|
||||||
(aTerm, bTerm) <- let parse = (parserForType . takeExtension) sourceAPath in do
|
(aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do
|
||||||
aTerm <- parse aContents
|
aTerm <- parse aContents
|
||||||
bTerm <- parse bContents
|
bTerm <- parse bContents
|
||||||
return (replaceLeavesWithWordBranches aContents aTerm, replaceLeavesWithWordBranches bContents bTerm)
|
return (replaceLeavesWithWordBranches aContents aTerm, replaceLeavesWithWordBranches bContents bTerm)
|
||||||
@ -57,20 +56,13 @@ main = do
|
|||||||
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
|
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
|
||||||
write rendered h = B2.hPut h rendered
|
write rendered h = B2.hPut h rendered
|
||||||
|
|
||||||
parserForType :: String -> P.Parser
|
|
||||||
parserForType mediaType = maybe P.lineByLineParser parseTreeSitterFile $ case mediaType of
|
|
||||||
".h" -> Just ts_language_c
|
|
||||||
".c" -> Just ts_language_c
|
|
||||||
".js" -> Just ts_language_javascript
|
|
||||||
_ -> Nothing
|
|
||||||
|
|
||||||
replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info
|
replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info
|
||||||
replaceLeavesWithWordBranches source term = replaceIn source 0 term
|
replaceLeavesWithWordBranches source term = replaceIn source 0 term
|
||||||
where
|
where
|
||||||
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of
|
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of
|
||||||
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf substring startIndex categories <$> ranges
|
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
|
||||||
Indexed i -> Indexed $ replaceIn substring (start range) <$> i
|
Indexed i -> Indexed $ replaceIn substring (start range) <$> i
|
||||||
Fixed f -> Fixed $ replaceIn substring (start range) <$> f
|
Fixed f -> Fixed $ replaceIn substring (start range) <$> f
|
||||||
Keyed k -> Keyed $ replaceIn substring (start range) <$> k
|
Keyed k -> Keyed $ replaceIn substring (start range) <$> k
|
||||||
_ -> syntax
|
_ -> syntax
|
||||||
makeLeaf source startIndex categories (range, substring) = Info range categories :< Leaf substring
|
makeLeaf categories (range, substring) = Info range categories :< Leaf substring
|
||||||
|
@ -4,18 +4,21 @@ import Diff
|
|||||||
import Range
|
import Range
|
||||||
import Syntax
|
import Syntax
|
||||||
import Term
|
import Term
|
||||||
|
import TreeSitter
|
||||||
import Control.Comonad.Cofree
|
import Control.Comonad.Cofree
|
||||||
import qualified Data.Set as Set
|
|
||||||
|
|
||||||
type Parser = String -> IO (Term String Info)
|
type Parser = String -> IO (Term String Info)
|
||||||
|
|
||||||
|
parserForType :: String -> Parser
|
||||||
|
parserForType mediaType = maybe lineByLineParser parseTreeSitterFile $ languageForType mediaType
|
||||||
|
|
||||||
lineByLineParser :: Parser
|
lineByLineParser :: Parser
|
||||||
lineByLineParser input = return . root . Indexed $ case foldl annotateLeaves ([], 0) lines of
|
lineByLineParser input = return . root . Indexed $ case foldl annotateLeaves ([], 0) lines of
|
||||||
(leaves, _) -> leaves
|
(leaves, _) -> leaves
|
||||||
where
|
where
|
||||||
lines = Prelude.lines input
|
lines = Prelude.lines input
|
||||||
root syntax = Info (Range 0 $ length input) Set.empty :< syntax
|
root syntax = Info (Range 0 $ length input) mempty :< syntax
|
||||||
leaf charIndex line = Info (Range charIndex $ charIndex + length line) Set.empty :< Leaf line
|
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf line
|
||||||
annotateLeaves (accum, charIndex) line =
|
annotateLeaves (accum, charIndex) line =
|
||||||
(accum ++ [ leaf charIndex line ]
|
(accum ++ [ leaf charIndex line ]
|
||||||
, charIndex + length line + 1)
|
, charIndex + length line + 1)
|
@ -1,7 +1,6 @@
|
|||||||
module TreeSitter where
|
module TreeSitter where
|
||||||
|
|
||||||
import Diff
|
import Diff
|
||||||
import Parser
|
|
||||||
import Range
|
import Range
|
||||||
import Syntax
|
import Syntax
|
||||||
import Term
|
import Term
|
||||||
@ -48,7 +47,14 @@ keyedProductions = fromList [ "object" ]
|
|||||||
fixedProductions :: Set String
|
fixedProductions :: Set String
|
||||||
fixedProductions = fromList [ "pair", "rel_op", "math_op", "bool_op", "bitwise_op", "type_op", "math_assignment", "assignment", "subscript_access", "member_access", "new_expression", "function_call", "function", "ternary" ]
|
fixedProductions = fromList [ "pair", "rel_op", "math_op", "bool_op", "bitwise_op", "type_op", "math_assignment", "assignment", "subscript_access", "member_access", "new_expression", "function_call", "function", "ternary" ]
|
||||||
|
|
||||||
parseTreeSitterFile :: Ptr TSLanguage -> Parser
|
languageForType :: String -> Maybe (Ptr TSLanguage)
|
||||||
|
languageForType mediaType = case mediaType of
|
||||||
|
".h" -> Just ts_language_c
|
||||||
|
".c" -> Just ts_language_c
|
||||||
|
".js" -> Just ts_language_javascript
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
parseTreeSitterFile :: Ptr TSLanguage -> String -> IO (Term String Info)
|
||||||
parseTreeSitterFile language contents = do
|
parseTreeSitterFile language contents = do
|
||||||
document <- ts_document_make
|
document <- ts_document_make
|
||||||
ts_document_set_language document language
|
ts_document_set_language document language
|
||||||
@ -59,7 +65,7 @@ parseTreeSitterFile language contents = do
|
|||||||
ts_document_free document
|
ts_document_free document
|
||||||
return term)
|
return term)
|
||||||
|
|
||||||
documentToTerm :: Ptr TSDocument -> Parser
|
documentToTerm :: Ptr TSDocument -> String -> IO (Term String Info)
|
||||||
documentToTerm document contents = alloca $ \root -> do
|
documentToTerm document contents = alloca $ \root -> do
|
||||||
ts_document_root_node_p document root
|
ts_document_root_node_p document root
|
||||||
snd <$> toTerm root where
|
snd <$> toTerm root where
|
||||||
|
@ -19,7 +19,6 @@ library
|
|||||||
, Algorithm
|
, Algorithm
|
||||||
, Interpreter
|
, Interpreter
|
||||||
, OrderedMap
|
, OrderedMap
|
||||||
, Parser
|
|
||||||
, Patch
|
, Patch
|
||||||
, SES
|
, SES
|
||||||
, Categorizable
|
, Categorizable
|
||||||
@ -42,6 +41,7 @@ executable semantic-diff-exe
|
|||||||
hs-source-dirs: app
|
hs-source-dirs: app
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
other-modules: TreeSitter
|
other-modules: TreeSitter
|
||||||
|
, Parser
|
||||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||||
build-depends: base
|
build-depends: base
|
||||||
, containers
|
, containers
|
||||||
@ -59,6 +59,7 @@ executable semantic-diff-profile
|
|||||||
hs-source-dirs: app
|
hs-source-dirs: app
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
other-modules: TreeSitter
|
other-modules: TreeSitter
|
||||||
|
, Parser
|
||||||
ghc-options: -O2
|
ghc-options: -O2
|
||||||
-threaded
|
-threaded
|
||||||
-fprof-auto
|
-fprof-auto
|
||||||
|
Loading…
Reference in New Issue
Block a user