1
1
mirror of https://github.com/github/semantic.git synced 2024-11-24 08:54:07 +03:00

Merge pull request #343 from github/haskell-parser

Maintain modularity of parsing
This commit is contained in:
Josh Vera 2015-12-16 16:19:59 -05:00
commit 721d701287
4 changed files with 20 additions and 18 deletions

View File

@ -8,7 +8,6 @@ import Syntax
import Range
import Split
import Term
import TreeSitter
import Unified
import Control.Comonad.Cofree
import qualified Data.ByteString.Char8 as B1
@ -36,7 +35,7 @@ main = do
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
aContents <- readFile sourceAPath
bContents <- readFile sourceBPath
(aTerm, bTerm) <- let parse = (parserForType . takeExtension) sourceAPath in do
(aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do
aTerm <- parse aContents
bTerm <- parse bContents
return (replaceLeavesWithWordBranches aContents aTerm, replaceLeavesWithWordBranches bContents bTerm)
@ -57,20 +56,13 @@ main = do
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
write rendered h = B2.hPut h rendered
parserForType :: String -> P.Parser
parserForType mediaType = maybe P.lineByLineParser parseTreeSitterFile $ case mediaType of
".h" -> Just ts_language_c
".c" -> Just ts_language_c
".js" -> Just ts_language_javascript
_ -> Nothing
replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info
replaceLeavesWithWordBranches source term = replaceIn source 0 term
where
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf substring startIndex categories <$> ranges
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
Indexed i -> Indexed $ replaceIn substring (start range) <$> i
Fixed f -> Fixed $ replaceIn substring (start range) <$> f
Keyed k -> Keyed $ replaceIn substring (start range) <$> k
_ -> syntax
makeLeaf source startIndex categories (range, substring) = Info range categories :< Leaf substring
makeLeaf categories (range, substring) = Info range categories :< Leaf substring

View File

@ -4,18 +4,21 @@ import Diff
import Range
import Syntax
import Term
import TreeSitter
import Control.Comonad.Cofree
import qualified Data.Set as Set
type Parser = String -> IO (Term String Info)
parserForType :: String -> Parser
parserForType mediaType = maybe lineByLineParser parseTreeSitterFile $ languageForType mediaType
lineByLineParser :: Parser
lineByLineParser input = return . root . Indexed $ case foldl annotateLeaves ([], 0) lines of
(leaves, _) -> leaves
where
lines = Prelude.lines input
root syntax = Info (Range 0 $ length input) Set.empty :< syntax
leaf charIndex line = Info (Range charIndex $ charIndex + length line) Set.empty :< Leaf line
root syntax = Info (Range 0 $ length input) mempty :< syntax
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum ++ [ leaf charIndex line ]
, charIndex + length line + 1)

View File

@ -1,7 +1,6 @@
module TreeSitter where
import Diff
import Parser
import Range
import Syntax
import Term
@ -48,7 +47,14 @@ keyedProductions = fromList [ "object" ]
fixedProductions :: Set String
fixedProductions = fromList [ "pair", "rel_op", "math_op", "bool_op", "bitwise_op", "type_op", "math_assignment", "assignment", "subscript_access", "member_access", "new_expression", "function_call", "function", "ternary" ]
parseTreeSitterFile :: Ptr TSLanguage -> Parser
languageForType :: String -> Maybe (Ptr TSLanguage)
languageForType mediaType = case mediaType of
".h" -> Just ts_language_c
".c" -> Just ts_language_c
".js" -> Just ts_language_javascript
_ -> Nothing
parseTreeSitterFile :: Ptr TSLanguage -> String -> IO (Term String Info)
parseTreeSitterFile language contents = do
document <- ts_document_make
ts_document_set_language document language
@ -59,7 +65,7 @@ parseTreeSitterFile language contents = do
ts_document_free document
return term)
documentToTerm :: Ptr TSDocument -> Parser
documentToTerm :: Ptr TSDocument -> String -> IO (Term String Info)
documentToTerm document contents = alloca $ \root -> do
ts_document_root_node_p document root
snd <$> toTerm root where

View File

@ -19,7 +19,6 @@ library
, Algorithm
, Interpreter
, OrderedMap
, Parser
, Patch
, SES
, Categorizable
@ -42,6 +41,7 @@ executable semantic-diff-exe
hs-source-dirs: app
main-is: Main.hs
other-modules: TreeSitter
, Parser
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base
, containers
@ -59,6 +59,7 @@ executable semantic-diff-profile
hs-source-dirs: app
main-is: Main.hs
other-modules: TreeSitter
, Parser
ghc-options: -O2
-threaded
-fprof-auto