1
1
mirror of https://github.com/github/semantic.git synced 2024-11-24 17:04:47 +03:00

Merge pull request #343 from github/haskell-parser

Maintain modularity of parsing
This commit is contained in:
Josh Vera 2015-12-16 16:19:59 -05:00
commit 721d701287
4 changed files with 20 additions and 18 deletions

View File

@ -8,7 +8,6 @@ import Syntax
import Range import Range
import Split import Split
import Term import Term
import TreeSitter
import Unified import Unified
import Control.Comonad.Cofree import Control.Comonad.Cofree
import qualified Data.ByteString.Char8 as B1 import qualified Data.ByteString.Char8 as B1
@ -36,7 +35,7 @@ main = do
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments) let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
aContents <- readFile sourceAPath aContents <- readFile sourceAPath
bContents <- readFile sourceBPath bContents <- readFile sourceBPath
(aTerm, bTerm) <- let parse = (parserForType . takeExtension) sourceAPath in do (aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do
aTerm <- parse aContents aTerm <- parse aContents
bTerm <- parse bContents bTerm <- parse bContents
return (replaceLeavesWithWordBranches aContents aTerm, replaceLeavesWithWordBranches bContents bTerm) return (replaceLeavesWithWordBranches aContents aTerm, replaceLeavesWithWordBranches bContents bTerm)
@ -57,20 +56,13 @@ main = do
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically") (fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
write rendered h = B2.hPut h rendered write rendered h = B2.hPut h rendered
parserForType :: String -> P.Parser
parserForType mediaType = maybe P.lineByLineParser parseTreeSitterFile $ case mediaType of
".h" -> Just ts_language_c
".c" -> Just ts_language_c
".js" -> Just ts_language_javascript
_ -> Nothing
replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info
replaceLeavesWithWordBranches source term = replaceIn source 0 term replaceLeavesWithWordBranches source term = replaceIn source 0 term
where where
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf substring startIndex categories <$> ranges Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
Indexed i -> Indexed $ replaceIn substring (start range) <$> i Indexed i -> Indexed $ replaceIn substring (start range) <$> i
Fixed f -> Fixed $ replaceIn substring (start range) <$> f Fixed f -> Fixed $ replaceIn substring (start range) <$> f
Keyed k -> Keyed $ replaceIn substring (start range) <$> k Keyed k -> Keyed $ replaceIn substring (start range) <$> k
_ -> syntax _ -> syntax
makeLeaf source startIndex categories (range, substring) = Info range categories :< Leaf substring makeLeaf categories (range, substring) = Info range categories :< Leaf substring

View File

@ -4,18 +4,21 @@ import Diff
import Range import Range
import Syntax import Syntax
import Term import Term
import TreeSitter
import Control.Comonad.Cofree import Control.Comonad.Cofree
import qualified Data.Set as Set
type Parser = String -> IO (Term String Info) type Parser = String -> IO (Term String Info)
parserForType :: String -> Parser
parserForType mediaType = maybe lineByLineParser parseTreeSitterFile $ languageForType mediaType
lineByLineParser :: Parser lineByLineParser :: Parser
lineByLineParser input = return . root . Indexed $ case foldl annotateLeaves ([], 0) lines of lineByLineParser input = return . root . Indexed $ case foldl annotateLeaves ([], 0) lines of
(leaves, _) -> leaves (leaves, _) -> leaves
where where
lines = Prelude.lines input lines = Prelude.lines input
root syntax = Info (Range 0 $ length input) Set.empty :< syntax root syntax = Info (Range 0 $ length input) mempty :< syntax
leaf charIndex line = Info (Range charIndex $ charIndex + length line) Set.empty :< Leaf line leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf line
annotateLeaves (accum, charIndex) line = annotateLeaves (accum, charIndex) line =
(accum ++ [ leaf charIndex line ] (accum ++ [ leaf charIndex line ]
, charIndex + length line + 1) , charIndex + length line + 1)

View File

@ -1,7 +1,6 @@
module TreeSitter where module TreeSitter where
import Diff import Diff
import Parser
import Range import Range
import Syntax import Syntax
import Term import Term
@ -48,7 +47,14 @@ keyedProductions = fromList [ "object" ]
fixedProductions :: Set String fixedProductions :: Set String
fixedProductions = fromList [ "pair", "rel_op", "math_op", "bool_op", "bitwise_op", "type_op", "math_assignment", "assignment", "subscript_access", "member_access", "new_expression", "function_call", "function", "ternary" ] fixedProductions = fromList [ "pair", "rel_op", "math_op", "bool_op", "bitwise_op", "type_op", "math_assignment", "assignment", "subscript_access", "member_access", "new_expression", "function_call", "function", "ternary" ]
parseTreeSitterFile :: Ptr TSLanguage -> Parser languageForType :: String -> Maybe (Ptr TSLanguage)
languageForType mediaType = case mediaType of
".h" -> Just ts_language_c
".c" -> Just ts_language_c
".js" -> Just ts_language_javascript
_ -> Nothing
parseTreeSitterFile :: Ptr TSLanguage -> String -> IO (Term String Info)
parseTreeSitterFile language contents = do parseTreeSitterFile language contents = do
document <- ts_document_make document <- ts_document_make
ts_document_set_language document language ts_document_set_language document language
@ -59,7 +65,7 @@ parseTreeSitterFile language contents = do
ts_document_free document ts_document_free document
return term) return term)
documentToTerm :: Ptr TSDocument -> Parser documentToTerm :: Ptr TSDocument -> String -> IO (Term String Info)
documentToTerm document contents = alloca $ \root -> do documentToTerm document contents = alloca $ \root -> do
ts_document_root_node_p document root ts_document_root_node_p document root
snd <$> toTerm root where snd <$> toTerm root where

View File

@ -19,7 +19,6 @@ library
, Algorithm , Algorithm
, Interpreter , Interpreter
, OrderedMap , OrderedMap
, Parser
, Patch , Patch
, SES , SES
, Categorizable , Categorizable
@ -42,6 +41,7 @@ executable semantic-diff-exe
hs-source-dirs: app hs-source-dirs: app
main-is: Main.hs main-is: Main.hs
other-modules: TreeSitter other-modules: TreeSitter
, Parser
ghc-options: -threaded -rtsopts -with-rtsopts=-N ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base build-depends: base
, containers , containers
@ -59,6 +59,7 @@ executable semantic-diff-profile
hs-source-dirs: app hs-source-dirs: app
main-is: Main.hs main-is: Main.hs
other-modules: TreeSitter other-modules: TreeSitter
, Parser
ghc-options: -O2 ghc-options: -O2
-threaded -threaded
-fprof-auto -fprof-auto