mirror of
https://github.com/github/semantic.git
synced 2024-12-21 05:41:54 +03:00
Merge pull request #363 from github/parse-performance
Parse performance
This commit is contained in:
commit
59926be199
11
app/Main.hs
11
app/Main.hs
@ -9,6 +9,7 @@ import Range
|
||||
import Split
|
||||
import Term
|
||||
import Unified
|
||||
import Source
|
||||
import Control.Comonad.Cofree
|
||||
import qualified Data.ByteString.Char8 as B1
|
||||
import qualified Data.ByteString.Lazy as B2
|
||||
@ -33,8 +34,8 @@ main :: IO ()
|
||||
main = do
|
||||
arguments <- execParser opts
|
||||
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
|
||||
aContents <- readFile sourceAPath
|
||||
bContents <- readFile sourceBPath
|
||||
aContents <- fromList <$> readFile sourceAPath
|
||||
bContents <- fromList <$> readFile sourceBPath
|
||||
(aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do
|
||||
aTerm <- parse aContents
|
||||
bTerm <- parse bContents
|
||||
@ -56,11 +57,11 @@ main = do
|
||||
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
|
||||
write rendered h = B2.hPut h rendered
|
||||
|
||||
replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info
|
||||
replaceLeavesWithWordBranches :: Source Char -> Term String Info -> Term String Info
|
||||
replaceLeavesWithWordBranches source term = replaceIn source 0 term
|
||||
where
|
||||
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of
|
||||
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
|
||||
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- slice (offsetRange (negate startIndex) range) source = info :< case syntax of
|
||||
Leaf _ | ranges <- rangesAndWordsFrom (start range) (toList substring), length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
|
||||
Indexed i -> Indexed $ replaceIn substring (start range) <$> i
|
||||
Fixed f -> Fixed $ replaceIn substring (start range) <$> f
|
||||
Keyed k -> Keyed $ replaceIn substring (start range) <$> k
|
||||
|
@ -3,6 +3,7 @@ module Parsers where
|
||||
import Diff
|
||||
import Range
|
||||
import Parser
|
||||
import Source hiding ((++))
|
||||
import Syntax
|
||||
import TreeSitter
|
||||
import Control.Comonad.Cofree
|
||||
@ -15,9 +16,9 @@ lineByLineParser :: Parser
|
||||
lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of
|
||||
(leaves, _) -> leaves
|
||||
where
|
||||
lines = Prelude.lines input
|
||||
lines = actualLines input
|
||||
root syntax = Info (Range 0 $ length input) mempty :< syntax
|
||||
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf line
|
||||
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf (Source.toList line)
|
||||
annotateLeaves (accum, charIndex) line =
|
||||
(accum ++ [ leaf charIndex line ]
|
||||
, charIndex + length line + 1)
|
||||
|
@ -1,25 +1,25 @@
|
||||
module Parser where
|
||||
|
||||
import Diff
|
||||
import Range
|
||||
import Syntax
|
||||
import Term
|
||||
import Control.Comonad.Cofree
|
||||
import qualified OrderedMap as Map
|
||||
import qualified Data.Set as Set
|
||||
import Source
|
||||
|
||||
type Parser = String -> IO (Term String Info)
|
||||
type Parser = Source Char -> IO (Term String Info)
|
||||
|
||||
-- | Given a source string and a term’s annotation & production/child pairs, construct the term.
|
||||
type Constructor = String -> Info -> [(String, Term String Info)] -> Term String Info
|
||||
type Constructor = Source Char -> Info -> [(String, Term String Info)] -> Term String Info
|
||||
|
||||
-- | Given two sets of production names, produce a Constructor.
|
||||
constructorForProductions :: Set.Set String -> Set.Set String -> Constructor
|
||||
constructorForProductions keyed fixed source info@(Info range categories) = (info :<) . construct
|
||||
where construct [] = Leaf (substring range source)
|
||||
where construct [] = Leaf . toList $ slice range source
|
||||
construct children | not . Set.null $ Set.intersection fixed categories = Fixed $ fmap snd children
|
||||
construct children | not . Set.null $ Set.intersection keyed categories = Keyed . Map.fromList $ assignKey <$> children
|
||||
construct children = Indexed $ fmap snd children
|
||||
assignKey ("pair", node@(_ :< Fixed (key : _))) = (getSubstring key, node)
|
||||
assignKey (_, node) = (getSubstring node, node)
|
||||
getSubstring (Info range _ :< _) = substring range source
|
||||
getSubstring (Info range _ :< _) = toList $ slice range source
|
||||
|
@ -35,3 +35,15 @@ break predicate (Source vector) = let (start, remainder) = Vector.break predicat
|
||||
|
||||
(++) :: Source a -> Source a -> Source a
|
||||
(++) (Source a) = Source . (a Vector.++) . getVector
|
||||
|
||||
actualLines :: Source Char -> [Source Char]
|
||||
actualLines source | Source.null source = [ source ]
|
||||
actualLines source = case Source.break (== '\n') source of
|
||||
(l, lines') -> case uncons lines' of
|
||||
Nothing -> [ l ]
|
||||
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
|
||||
|
||||
-- | Compute the line ranges within a given range of a string.
|
||||
actualLineRanges :: Range -> Source Char -> [Range]
|
||||
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
|
||||
where toRange previous string = Range (end previous) $ end previous + length string
|
||||
|
21
src/Split.hs
21
src/Split.hs
@ -18,14 +18,13 @@ import Text.Blaze.Html.Renderer.Utf8
|
||||
import Data.Monoid
|
||||
import qualified Data.Set as Set
|
||||
import Source hiding ((++))
|
||||
import qualified Source ((++))
|
||||
|
||||
type ClassName = String
|
||||
|
||||
classifyMarkup :: Foldable f => f String -> Markup -> Markup
|
||||
classifyMarkup categories element = maybe element ((element !) . A.class_ . stringValue . ("category-" ++)) $ maybeFirst categories
|
||||
|
||||
split :: Diff a Info -> String -> String -> IO ByteString
|
||||
split :: Diff a Info -> Source Char -> Source Char -> IO ByteString
|
||||
split diff before after = return . renderHtml
|
||||
. docTypeHtml
|
||||
. ((head $ link ! A.rel (stringValue "stylesheet") ! A.href (stringValue "style.css")) <>)
|
||||
@ -34,7 +33,7 @@ split diff before after = return . renderHtml
|
||||
((colgroup $ (col ! A.width (stringValue . show $ columnWidth)) <> col <> (col ! A.width (stringValue . show $ columnWidth)) <> col) <>)
|
||||
. mconcat $ numberedLinesToMarkup <$> reverse numbered
|
||||
where
|
||||
rows = fst (splitDiffByLines diff (0, 0) sources)
|
||||
rows = fst (splitDiffByLines diff (0, 0) (before, after))
|
||||
numbered = foldl numberRows [] rows
|
||||
maxNumber = case numbered of
|
||||
[] -> 0
|
||||
@ -47,14 +46,12 @@ split diff before after = return . renderHtml
|
||||
columnWidth = max (20 + digits maxNumber * 8) 40
|
||||
|
||||
numberedLinesToMarkup :: (Int, Line (SplitDiff a Info), Int, Line (SplitDiff a Info)) -> Markup
|
||||
numberedLinesToMarkup (m, left, n, right) = tr $ toMarkup (or $ hasChanges <$> left, m, renderable (fst sources) left) <> toMarkup (or $ hasChanges <$> right, n, renderable (snd sources) right) <> string "\n"
|
||||
numberedLinesToMarkup (m, left, n, right) = tr $ toMarkup (or $ hasChanges <$> left, m, renderable before left) <> toMarkup (or $ hasChanges <$> right, n, renderable after right) <> string "\n"
|
||||
|
||||
renderable source = fmap (Renderable . (,) source)
|
||||
|
||||
hasChanges diff = or $ const True <$> diff
|
||||
|
||||
sources = (fromList before, fromList after)
|
||||
|
||||
numberRows :: [(Int, Line a, Int, Line a)] -> Row a -> [(Int, Line a, Int, Line a)]
|
||||
numberRows [] (Row EmptyLine EmptyLine) = []
|
||||
numberRows [] (Row left@(Line _) EmptyLine) = [(1, left, 0, EmptyLine)]
|
||||
@ -153,15 +150,3 @@ openDiff source diff@(Pure term) = const diff <$> openTerm source term
|
||||
|
||||
zipWithDefaults :: (a -> b -> c) -> a -> b -> [a] -> [b] -> [c]
|
||||
zipWithDefaults f da db a b = take (max (length a) (length b)) $ zipWith f (a ++ repeat da) (b ++ repeat db)
|
||||
|
||||
actualLines :: Source Char -> [Source Char]
|
||||
actualLines source | length source == 0 = [ source ]
|
||||
actualLines source = case Source.break (== '\n') source of
|
||||
(l, lines') -> case uncons lines' of
|
||||
Nothing -> [ l ]
|
||||
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
|
||||
|
||||
-- | Compute the line ranges within a given range of a string.
|
||||
actualLineRanges :: Range -> Source Char -> [Range]
|
||||
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
|
||||
where toRange previous string = Range (end previous) $ end previous + length string
|
||||
|
@ -3,6 +3,7 @@ module TreeSitter where
|
||||
import Diff
|
||||
import Range
|
||||
import Parser
|
||||
import Source
|
||||
import qualified Data.Set as Set
|
||||
import Foreign
|
||||
import Foreign.C
|
||||
@ -54,7 +55,7 @@ parseTreeSitterFile :: Language -> Parser
|
||||
parseTreeSitterFile (Language language constructor) contents = do
|
||||
document <- ts_document_make
|
||||
ts_document_set_language document language
|
||||
withCString contents (\ source -> do
|
||||
withCString (toList contents) (\ source -> do
|
||||
ts_document_set_input_string document source
|
||||
ts_document_parse document
|
||||
term <- documentToTerm constructor document contents
|
||||
|
@ -5,6 +5,7 @@ import Patch
|
||||
import Syntax
|
||||
import Term
|
||||
import Range
|
||||
import Source hiding ((++))
|
||||
import Control.Arrow
|
||||
import Control.Monad.Free
|
||||
import Control.Comonad.Cofree
|
||||
@ -12,13 +13,13 @@ import Data.List hiding (foldl)
|
||||
import qualified OrderedMap as Map
|
||||
import Rainbow
|
||||
|
||||
unified :: Diff a Info -> String -> String -> IO ByteString
|
||||
unified :: Diff a Info -> Source Char -> Source Char -> IO ByteString
|
||||
unified diff before after = do
|
||||
renderer <- byteStringMakerFromEnvironment
|
||||
return . mconcat . chunksToByteStrings renderer . fst $ iter g mapped where
|
||||
mapped = fmap (unifiedPatch &&& range) diff
|
||||
g (Annotated (_, info) syntax) = annotationAndSyntaxToChunks after info syntax
|
||||
annotationAndSyntaxToChunks source (Info range _) (Leaf _) = (pure . chunk $ substring range source, Just range)
|
||||
annotationAndSyntaxToChunks source (Info range _) (Leaf _) = (pure . chunk . toList $ slice range source, Just range)
|
||||
annotationAndSyntaxToChunks source (Info range _) (Indexed i) = (unifiedRange range i source, Just range)
|
||||
annotationAndSyntaxToChunks source (Info range _) (Fixed f) = (unifiedRange range f source, Just range)
|
||||
annotationAndSyntaxToChunks source (Info range _) (Keyed k) = (unifiedRange range (sort $ snd <$> Map.toList k) source, Just range)
|
||||
@ -28,13 +29,13 @@ unified diff before after = do
|
||||
beforeChunk = maybe [] (change "-" . unifiedTerm before) $ Patch.before patch
|
||||
afterChunk = maybe [] (change "+" . unifiedTerm after) $ Patch.after patch
|
||||
|
||||
unifiedTerm :: String -> Term a Info -> [Chunk String]
|
||||
unifiedTerm :: Source Char -> Term a Info -> [Chunk String]
|
||||
unifiedTerm source term = fst $ cata (annotationAndSyntaxToChunks source) term
|
||||
|
||||
unifiedRange :: Range -> [([Chunk String], Maybe Range)] -> String -> [Chunk String]
|
||||
unifiedRange range children source = out <> (pure . chunk $ substring Range { start = previous, end = end range } source) where
|
||||
unifiedRange :: Range -> [([Chunk String], Maybe Range)] -> Source Char -> [Chunk String]
|
||||
unifiedRange range children source = out <> (pure . chunk . toList $ slice Range { start = previous, end = end range } source) where
|
||||
(out, previous) = foldl' accumulateContext ([], start range) children
|
||||
accumulateContext (out, previous) (child, Just range) = (mconcat [ out, pure . chunk $ substring Range { start = previous, end = start range } source, child ], end range)
|
||||
accumulateContext (out, previous) (child, Just range) = (mconcat [ out, pure . chunk . toList $ slice Range { start = previous, end = start range } source, child ], end range)
|
||||
accumulateContext (out, previous) (child, _) = (out <> child, previous)
|
||||
|
||||
range :: Patch (Term a Info) -> Maybe Range
|
||||
|
Loading…
Reference in New Issue
Block a user