1
1
mirror of https://github.com/github/semantic.git synced 2024-12-21 05:41:54 +03:00

Merge pull request #363 from github/parse-performance

Parse performance
This commit is contained in:
Josh Vera 2015-12-27 10:23:26 -08:00
commit 59926be199
7 changed files with 38 additions and 37 deletions

View File

@ -9,6 +9,7 @@ import Range
import Split
import Term
import Unified
import Source
import Control.Comonad.Cofree
import qualified Data.ByteString.Char8 as B1
import qualified Data.ByteString.Lazy as B2
@ -33,8 +34,8 @@ main :: IO ()
main = do
arguments <- execParser opts
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
aContents <- readFile sourceAPath
bContents <- readFile sourceBPath
aContents <- fromList <$> readFile sourceAPath
bContents <- fromList <$> readFile sourceBPath
(aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do
aTerm <- parse aContents
bTerm <- parse bContents
@ -56,11 +57,11 @@ main = do
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
write rendered h = B2.hPut h rendered
replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info
replaceLeavesWithWordBranches :: Source Char -> Term String Info -> Term String Info
replaceLeavesWithWordBranches source term = replaceIn source 0 term
where
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- slice (offsetRange (negate startIndex) range) source = info :< case syntax of
Leaf _ | ranges <- rangesAndWordsFrom (start range) (toList substring), length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
Indexed i -> Indexed $ replaceIn substring (start range) <$> i
Fixed f -> Fixed $ replaceIn substring (start range) <$> f
Keyed k -> Keyed $ replaceIn substring (start range) <$> k

View File

@ -3,6 +3,7 @@ module Parsers where
import Diff
import Range
import Parser
import Source hiding ((++))
import Syntax
import TreeSitter
import Control.Comonad.Cofree
@ -15,9 +16,9 @@ lineByLineParser :: Parser
lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> leaves
where
lines = Prelude.lines input
lines = actualLines input
root syntax = Info (Range 0 $ length input) mempty :< syntax
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf line
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf (Source.toList line)
annotateLeaves (accum, charIndex) line =
(accum ++ [ leaf charIndex line ]
, charIndex + length line + 1)

View File

@ -1,25 +1,25 @@
module Parser where
import Diff
import Range
import Syntax
import Term
import Control.Comonad.Cofree
import qualified OrderedMap as Map
import qualified Data.Set as Set
import Source
type Parser = String -> IO (Term String Info)
type Parser = Source Char -> IO (Term String Info)
-- | Given a source string and a terms annotation & production/child pairs, construct the term.
type Constructor = String -> Info -> [(String, Term String Info)] -> Term String Info
type Constructor = Source Char -> Info -> [(String, Term String Info)] -> Term String Info
-- | Given two sets of production names, produce a Constructor.
constructorForProductions :: Set.Set String -> Set.Set String -> Constructor
constructorForProductions keyed fixed source info@(Info range categories) = (info :<) . construct
where construct [] = Leaf (substring range source)
where construct [] = Leaf . toList $ slice range source
construct children | not . Set.null $ Set.intersection fixed categories = Fixed $ fmap snd children
construct children | not . Set.null $ Set.intersection keyed categories = Keyed . Map.fromList $ assignKey <$> children
construct children = Indexed $ fmap snd children
assignKey ("pair", node@(_ :< Fixed (key : _))) = (getSubstring key, node)
assignKey (_, node) = (getSubstring node, node)
getSubstring (Info range _ :< _) = substring range source
getSubstring (Info range _ :< _) = toList $ slice range source

View File

@ -35,3 +35,15 @@ break predicate (Source vector) = let (start, remainder) = Vector.break predicat
(++) :: Source a -> Source a -> Source a
(++) (Source a) = Source . (a Vector.++) . getVector
actualLines :: Source Char -> [Source Char]
actualLines source | Source.null source = [ source ]
actualLines source = case Source.break (== '\n') source of
(l, lines') -> case uncons lines' of
Nothing -> [ l ]
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
-- | Compute the line ranges within a given range of a string.
actualLineRanges :: Range -> Source Char -> [Range]
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
where toRange previous string = Range (end previous) $ end previous + length string

View File

@ -18,14 +18,13 @@ import Text.Blaze.Html.Renderer.Utf8
import Data.Monoid
import qualified Data.Set as Set
import Source hiding ((++))
import qualified Source ((++))
type ClassName = String
classifyMarkup :: Foldable f => f String -> Markup -> Markup
classifyMarkup categories element = maybe element ((element !) . A.class_ . stringValue . ("category-" ++)) $ maybeFirst categories
split :: Diff a Info -> String -> String -> IO ByteString
split :: Diff a Info -> Source Char -> Source Char -> IO ByteString
split diff before after = return . renderHtml
. docTypeHtml
. ((head $ link ! A.rel (stringValue "stylesheet") ! A.href (stringValue "style.css")) <>)
@ -34,7 +33,7 @@ split diff before after = return . renderHtml
((colgroup $ (col ! A.width (stringValue . show $ columnWidth)) <> col <> (col ! A.width (stringValue . show $ columnWidth)) <> col) <>)
. mconcat $ numberedLinesToMarkup <$> reverse numbered
where
rows = fst (splitDiffByLines diff (0, 0) sources)
rows = fst (splitDiffByLines diff (0, 0) (before, after))
numbered = foldl numberRows [] rows
maxNumber = case numbered of
[] -> 0
@ -47,14 +46,12 @@ split diff before after = return . renderHtml
columnWidth = max (20 + digits maxNumber * 8) 40
numberedLinesToMarkup :: (Int, Line (SplitDiff a Info), Int, Line (SplitDiff a Info)) -> Markup
numberedLinesToMarkup (m, left, n, right) = tr $ toMarkup (or $ hasChanges <$> left, m, renderable (fst sources) left) <> toMarkup (or $ hasChanges <$> right, n, renderable (snd sources) right) <> string "\n"
numberedLinesToMarkup (m, left, n, right) = tr $ toMarkup (or $ hasChanges <$> left, m, renderable before left) <> toMarkup (or $ hasChanges <$> right, n, renderable after right) <> string "\n"
renderable source = fmap (Renderable . (,) source)
hasChanges diff = or $ const True <$> diff
sources = (fromList before, fromList after)
numberRows :: [(Int, Line a, Int, Line a)] -> Row a -> [(Int, Line a, Int, Line a)]
numberRows [] (Row EmptyLine EmptyLine) = []
numberRows [] (Row left@(Line _) EmptyLine) = [(1, left, 0, EmptyLine)]
@ -153,15 +150,3 @@ openDiff source diff@(Pure term) = const diff <$> openTerm source term
zipWithDefaults :: (a -> b -> c) -> a -> b -> [a] -> [b] -> [c]
zipWithDefaults f da db a b = take (max (length a) (length b)) $ zipWith f (a ++ repeat da) (b ++ repeat db)
actualLines :: Source Char -> [Source Char]
actualLines source | length source == 0 = [ source ]
actualLines source = case Source.break (== '\n') source of
(l, lines') -> case uncons lines' of
Nothing -> [ l ]
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
-- | Compute the line ranges within a given range of a string.
actualLineRanges :: Range -> Source Char -> [Range]
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
where toRange previous string = Range (end previous) $ end previous + length string

View File

@ -3,6 +3,7 @@ module TreeSitter where
import Diff
import Range
import Parser
import Source
import qualified Data.Set as Set
import Foreign
import Foreign.C
@ -54,7 +55,7 @@ parseTreeSitterFile :: Language -> Parser
parseTreeSitterFile (Language language constructor) contents = do
document <- ts_document_make
ts_document_set_language document language
withCString contents (\ source -> do
withCString (toList contents) (\ source -> do
ts_document_set_input_string document source
ts_document_parse document
term <- documentToTerm constructor document contents

View File

@ -5,6 +5,7 @@ import Patch
import Syntax
import Term
import Range
import Source hiding ((++))
import Control.Arrow
import Control.Monad.Free
import Control.Comonad.Cofree
@ -12,13 +13,13 @@ import Data.List hiding (foldl)
import qualified OrderedMap as Map
import Rainbow
unified :: Diff a Info -> String -> String -> IO ByteString
unified :: Diff a Info -> Source Char -> Source Char -> IO ByteString
unified diff before after = do
renderer <- byteStringMakerFromEnvironment
return . mconcat . chunksToByteStrings renderer . fst $ iter g mapped where
mapped = fmap (unifiedPatch &&& range) diff
g (Annotated (_, info) syntax) = annotationAndSyntaxToChunks after info syntax
annotationAndSyntaxToChunks source (Info range _) (Leaf _) = (pure . chunk $ substring range source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Leaf _) = (pure . chunk . toList $ slice range source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Indexed i) = (unifiedRange range i source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Fixed f) = (unifiedRange range f source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Keyed k) = (unifiedRange range (sort $ snd <$> Map.toList k) source, Just range)
@ -28,13 +29,13 @@ unified diff before after = do
beforeChunk = maybe [] (change "-" . unifiedTerm before) $ Patch.before patch
afterChunk = maybe [] (change "+" . unifiedTerm after) $ Patch.after patch
unifiedTerm :: String -> Term a Info -> [Chunk String]
unifiedTerm :: Source Char -> Term a Info -> [Chunk String]
unifiedTerm source term = fst $ cata (annotationAndSyntaxToChunks source) term
unifiedRange :: Range -> [([Chunk String], Maybe Range)] -> String -> [Chunk String]
unifiedRange range children source = out <> (pure . chunk $ substring Range { start = previous, end = end range } source) where
unifiedRange :: Range -> [([Chunk String], Maybe Range)] -> Source Char -> [Chunk String]
unifiedRange range children source = out <> (pure . chunk . toList $ slice Range { start = previous, end = end range } source) where
(out, previous) = foldl' accumulateContext ([], start range) children
accumulateContext (out, previous) (child, Just range) = (mconcat [ out, pure . chunk $ substring Range { start = previous, end = start range } source, child ], end range)
accumulateContext (out, previous) (child, Just range) = (mconcat [ out, pure . chunk . toList $ slice Range { start = previous, end = start range } source, child ], end range)
accumulateContext (out, previous) (child, _) = (out <> child, previous)
range :: Patch (Term a Info) -> Maybe Range