1
1
mirror of https://github.com/github/semantic.git synced 2024-12-21 13:51:44 +03:00

Merge pull request #363 from github/parse-performance

Parse performance
This commit is contained in:
Josh Vera 2015-12-27 10:23:26 -08:00
commit 59926be199
7 changed files with 38 additions and 37 deletions

View File

@ -9,6 +9,7 @@ import Range
import Split import Split
import Term import Term
import Unified import Unified
import Source
import Control.Comonad.Cofree import Control.Comonad.Cofree
import qualified Data.ByteString.Char8 as B1 import qualified Data.ByteString.Char8 as B1
import qualified Data.ByteString.Lazy as B2 import qualified Data.ByteString.Lazy as B2
@ -33,8 +34,8 @@ main :: IO ()
main = do main = do
arguments <- execParser opts arguments <- execParser opts
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments) let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
aContents <- readFile sourceAPath aContents <- fromList <$> readFile sourceAPath
bContents <- readFile sourceBPath bContents <- fromList <$> readFile sourceBPath
(aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do (aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do
aTerm <- parse aContents aTerm <- parse aContents
bTerm <- parse bContents bTerm <- parse bContents
@ -56,11 +57,11 @@ main = do
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically") (fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
write rendered h = B2.hPut h rendered write rendered h = B2.hPut h rendered
replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info replaceLeavesWithWordBranches :: Source Char -> Term String Info -> Term String Info
replaceLeavesWithWordBranches source term = replaceIn source 0 term replaceLeavesWithWordBranches source term = replaceIn source 0 term
where where
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- slice (offsetRange (negate startIndex) range) source = info :< case syntax of
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges Leaf _ | ranges <- rangesAndWordsFrom (start range) (toList substring), length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
Indexed i -> Indexed $ replaceIn substring (start range) <$> i Indexed i -> Indexed $ replaceIn substring (start range) <$> i
Fixed f -> Fixed $ replaceIn substring (start range) <$> f Fixed f -> Fixed $ replaceIn substring (start range) <$> f
Keyed k -> Keyed $ replaceIn substring (start range) <$> k Keyed k -> Keyed $ replaceIn substring (start range) <$> k

View File

@ -3,6 +3,7 @@ module Parsers where
import Diff import Diff
import Range import Range
import Parser import Parser
import Source hiding ((++))
import Syntax import Syntax
import TreeSitter import TreeSitter
import Control.Comonad.Cofree import Control.Comonad.Cofree
@ -15,9 +16,9 @@ lineByLineParser :: Parser
lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> leaves (leaves, _) -> leaves
where where
lines = Prelude.lines input lines = actualLines input
root syntax = Info (Range 0 $ length input) mempty :< syntax root syntax = Info (Range 0 $ length input) mempty :< syntax
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf line leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf (Source.toList line)
annotateLeaves (accum, charIndex) line = annotateLeaves (accum, charIndex) line =
(accum ++ [ leaf charIndex line ] (accum ++ [ leaf charIndex line ]
, charIndex + length line + 1) , charIndex + length line + 1)

View File

@ -1,25 +1,25 @@
module Parser where module Parser where
import Diff import Diff
import Range
import Syntax import Syntax
import Term import Term
import Control.Comonad.Cofree import Control.Comonad.Cofree
import qualified OrderedMap as Map import qualified OrderedMap as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import Source
type Parser = String -> IO (Term String Info) type Parser = Source Char -> IO (Term String Info)
-- | Given a source string and a terms annotation & production/child pairs, construct the term. -- | Given a source string and a terms annotation & production/child pairs, construct the term.
type Constructor = String -> Info -> [(String, Term String Info)] -> Term String Info type Constructor = Source Char -> Info -> [(String, Term String Info)] -> Term String Info
-- | Given two sets of production names, produce a Constructor. -- | Given two sets of production names, produce a Constructor.
constructorForProductions :: Set.Set String -> Set.Set String -> Constructor constructorForProductions :: Set.Set String -> Set.Set String -> Constructor
constructorForProductions keyed fixed source info@(Info range categories) = (info :<) . construct constructorForProductions keyed fixed source info@(Info range categories) = (info :<) . construct
where construct [] = Leaf (substring range source) where construct [] = Leaf . toList $ slice range source
construct children | not . Set.null $ Set.intersection fixed categories = Fixed $ fmap snd children construct children | not . Set.null $ Set.intersection fixed categories = Fixed $ fmap snd children
construct children | not . Set.null $ Set.intersection keyed categories = Keyed . Map.fromList $ assignKey <$> children construct children | not . Set.null $ Set.intersection keyed categories = Keyed . Map.fromList $ assignKey <$> children
construct children = Indexed $ fmap snd children construct children = Indexed $ fmap snd children
assignKey ("pair", node@(_ :< Fixed (key : _))) = (getSubstring key, node) assignKey ("pair", node@(_ :< Fixed (key : _))) = (getSubstring key, node)
assignKey (_, node) = (getSubstring node, node) assignKey (_, node) = (getSubstring node, node)
getSubstring (Info range _ :< _) = substring range source getSubstring (Info range _ :< _) = toList $ slice range source

View File

@ -35,3 +35,15 @@ break predicate (Source vector) = let (start, remainder) = Vector.break predicat
(++) :: Source a -> Source a -> Source a (++) :: Source a -> Source a -> Source a
(++) (Source a) = Source . (a Vector.++) . getVector (++) (Source a) = Source . (a Vector.++) . getVector
actualLines :: Source Char -> [Source Char]
actualLines source | Source.null source = [ source ]
actualLines source = case Source.break (== '\n') source of
(l, lines') -> case uncons lines' of
Nothing -> [ l ]
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
-- | Compute the line ranges within a given range of a string.
actualLineRanges :: Range -> Source Char -> [Range]
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
where toRange previous string = Range (end previous) $ end previous + length string

View File

@ -18,14 +18,13 @@ import Text.Blaze.Html.Renderer.Utf8
import Data.Monoid import Data.Monoid
import qualified Data.Set as Set import qualified Data.Set as Set
import Source hiding ((++)) import Source hiding ((++))
import qualified Source ((++))
type ClassName = String type ClassName = String
classifyMarkup :: Foldable f => f String -> Markup -> Markup classifyMarkup :: Foldable f => f String -> Markup -> Markup
classifyMarkup categories element = maybe element ((element !) . A.class_ . stringValue . ("category-" ++)) $ maybeFirst categories classifyMarkup categories element = maybe element ((element !) . A.class_ . stringValue . ("category-" ++)) $ maybeFirst categories
split :: Diff a Info -> String -> String -> IO ByteString split :: Diff a Info -> Source Char -> Source Char -> IO ByteString
split diff before after = return . renderHtml split diff before after = return . renderHtml
. docTypeHtml . docTypeHtml
. ((head $ link ! A.rel (stringValue "stylesheet") ! A.href (stringValue "style.css")) <>) . ((head $ link ! A.rel (stringValue "stylesheet") ! A.href (stringValue "style.css")) <>)
@ -34,7 +33,7 @@ split diff before after = return . renderHtml
((colgroup $ (col ! A.width (stringValue . show $ columnWidth)) <> col <> (col ! A.width (stringValue . show $ columnWidth)) <> col) <>) ((colgroup $ (col ! A.width (stringValue . show $ columnWidth)) <> col <> (col ! A.width (stringValue . show $ columnWidth)) <> col) <>)
. mconcat $ numberedLinesToMarkup <$> reverse numbered . mconcat $ numberedLinesToMarkup <$> reverse numbered
where where
rows = fst (splitDiffByLines diff (0, 0) sources) rows = fst (splitDiffByLines diff (0, 0) (before, after))
numbered = foldl numberRows [] rows numbered = foldl numberRows [] rows
maxNumber = case numbered of maxNumber = case numbered of
[] -> 0 [] -> 0
@ -47,14 +46,12 @@ split diff before after = return . renderHtml
columnWidth = max (20 + digits maxNumber * 8) 40 columnWidth = max (20 + digits maxNumber * 8) 40
numberedLinesToMarkup :: (Int, Line (SplitDiff a Info), Int, Line (SplitDiff a Info)) -> Markup numberedLinesToMarkup :: (Int, Line (SplitDiff a Info), Int, Line (SplitDiff a Info)) -> Markup
numberedLinesToMarkup (m, left, n, right) = tr $ toMarkup (or $ hasChanges <$> left, m, renderable (fst sources) left) <> toMarkup (or $ hasChanges <$> right, n, renderable (snd sources) right) <> string "\n" numberedLinesToMarkup (m, left, n, right) = tr $ toMarkup (or $ hasChanges <$> left, m, renderable before left) <> toMarkup (or $ hasChanges <$> right, n, renderable after right) <> string "\n"
renderable source = fmap (Renderable . (,) source) renderable source = fmap (Renderable . (,) source)
hasChanges diff = or $ const True <$> diff hasChanges diff = or $ const True <$> diff
sources = (fromList before, fromList after)
numberRows :: [(Int, Line a, Int, Line a)] -> Row a -> [(Int, Line a, Int, Line a)] numberRows :: [(Int, Line a, Int, Line a)] -> Row a -> [(Int, Line a, Int, Line a)]
numberRows [] (Row EmptyLine EmptyLine) = [] numberRows [] (Row EmptyLine EmptyLine) = []
numberRows [] (Row left@(Line _) EmptyLine) = [(1, left, 0, EmptyLine)] numberRows [] (Row left@(Line _) EmptyLine) = [(1, left, 0, EmptyLine)]
@ -153,15 +150,3 @@ openDiff source diff@(Pure term) = const diff <$> openTerm source term
zipWithDefaults :: (a -> b -> c) -> a -> b -> [a] -> [b] -> [c] zipWithDefaults :: (a -> b -> c) -> a -> b -> [a] -> [b] -> [c]
zipWithDefaults f da db a b = take (max (length a) (length b)) $ zipWith f (a ++ repeat da) (b ++ repeat db) zipWithDefaults f da db a b = take (max (length a) (length b)) $ zipWith f (a ++ repeat da) (b ++ repeat db)
actualLines :: Source Char -> [Source Char]
actualLines source | length source == 0 = [ source ]
actualLines source = case Source.break (== '\n') source of
(l, lines') -> case uncons lines' of
Nothing -> [ l ]
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
-- | Compute the line ranges within a given range of a string.
actualLineRanges :: Range -> Source Char -> [Range]
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
where toRange previous string = Range (end previous) $ end previous + length string

View File

@ -3,6 +3,7 @@ module TreeSitter where
import Diff import Diff
import Range import Range
import Parser import Parser
import Source
import qualified Data.Set as Set import qualified Data.Set as Set
import Foreign import Foreign
import Foreign.C import Foreign.C
@ -54,7 +55,7 @@ parseTreeSitterFile :: Language -> Parser
parseTreeSitterFile (Language language constructor) contents = do parseTreeSitterFile (Language language constructor) contents = do
document <- ts_document_make document <- ts_document_make
ts_document_set_language document language ts_document_set_language document language
withCString contents (\ source -> do withCString (toList contents) (\ source -> do
ts_document_set_input_string document source ts_document_set_input_string document source
ts_document_parse document ts_document_parse document
term <- documentToTerm constructor document contents term <- documentToTerm constructor document contents

View File

@ -5,6 +5,7 @@ import Patch
import Syntax import Syntax
import Term import Term
import Range import Range
import Source hiding ((++))
import Control.Arrow import Control.Arrow
import Control.Monad.Free import Control.Monad.Free
import Control.Comonad.Cofree import Control.Comonad.Cofree
@ -12,13 +13,13 @@ import Data.List hiding (foldl)
import qualified OrderedMap as Map import qualified OrderedMap as Map
import Rainbow import Rainbow
unified :: Diff a Info -> String -> String -> IO ByteString unified :: Diff a Info -> Source Char -> Source Char -> IO ByteString
unified diff before after = do unified diff before after = do
renderer <- byteStringMakerFromEnvironment renderer <- byteStringMakerFromEnvironment
return . mconcat . chunksToByteStrings renderer . fst $ iter g mapped where return . mconcat . chunksToByteStrings renderer . fst $ iter g mapped where
mapped = fmap (unifiedPatch &&& range) diff mapped = fmap (unifiedPatch &&& range) diff
g (Annotated (_, info) syntax) = annotationAndSyntaxToChunks after info syntax g (Annotated (_, info) syntax) = annotationAndSyntaxToChunks after info syntax
annotationAndSyntaxToChunks source (Info range _) (Leaf _) = (pure . chunk $ substring range source, Just range) annotationAndSyntaxToChunks source (Info range _) (Leaf _) = (pure . chunk . toList $ slice range source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Indexed i) = (unifiedRange range i source, Just range) annotationAndSyntaxToChunks source (Info range _) (Indexed i) = (unifiedRange range i source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Fixed f) = (unifiedRange range f source, Just range) annotationAndSyntaxToChunks source (Info range _) (Fixed f) = (unifiedRange range f source, Just range)
annotationAndSyntaxToChunks source (Info range _) (Keyed k) = (unifiedRange range (sort $ snd <$> Map.toList k) source, Just range) annotationAndSyntaxToChunks source (Info range _) (Keyed k) = (unifiedRange range (sort $ snd <$> Map.toList k) source, Just range)
@ -28,13 +29,13 @@ unified diff before after = do
beforeChunk = maybe [] (change "-" . unifiedTerm before) $ Patch.before patch beforeChunk = maybe [] (change "-" . unifiedTerm before) $ Patch.before patch
afterChunk = maybe [] (change "+" . unifiedTerm after) $ Patch.after patch afterChunk = maybe [] (change "+" . unifiedTerm after) $ Patch.after patch
unifiedTerm :: String -> Term a Info -> [Chunk String] unifiedTerm :: Source Char -> Term a Info -> [Chunk String]
unifiedTerm source term = fst $ cata (annotationAndSyntaxToChunks source) term unifiedTerm source term = fst $ cata (annotationAndSyntaxToChunks source) term
unifiedRange :: Range -> [([Chunk String], Maybe Range)] -> String -> [Chunk String] unifiedRange :: Range -> [([Chunk String], Maybe Range)] -> Source Char -> [Chunk String]
unifiedRange range children source = out <> (pure . chunk $ substring Range { start = previous, end = end range } source) where unifiedRange range children source = out <> (pure . chunk . toList $ slice Range { start = previous, end = end range } source) where
(out, previous) = foldl' accumulateContext ([], start range) children (out, previous) = foldl' accumulateContext ([], start range) children
accumulateContext (out, previous) (child, Just range) = (mconcat [ out, pure . chunk $ substring Range { start = previous, end = start range } source, child ], end range) accumulateContext (out, previous) (child, Just range) = (mconcat [ out, pure . chunk . toList $ slice Range { start = previous, end = start range } source, child ], end range)
accumulateContext (out, previous) (child, _) = (out <> child, previous) accumulateContext (out, previous) (child, _) = (out <> child, previous)
range :: Patch (Term a Info) -> Maybe Range range :: Patch (Term a Info) -> Maybe Range