mirror of
https://github.com/github/semantic.git
synced 2024-12-21 13:51:44 +03:00
Merge pull request #363 from github/parse-performance
Parse performance
This commit is contained in:
commit
59926be199
11
app/Main.hs
11
app/Main.hs
@ -9,6 +9,7 @@ import Range
|
|||||||
import Split
|
import Split
|
||||||
import Term
|
import Term
|
||||||
import Unified
|
import Unified
|
||||||
|
import Source
|
||||||
import Control.Comonad.Cofree
|
import Control.Comonad.Cofree
|
||||||
import qualified Data.ByteString.Char8 as B1
|
import qualified Data.ByteString.Char8 as B1
|
||||||
import qualified Data.ByteString.Lazy as B2
|
import qualified Data.ByteString.Lazy as B2
|
||||||
@ -33,8 +34,8 @@ main :: IO ()
|
|||||||
main = do
|
main = do
|
||||||
arguments <- execParser opts
|
arguments <- execParser opts
|
||||||
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
|
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
|
||||||
aContents <- readFile sourceAPath
|
aContents <- fromList <$> readFile sourceAPath
|
||||||
bContents <- readFile sourceBPath
|
bContents <- fromList <$> readFile sourceBPath
|
||||||
(aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do
|
(aTerm, bTerm) <- let parse = (P.parserForType . takeExtension) sourceAPath in do
|
||||||
aTerm <- parse aContents
|
aTerm <- parse aContents
|
||||||
bTerm <- parse bContents
|
bTerm <- parse bContents
|
||||||
@ -56,11 +57,11 @@ main = do
|
|||||||
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
|
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
|
||||||
write rendered h = B2.hPut h rendered
|
write rendered h = B2.hPut h rendered
|
||||||
|
|
||||||
replaceLeavesWithWordBranches :: String -> Term String Info -> Term String Info
|
replaceLeavesWithWordBranches :: Source Char -> Term String Info -> Term String Info
|
||||||
replaceLeavesWithWordBranches source term = replaceIn source 0 term
|
replaceLeavesWithWordBranches source term = replaceIn source 0 term
|
||||||
where
|
where
|
||||||
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- substring (offsetRange (negate startIndex) range) source = info :< case syntax of
|
replaceIn source startIndex (info@(Info range categories) :< syntax) | substring <- slice (offsetRange (negate startIndex) range) source = info :< case syntax of
|
||||||
Leaf _ | ranges <- rangesAndWordsFrom (start range) substring, length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
|
Leaf _ | ranges <- rangesAndWordsFrom (start range) (toList substring), length ranges > 1 -> Indexed $ makeLeaf categories <$> ranges
|
||||||
Indexed i -> Indexed $ replaceIn substring (start range) <$> i
|
Indexed i -> Indexed $ replaceIn substring (start range) <$> i
|
||||||
Fixed f -> Fixed $ replaceIn substring (start range) <$> f
|
Fixed f -> Fixed $ replaceIn substring (start range) <$> f
|
||||||
Keyed k -> Keyed $ replaceIn substring (start range) <$> k
|
Keyed k -> Keyed $ replaceIn substring (start range) <$> k
|
||||||
|
@ -3,6 +3,7 @@ module Parsers where
|
|||||||
import Diff
|
import Diff
|
||||||
import Range
|
import Range
|
||||||
import Parser
|
import Parser
|
||||||
|
import Source hiding ((++))
|
||||||
import Syntax
|
import Syntax
|
||||||
import TreeSitter
|
import TreeSitter
|
||||||
import Control.Comonad.Cofree
|
import Control.Comonad.Cofree
|
||||||
@ -15,9 +16,9 @@ lineByLineParser :: Parser
|
|||||||
lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of
|
lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of
|
||||||
(leaves, _) -> leaves
|
(leaves, _) -> leaves
|
||||||
where
|
where
|
||||||
lines = Prelude.lines input
|
lines = actualLines input
|
||||||
root syntax = Info (Range 0 $ length input) mempty :< syntax
|
root syntax = Info (Range 0 $ length input) mempty :< syntax
|
||||||
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf line
|
leaf charIndex line = Info (Range charIndex $ charIndex + length line) mempty :< Leaf (Source.toList line)
|
||||||
annotateLeaves (accum, charIndex) line =
|
annotateLeaves (accum, charIndex) line =
|
||||||
(accum ++ [ leaf charIndex line ]
|
(accum ++ [ leaf charIndex line ]
|
||||||
, charIndex + length line + 1)
|
, charIndex + length line + 1)
|
||||||
|
@ -1,25 +1,25 @@
|
|||||||
module Parser where
|
module Parser where
|
||||||
|
|
||||||
import Diff
|
import Diff
|
||||||
import Range
|
|
||||||
import Syntax
|
import Syntax
|
||||||
import Term
|
import Term
|
||||||
import Control.Comonad.Cofree
|
import Control.Comonad.Cofree
|
||||||
import qualified OrderedMap as Map
|
import qualified OrderedMap as Map
|
||||||
import qualified Data.Set as Set
|
import qualified Data.Set as Set
|
||||||
|
import Source
|
||||||
|
|
||||||
type Parser = String -> IO (Term String Info)
|
type Parser = Source Char -> IO (Term String Info)
|
||||||
|
|
||||||
-- | Given a source string and a term’s annotation & production/child pairs, construct the term.
|
-- | Given a source string and a term’s annotation & production/child pairs, construct the term.
|
||||||
type Constructor = String -> Info -> [(String, Term String Info)] -> Term String Info
|
type Constructor = Source Char -> Info -> [(String, Term String Info)] -> Term String Info
|
||||||
|
|
||||||
-- | Given two sets of production names, produce a Constructor.
|
-- | Given two sets of production names, produce a Constructor.
|
||||||
constructorForProductions :: Set.Set String -> Set.Set String -> Constructor
|
constructorForProductions :: Set.Set String -> Set.Set String -> Constructor
|
||||||
constructorForProductions keyed fixed source info@(Info range categories) = (info :<) . construct
|
constructorForProductions keyed fixed source info@(Info range categories) = (info :<) . construct
|
||||||
where construct [] = Leaf (substring range source)
|
where construct [] = Leaf . toList $ slice range source
|
||||||
construct children | not . Set.null $ Set.intersection fixed categories = Fixed $ fmap snd children
|
construct children | not . Set.null $ Set.intersection fixed categories = Fixed $ fmap snd children
|
||||||
construct children | not . Set.null $ Set.intersection keyed categories = Keyed . Map.fromList $ assignKey <$> children
|
construct children | not . Set.null $ Set.intersection keyed categories = Keyed . Map.fromList $ assignKey <$> children
|
||||||
construct children = Indexed $ fmap snd children
|
construct children = Indexed $ fmap snd children
|
||||||
assignKey ("pair", node@(_ :< Fixed (key : _))) = (getSubstring key, node)
|
assignKey ("pair", node@(_ :< Fixed (key : _))) = (getSubstring key, node)
|
||||||
assignKey (_, node) = (getSubstring node, node)
|
assignKey (_, node) = (getSubstring node, node)
|
||||||
getSubstring (Info range _ :< _) = substring range source
|
getSubstring (Info range _ :< _) = toList $ slice range source
|
||||||
|
@ -35,3 +35,15 @@ break predicate (Source vector) = let (start, remainder) = Vector.break predicat
|
|||||||
|
|
||||||
(++) :: Source a -> Source a -> Source a
|
(++) :: Source a -> Source a -> Source a
|
||||||
(++) (Source a) = Source . (a Vector.++) . getVector
|
(++) (Source a) = Source . (a Vector.++) . getVector
|
||||||
|
|
||||||
|
actualLines :: Source Char -> [Source Char]
|
||||||
|
actualLines source | Source.null source = [ source ]
|
||||||
|
actualLines source = case Source.break (== '\n') source of
|
||||||
|
(l, lines') -> case uncons lines' of
|
||||||
|
Nothing -> [ l ]
|
||||||
|
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
|
||||||
|
|
||||||
|
-- | Compute the line ranges within a given range of a string.
|
||||||
|
actualLineRanges :: Range -> Source Char -> [Range]
|
||||||
|
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
|
||||||
|
where toRange previous string = Range (end previous) $ end previous + length string
|
||||||
|
21
src/Split.hs
21
src/Split.hs
@ -18,14 +18,13 @@ import Text.Blaze.Html.Renderer.Utf8
|
|||||||
import Data.Monoid
|
import Data.Monoid
|
||||||
import qualified Data.Set as Set
|
import qualified Data.Set as Set
|
||||||
import Source hiding ((++))
|
import Source hiding ((++))
|
||||||
import qualified Source ((++))
|
|
||||||
|
|
||||||
type ClassName = String
|
type ClassName = String
|
||||||
|
|
||||||
classifyMarkup :: Foldable f => f String -> Markup -> Markup
|
classifyMarkup :: Foldable f => f String -> Markup -> Markup
|
||||||
classifyMarkup categories element = maybe element ((element !) . A.class_ . stringValue . ("category-" ++)) $ maybeFirst categories
|
classifyMarkup categories element = maybe element ((element !) . A.class_ . stringValue . ("category-" ++)) $ maybeFirst categories
|
||||||
|
|
||||||
split :: Diff a Info -> String -> String -> IO ByteString
|
split :: Diff a Info -> Source Char -> Source Char -> IO ByteString
|
||||||
split diff before after = return . renderHtml
|
split diff before after = return . renderHtml
|
||||||
. docTypeHtml
|
. docTypeHtml
|
||||||
. ((head $ link ! A.rel (stringValue "stylesheet") ! A.href (stringValue "style.css")) <>)
|
. ((head $ link ! A.rel (stringValue "stylesheet") ! A.href (stringValue "style.css")) <>)
|
||||||
@ -34,7 +33,7 @@ split diff before after = return . renderHtml
|
|||||||
((colgroup $ (col ! A.width (stringValue . show $ columnWidth)) <> col <> (col ! A.width (stringValue . show $ columnWidth)) <> col) <>)
|
((colgroup $ (col ! A.width (stringValue . show $ columnWidth)) <> col <> (col ! A.width (stringValue . show $ columnWidth)) <> col) <>)
|
||||||
. mconcat $ numberedLinesToMarkup <$> reverse numbered
|
. mconcat $ numberedLinesToMarkup <$> reverse numbered
|
||||||
where
|
where
|
||||||
rows = fst (splitDiffByLines diff (0, 0) sources)
|
rows = fst (splitDiffByLines diff (0, 0) (before, after))
|
||||||
numbered = foldl numberRows [] rows
|
numbered = foldl numberRows [] rows
|
||||||
maxNumber = case numbered of
|
maxNumber = case numbered of
|
||||||
[] -> 0
|
[] -> 0
|
||||||
@ -47,14 +46,12 @@ split diff before after = return . renderHtml
|
|||||||
columnWidth = max (20 + digits maxNumber * 8) 40
|
columnWidth = max (20 + digits maxNumber * 8) 40
|
||||||
|
|
||||||
numberedLinesToMarkup :: (Int, Line (SplitDiff a Info), Int, Line (SplitDiff a Info)) -> Markup
|
numberedLinesToMarkup :: (Int, Line (SplitDiff a Info), Int, Line (SplitDiff a Info)) -> Markup
|
||||||
numberedLinesToMarkup (m, left, n, right) = tr $ toMarkup (or $ hasChanges <$> left, m, renderable (fst sources) left) <> toMarkup (or $ hasChanges <$> right, n, renderable (snd sources) right) <> string "\n"
|
numberedLinesToMarkup (m, left, n, right) = tr $ toMarkup (or $ hasChanges <$> left, m, renderable before left) <> toMarkup (or $ hasChanges <$> right, n, renderable after right) <> string "\n"
|
||||||
|
|
||||||
renderable source = fmap (Renderable . (,) source)
|
renderable source = fmap (Renderable . (,) source)
|
||||||
|
|
||||||
hasChanges diff = or $ const True <$> diff
|
hasChanges diff = or $ const True <$> diff
|
||||||
|
|
||||||
sources = (fromList before, fromList after)
|
|
||||||
|
|
||||||
numberRows :: [(Int, Line a, Int, Line a)] -> Row a -> [(Int, Line a, Int, Line a)]
|
numberRows :: [(Int, Line a, Int, Line a)] -> Row a -> [(Int, Line a, Int, Line a)]
|
||||||
numberRows [] (Row EmptyLine EmptyLine) = []
|
numberRows [] (Row EmptyLine EmptyLine) = []
|
||||||
numberRows [] (Row left@(Line _) EmptyLine) = [(1, left, 0, EmptyLine)]
|
numberRows [] (Row left@(Line _) EmptyLine) = [(1, left, 0, EmptyLine)]
|
||||||
@ -153,15 +150,3 @@ openDiff source diff@(Pure term) = const diff <$> openTerm source term
|
|||||||
|
|
||||||
zipWithDefaults :: (a -> b -> c) -> a -> b -> [a] -> [b] -> [c]
|
zipWithDefaults :: (a -> b -> c) -> a -> b -> [a] -> [b] -> [c]
|
||||||
zipWithDefaults f da db a b = take (max (length a) (length b)) $ zipWith f (a ++ repeat da) (b ++ repeat db)
|
zipWithDefaults f da db a b = take (max (length a) (length b)) $ zipWith f (a ++ repeat da) (b ++ repeat db)
|
||||||
|
|
||||||
actualLines :: Source Char -> [Source Char]
|
|
||||||
actualLines source | length source == 0 = [ source ]
|
|
||||||
actualLines source = case Source.break (== '\n') source of
|
|
||||||
(l, lines') -> case uncons lines' of
|
|
||||||
Nothing -> [ l ]
|
|
||||||
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
|
|
||||||
|
|
||||||
-- | Compute the line ranges within a given range of a string.
|
|
||||||
actualLineRanges :: Range -> Source Char -> [Range]
|
|
||||||
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
|
|
||||||
where toRange previous string = Range (end previous) $ end previous + length string
|
|
||||||
|
@ -3,6 +3,7 @@ module TreeSitter where
|
|||||||
import Diff
|
import Diff
|
||||||
import Range
|
import Range
|
||||||
import Parser
|
import Parser
|
||||||
|
import Source
|
||||||
import qualified Data.Set as Set
|
import qualified Data.Set as Set
|
||||||
import Foreign
|
import Foreign
|
||||||
import Foreign.C
|
import Foreign.C
|
||||||
@ -54,7 +55,7 @@ parseTreeSitterFile :: Language -> Parser
|
|||||||
parseTreeSitterFile (Language language constructor) contents = do
|
parseTreeSitterFile (Language language constructor) contents = do
|
||||||
document <- ts_document_make
|
document <- ts_document_make
|
||||||
ts_document_set_language document language
|
ts_document_set_language document language
|
||||||
withCString contents (\ source -> do
|
withCString (toList contents) (\ source -> do
|
||||||
ts_document_set_input_string document source
|
ts_document_set_input_string document source
|
||||||
ts_document_parse document
|
ts_document_parse document
|
||||||
term <- documentToTerm constructor document contents
|
term <- documentToTerm constructor document contents
|
||||||
|
@ -5,6 +5,7 @@ import Patch
|
|||||||
import Syntax
|
import Syntax
|
||||||
import Term
|
import Term
|
||||||
import Range
|
import Range
|
||||||
|
import Source hiding ((++))
|
||||||
import Control.Arrow
|
import Control.Arrow
|
||||||
import Control.Monad.Free
|
import Control.Monad.Free
|
||||||
import Control.Comonad.Cofree
|
import Control.Comonad.Cofree
|
||||||
@ -12,13 +13,13 @@ import Data.List hiding (foldl)
|
|||||||
import qualified OrderedMap as Map
|
import qualified OrderedMap as Map
|
||||||
import Rainbow
|
import Rainbow
|
||||||
|
|
||||||
unified :: Diff a Info -> String -> String -> IO ByteString
|
unified :: Diff a Info -> Source Char -> Source Char -> IO ByteString
|
||||||
unified diff before after = do
|
unified diff before after = do
|
||||||
renderer <- byteStringMakerFromEnvironment
|
renderer <- byteStringMakerFromEnvironment
|
||||||
return . mconcat . chunksToByteStrings renderer . fst $ iter g mapped where
|
return . mconcat . chunksToByteStrings renderer . fst $ iter g mapped where
|
||||||
mapped = fmap (unifiedPatch &&& range) diff
|
mapped = fmap (unifiedPatch &&& range) diff
|
||||||
g (Annotated (_, info) syntax) = annotationAndSyntaxToChunks after info syntax
|
g (Annotated (_, info) syntax) = annotationAndSyntaxToChunks after info syntax
|
||||||
annotationAndSyntaxToChunks source (Info range _) (Leaf _) = (pure . chunk $ substring range source, Just range)
|
annotationAndSyntaxToChunks source (Info range _) (Leaf _) = (pure . chunk . toList $ slice range source, Just range)
|
||||||
annotationAndSyntaxToChunks source (Info range _) (Indexed i) = (unifiedRange range i source, Just range)
|
annotationAndSyntaxToChunks source (Info range _) (Indexed i) = (unifiedRange range i source, Just range)
|
||||||
annotationAndSyntaxToChunks source (Info range _) (Fixed f) = (unifiedRange range f source, Just range)
|
annotationAndSyntaxToChunks source (Info range _) (Fixed f) = (unifiedRange range f source, Just range)
|
||||||
annotationAndSyntaxToChunks source (Info range _) (Keyed k) = (unifiedRange range (sort $ snd <$> Map.toList k) source, Just range)
|
annotationAndSyntaxToChunks source (Info range _) (Keyed k) = (unifiedRange range (sort $ snd <$> Map.toList k) source, Just range)
|
||||||
@ -28,13 +29,13 @@ unified diff before after = do
|
|||||||
beforeChunk = maybe [] (change "-" . unifiedTerm before) $ Patch.before patch
|
beforeChunk = maybe [] (change "-" . unifiedTerm before) $ Patch.before patch
|
||||||
afterChunk = maybe [] (change "+" . unifiedTerm after) $ Patch.after patch
|
afterChunk = maybe [] (change "+" . unifiedTerm after) $ Patch.after patch
|
||||||
|
|
||||||
unifiedTerm :: String -> Term a Info -> [Chunk String]
|
unifiedTerm :: Source Char -> Term a Info -> [Chunk String]
|
||||||
unifiedTerm source term = fst $ cata (annotationAndSyntaxToChunks source) term
|
unifiedTerm source term = fst $ cata (annotationAndSyntaxToChunks source) term
|
||||||
|
|
||||||
unifiedRange :: Range -> [([Chunk String], Maybe Range)] -> String -> [Chunk String]
|
unifiedRange :: Range -> [([Chunk String], Maybe Range)] -> Source Char -> [Chunk String]
|
||||||
unifiedRange range children source = out <> (pure . chunk $ substring Range { start = previous, end = end range } source) where
|
unifiedRange range children source = out <> (pure . chunk . toList $ slice Range { start = previous, end = end range } source) where
|
||||||
(out, previous) = foldl' accumulateContext ([], start range) children
|
(out, previous) = foldl' accumulateContext ([], start range) children
|
||||||
accumulateContext (out, previous) (child, Just range) = (mconcat [ out, pure . chunk $ substring Range { start = previous, end = start range } source, child ], end range)
|
accumulateContext (out, previous) (child, Just range) = (mconcat [ out, pure . chunk . toList $ slice Range { start = previous, end = start range } source, child ], end range)
|
||||||
accumulateContext (out, previous) (child, _) = (out <> child, previous)
|
accumulateContext (out, previous) (child, _) = (out <> child, previous)
|
||||||
|
|
||||||
range :: Patch (Term a Info) -> Maybe Range
|
range :: Patch (Term a Info) -> Maybe Range
|
||||||
|
Loading…
Reference in New Issue
Block a user