1
1
mirror of https://github.com/github/semantic.git synced 2025-01-03 21:16:12 +03:00

Merge pull request #1036 from github/bytestring-sexpression-output

Use ByteString for parse output (sexpression, json)
This commit is contained in:
Timothy Clem 2017-03-09 09:49:37 -08:00 committed by GitHub
commit 328ee8c2d9
13 changed files with 90 additions and 83 deletions

View File

@ -158,6 +158,7 @@ test-suite test
, array
, base
, bifunctors
, bytestring
, deepseq
, filepath
, gitlib

View File

@ -42,7 +42,7 @@ data Arguments = Arguments
, alternateObjectDirs :: [Text]
, format :: R.Format
, timeoutInMicroseconds :: Int
, output :: Maybe FilePath
, outputPath :: Maybe FilePath
, diffMode :: DiffMode
, runMode :: RunMode
, shaRange :: Both (Maybe String)
@ -56,7 +56,7 @@ programArguments CmdLineOptions{..} = do
pwd <- getCurrentDirectory
gitDir <- fromMaybe pwd <$> lookupEnv "GIT_DIR"
eitherObjectDirs <- try $ parseObjectDirs . toS <$> getEnv "GIT_ALTERNATE_OBJECT_DIRECTORIES"
output <- getOutputPath outputFilePath
outputPath <- getOutputPath outputFilePath
let alternateObjectDirs = case (eitherObjectDirs :: Either IOError [Text]) of
(Left _) -> []
(Right objectDirs) -> objectDirs
@ -67,7 +67,7 @@ programArguments CmdLineOptions{..} = do
, alternateObjectDirs = alternateObjectDirs
, format = outputFormat
, timeoutInMicroseconds = maybe defaultTimeout toMicroseconds maybeTimeout
, output = output
, outputPath = outputPath
, diffMode = case (noIndex, filePaths) of
(True, [fileA, fileB]) -> PathDiff (both fileA fileB)
(_, _) -> CommitDiff
@ -100,7 +100,7 @@ args gitDir sha1 sha2 filePaths format = Arguments
, alternateObjectDirs = []
, format = format
, timeoutInMicroseconds = defaultTimeout
, output = Nothing
, outputPath = Nothing
, diffMode = CommitDiff
, runMode = Diff
, shaRange = Just <$> both sha1 sha2
@ -114,7 +114,7 @@ diffPathsArgs gitDir paths format = Arguments
, alternateObjectDirs = []
, format = format
, timeoutInMicroseconds = defaultTimeout
, output = Nothing
, outputPath = Nothing
, diffMode = PathDiff paths
, runMode = Diff
, shaRange = both Nothing Nothing
@ -128,7 +128,7 @@ parseArgs filePaths format = Arguments
, alternateObjectDirs = []
, format = format
, timeoutInMicroseconds = defaultTimeout
, output = Nothing
, outputPath = Nothing
, diffMode = CommitDiff
, runMode = Parse
, shaRange = both Nothing Nothing

View File

@ -5,6 +5,7 @@ module Category where
import Prologue
import Data.Functor.Listable
import Data.Text (pack)
import Data.Text.Encoding (encodeUtf8)
-- | A standardized category of AST node. Used to determine the semantics for
-- | semantic diffing and define comparability of nodes.
@ -238,6 +239,9 @@ instance Hashable Category
instance (StringConv Category Text) where
strConv _ = pack . show
instance (StringConv Category ByteString) where
strConv _ = encodeUtf8 . show
instance Listable Category where
tiers = cons0 Program
\/ cons0 ParseError

View File

@ -5,6 +5,7 @@ import Data.Aeson hiding (json)
import Data.Functor.Both as Both
import Data.List ((\\))
import Data.String
import Data.Text.Encoding (encodeUtf8)
import GHC.Conc (numCapabilities)
import Prologue hiding (fst, snd, null)
import qualified Control.Concurrent.Async.Pool as Async
@ -36,13 +37,13 @@ import Source
import Syntax
import Term
diff :: Arguments -> IO Text
diff :: Arguments -> IO ByteString
diff args@Arguments{..} = case diffMode of
PathDiff paths -> diffPaths args paths
CommitDiff -> diffCommits args
-- | Compare changes between two commits.
diffCommits :: Arguments -> IO Text
diffCommits :: Arguments -> IO ByteString
diffCommits args@Arguments{..} = do
ts <- fetchTerms args
pure $ maybe mempty concatOutputs ts
@ -51,7 +52,7 @@ diffCommits args@Arguments{..} = do
else timeout timeoutInMicroseconds (fetchDiffs args)
-- | Compare two paths on the filesystem (compariable to git diff --no-index).
diffPaths :: Arguments -> Both FilePath -> IO Text
diffPaths :: Arguments -> Both FilePath -> IO ByteString
diffPaths args@Arguments{..} paths = do
sources <- traverse readAndTranscodeFile paths
let sourceBlobs = SourceBlob <$> sources <*> pure mempty <*> paths <*> pure (Just defaultPlainBlob)
@ -188,15 +189,13 @@ truncatedDiff Arguments{..} sources = pure $ case format of
TOC -> TOCOutput mempty
-- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs.
printDiff :: (ToJSON (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO Text
printDiff :: (ToJSON (Record fields), DefaultFields fields) => Parser (Syntax Text) (Record fields) -> Arguments -> Both SourceBlob -> IO ByteString
printDiff parser arguments sources = do
rendered <- textDiff parser arguments sources
pure $ case rendered of
SplitOutput text -> text
PatchOutput text -> text
SplitOutput text -> encodeUtf8 text
PatchOutput text -> encodeUtf8 text
SExpressionOutput text -> text
JSONOutput series -> encodingToText (toJSON series)
SummaryOutput summaries -> encodingToText (toJSON summaries)
TOCOutput summaries -> encodingToText (toJSON summaries)
where
encodingToText = toS . encode
JSONOutput series -> toS $ encode series
SummaryOutput summaries -> toS $ encode summaries
TOCOutput summaries -> toS $ encode summaries

View File

@ -3,10 +3,10 @@ module ParseCommand where
import Arguments
import Category
import Data.Aeson (ToJSON)
import Data.Aeson.Encode.Pretty
import Data.Aeson (ToJSON, encode)
import Data.Record
import qualified Data.Text as T
import qualified Data.ByteString as B
import Info
import Language
import Language.Markdown
@ -32,18 +32,17 @@ data ParseJSON = ParseJSON
, children :: [ParseJSON]
} deriving (Show, Generic, ToJSON)
parse :: Arguments -> IO Text
parse :: Arguments -> IO ByteString
parse Arguments{..} = do
sources <- traverse readAndTranscodeFile filePaths
terms <- zipWithM (\parser sourceBlob -> parser sourceBlob) parsers (sourceBlobs sources)
pure . T.intercalate "\n" $ case format of
SExpression -> [foldr (\t acc -> printTerm t 0 TreeOnly <> acc) "" terms]
_ -> toS . encodePretty . cata algebra <$> terms
pure $! toByteString terms
where
sourceBlobs sources = Source.SourceBlob <$> sources <*> pure mempty <*> filePaths <*> pure (Just Source.defaultPlainBlob)
parsers = parserWithSource <$> filePaths
toByteString terms = case format of
SExpression -> printTerms TreeOnly terms
_ -> B.intercalate "\n" (toS . encode . cata algebra <$> terms) <> "\n"
algebra :: TermF (Syntax leaf) (Record '[SourceText, Range, Category, SourceSpan]) ParseJSON -> ParseJSON
algebra term = case term of

View File

@ -1,10 +1,10 @@
module Renderer (Renderer, Output(..), concatOutputs, toSummaryKey, Format(..)) where
import Data.Aeson (Value, toEncoding)
import Data.Aeson.Encoding (encodingToLazyByteString)
import Data.Aeson (Value, encode)
import Data.Functor.Both
import Data.Map as Map hiding (null)
import Data.Text as T (intercalate)
import Data.Text.Encoding (encodeUtf8)
import qualified Data.ByteString as B
import Data.Functor.Listable
import Prologue
import Source (SourceBlob)
@ -18,7 +18,7 @@ type Renderer annotation = Both SourceBlob -> Diff (Syntax Text) annotation -> O
data Format = Split | Patch | JSON | Summary | SExpression | TOC
deriving (Show)
data Output = SplitOutput Text | PatchOutput Text | JSONOutput (Map Text Value) | SummaryOutput (Map Text (Map Text [Value])) | SExpressionOutput Text | TOCOutput (Map Text (Map Text [Value]))
data Output = SplitOutput Text | PatchOutput Text | JSONOutput (Map Text Value) | SummaryOutput (Map Text (Map Text [Value])) | SExpressionOutput ByteString | TOCOutput (Map Text (Map Text [Value]))
deriving (Show)
-- Returns a key representing the filename. If the filenames are different,
@ -37,19 +37,20 @@ toSummaryKey = runBothWith $ \before after ->
-- For Summaries, each file output is merged into one 'Object' consisting of lists of
-- changes and errors.
-- Split and Patch output is appended together with newlines.
concatOutputs :: [Output] -> Text
concatOutputs list | isJSON list = toS . encodingToLazyByteString . toEncoding $ concatJSON list
concatOutputs :: [Output] -> ByteString
concatOutputs list | isJSON list = toS . encode $ concatJSON list
where
concatJSON :: [Output] -> Map Text Value
concatJSON (JSONOutput hash : rest) = Map.union hash (concatJSON rest)
concatJSON _ = mempty
concatOutputs list | isSummary list = toS . encodingToLazyByteString . toEncoding $ concatSummaries list
concatOutputs list | isSummary list = toS . encode $ concatSummaries list
where
concatSummaries :: [Output] -> Map Text (Map Text [Value])
concatSummaries (SummaryOutput hash : rest) = Map.unionWith (Map.unionWith (<>)) hash (concatSummaries rest)
concatSummaries (TOCOutput hash : rest) = Map.unionWith (Map.unionWith (<>)) hash (concatSummaries rest)
concatSummaries _ = mempty
concatOutputs list | isText list = T.intercalate "\n" (toText <$> list)
concatOutputs list | isByteString list = B.intercalate "\n" (toByteString <$> list)
concatOutputs list | isText list = B.intercalate "\n" (encodeUtf8 . toText <$> list)
concatOutputs _ = mempty
isJSON :: [Output] -> Bool
@ -64,15 +65,22 @@ isSummary _ = False
isText :: [Output] -> Bool
isText (SplitOutput _ : _) = True
isText (PatchOutput _ : _) = True
isText (SExpressionOutput _ : _) = True
isText _ = False
toText :: Output -> Text
toText (SplitOutput text) = text
toText (PatchOutput text) = text
toText (SExpressionOutput text) = text
toText _ = mempty
isByteString :: [Output] -> Bool
isByteString (SExpressionOutput _ : _) = True
isByteString _ = False
toByteString :: Output -> ByteString
toByteString (SExpressionOutput text) = text
toByteString _ = B.empty
instance Listable Format where
tiers = cons0 Split
\/ cons0 Patch

View File

@ -39,7 +39,7 @@ instance (ToJSON leaf, ToJSON (Record fields), HasField fields Category, HasFiel
instance ToJSON Category where
toJSON (Other s) = String s
toJSON s = String . T.pack $ show s
toJSON s = String (toS s)
instance ToJSON Range where
toJSON (Range start end) = A.Array . Vector.fromList $ toJSON <$> [ start, end ]

View File

@ -1,12 +1,10 @@
{-# LANGUAGE RankNTypes, ScopedTypeVariables #-}
module Renderer.SExpression (sExpression, printTerm, SExpressionFormat(..)) where
{-# LANGUAGE RankNTypes, ScopedTypeVariables, OverloadedStrings #-}
module Renderer.SExpression (sExpression, printTerm, printTerms, SExpressionFormat(..)) where
import Data.Bifunctor.Join
import Data.Foldable
import Data.Record
import Data.Text hiding (foldr, replicate)
import Prologue hiding (toList, intercalate)
import Data.ByteString hiding (foldr, spanEnd)
import Prologue hiding (replicate, encodeUtf8)
import Category as C
import Diff
import Renderer
@ -18,9 +16,9 @@ import Term
data SExpressionFormat = TreeOnly | TreeAndRanges
sExpression :: (HasField fields Category, HasField fields SourceSpan) => SExpressionFormat -> Renderer (Record fields)
sExpression format _ diff = SExpressionOutput $ printDiff diff 0 format
sExpression format _ diff = SExpressionOutput $ printDiff diff 0 format <> "\n"
printDiff :: (HasField fields Category, HasField fields SourceSpan) => Diff (Syntax Text) (Record fields) -> Int -> SExpressionFormat -> Text
printDiff :: (HasField fields Category, HasField fields SourceSpan) => Diff (Syntax Text) (Record fields) -> Int -> SExpressionFormat -> ByteString
printDiff diff level format = case runFree diff of
(Pure patch) -> case patch of
Insert term -> pad (level - 1) <> "{+" <> printTerm term level format <> "+}"
@ -28,21 +26,28 @@ printDiff diff level format = case runFree diff of
Replace a b -> pad (level - 1) <> "{ " <> printTerm a level format <> pad (level - 1) <> "->" <> printTerm b level format <> " }"
(Free (Join (_, annotation) :< syntax)) -> pad' level <> "(" <> showAnnotation annotation format <> foldr (\d acc -> printDiff d (level + 1) format <> acc) "" syntax <> ")"
where
pad' :: Int -> ByteString
pad' n = if n < 1 then "" else pad n
pad :: Int -> ByteString
pad n | n < 0 = ""
| n < 1 = "\n"
| otherwise = "\n" <> mconcat (replicate n " ")
| otherwise = "\n" <> replicate (2 * n) space
printTerm :: (HasField fields Category, HasField fields SourceSpan) => Term (Syntax t) (Record fields) -> Int -> SExpressionFormat -> Text
printTerms :: (HasField fields Category, HasField fields SourceSpan) => SExpressionFormat -> [Term (Syntax t) (Record fields)] -> ByteString
printTerms format terms = foldr (\t acc -> printTerm t 0 format <> acc) "" terms <> "\n"
printTerm :: (HasField fields Category, HasField fields SourceSpan) => Term (Syntax t) (Record fields) -> Int -> SExpressionFormat -> ByteString
printTerm term level format = go term level 0
where
pad :: Int -> Int -> ByteString
pad p n | n < 1 = ""
| otherwise = "\n" <> mconcat (replicate (p + n) " ")
| otherwise = "\n" <> replicate (2 * (p + n)) space
go :: (HasField fields Category, HasField fields SourceSpan) => Term (Syntax t) (Record fields) -> Int -> Int -> ByteString
go term parentLevel level = case runCofree term of
(annotation :< Leaf _) -> pad parentLevel level <> "(" <> showAnnotation annotation format <> ")"
(annotation :< syntax) -> pad parentLevel level <> "(" <> showAnnotation annotation format <> foldr (\t acc -> go t parentLevel (level + 1) <> acc) "" syntax <> ")"
showAnnotation :: (HasField fields Category, HasField fields SourceSpan) => Record fields -> SExpressionFormat -> Text
showAnnotation :: (HasField fields Category, HasField fields SourceSpan) => Record fields -> SExpressionFormat -> ByteString
showAnnotation annotation TreeOnly = categoryName annotation
showAnnotation annotation TreeAndRanges = categoryName annotation <> " " <> showSourceSpan annotation
where
@ -51,5 +56,8 @@ showAnnotation annotation TreeAndRanges = categoryName annotation <> " " <> show
end = showPoint . spanEnd . getField
showPoint SourcePos{..} = "[" <> show line <> ", " <> show column <> "]"
categoryName :: HasField fields Category => Record fields -> Text
categoryName :: HasField fields Category => Record fields -> ByteString
categoryName = toS . category
space :: Word8
space = fromIntegral $ ord ' '

View File

@ -13,9 +13,7 @@ import qualified Renderer as R
import Development.GitRev
import DiffCommand
import ParseCommand
import qualified Data.Text.IO as TextIO
import System.IO
import System.Environment (lookupEnv)
import qualified Data.ByteString as B
main :: IO ()
main = do
@ -23,7 +21,7 @@ main = do
text <- case runMode of
Diff -> diff args
Parse -> parse args
writeToOutput output text
writeToOutput outputPath text
-- | A parser for the application's command-line arguments.
argumentsParser :: ParserInfo CmdLineOptions
@ -59,17 +57,5 @@ versionString = "semantic-diff version " <> showVersion Library.version <> " ("
version :: Parser (a -> a)
version = infoOption versionString (long "version" <> short 'V' <> help "output the version of the program")
writeToOutput :: Maybe FilePath -> Text -> IO ()
writeToOutput output text = case output of
Nothing -> do
setEncoding
TextIO.hPutStrLn stdout text
Just path -> withFile path WriteMode (`TextIO.hPutStr` text)
where
setEncoding = do
lang <- lookupEnv "LANG"
case lang of
-- If LANG is set and isn't the empty string, leave the encoding.
Just x | x /= "" -> pure ()
-- Otherwise default to utf8.
_ -> hSetEncoding stdout utf8
writeToOutput :: Maybe FilePath -> ByteString -> IO ()
writeToOutput = maybe B.putStr B.writeFile

View File

@ -36,11 +36,11 @@ readAndTranscodeFile path = do
_ -> B.readFile path
transcode text
-- From https://github.com/haskell/bytestring/pull/79/files
-- Based on https://github.com/haskell/bytestring/pull/79/files
fileSize :: FilePath -> IO Integer
fileSize f = withBinaryFile f ReadMode $ \h -> do
-- hFileSize fails if file is not regular file (like /dev/null). Catch
-- exception and try reading anyway.
-- exception and return 0 in that case.
filesz <- catch (hFileSize h) useZeroIfNotRegularFile
pure $ fromIntegral filesz `max` 0
where useZeroIfNotRegularFile :: IOException -> IO Integer

View File

@ -9,8 +9,8 @@ import Prologue (($), fmap, (.), pure, for, panic)
import Test.Hspec hiding (shouldBe, shouldNotBe, shouldThrow, errorCall)
import Test.Hspec.Expectations.Pretty
import Test.Hspec.LeanCheck
import Data.Text.Lazy.Encoding as E
import Data.Text.Lazy as T
import qualified Data.ByteString.Lazy as BL
import Data.Map
import qualified Data.Vector as V
import Arguments
@ -58,7 +58,7 @@ spec = parallel $ do
fetchDiffsOutput :: (Object -> Text) -> Arguments -> IO (Maybe (Map Text Value), Maybe (Map Text [Text]))
fetchDiffsOutput f arguments = do
diffs <- fetchDiffs arguments
let json = fromJust . decode . E.encodeUtf8 $ T.fromChunks [concatOutputs diffs]
let json = fromJust . decode . BL.fromStrict $ concatOutputs diffs
pure (errors json, summaries f json)
-- Diff Summaries payloads look like this:

View File

@ -1,10 +1,12 @@
{-# LANGUAGE DataKinds, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DataKinds, GeneralizedNewtypeDeriving, OverloadedStrings #-}
module IntegrationSpec where
import Category as C
import Data.Functor.Both
import Data.Record
import qualified Data.Text as T
import qualified Data.ByteString as B
import Data.Text.Encoding (decodeUtf8)
import GHC.Show (Show(..))
import Data.List (union, concat, transpose)
import Info
@ -109,7 +111,7 @@ testParse path expectedOutput = do
let blob = sourceBlob source path
term <- parserWithSource path blob
let actual = (Verbatim . stripWhitespace) $ printTerm term 0 TreeOnly
expected <- (Verbatim . stripWhitespace) <$> readFile expectedOutput
expected <- (Verbatim . stripWhitespace) <$> B.readFile expectedOutput
actual `shouldBe` expected
testDiff :: Renderer (Record '[Range, Category, SourceSpan]) -> Both FilePath -> FilePath -> Expectation
@ -117,7 +119,7 @@ testDiff renderer paths diff = do
sources <- traverse readAndTranscodeFile' paths
diff' <- diffFiles parser renderer (sourceBlobs sources)
let actual = (Verbatim . stripWhitespace. concatOutputs . pure) diff'
expected <- (Verbatim . stripWhitespace) <$> readFile diff
expected <- (Verbatim . stripWhitespace) <$> B.readFile diff
actual `shouldBe` expected
where
parser = parserForFilepath filePath
@ -126,14 +128,14 @@ testDiff renderer paths diff = do
| otherwise = readAndTranscodeFile path
filePath = if fst paths /= "" then fst paths else snd paths
stripWhitespace :: Text -> Text
stripWhitespace = T.foldl' go T.empty
where go acc x | x `elem` [' ', '\t', '\n'] = acc
| otherwise = T.snoc acc x
stripWhitespace :: ByteString -> ByteString
stripWhitespace = B.foldl' go B.empty
where go acc x | x `B.elem` " \t\n" = acc
| otherwise = B.snoc acc x
-- | A wrapper around `Text` with a more readable `Show` instance.
newtype Verbatim = Verbatim Text
-- | A wrapper around `ByteString` with a more readable `Show` instance.
newtype Verbatim = Verbatim ByteString
deriving (Eq, NFData)
instance Show Verbatim where
showsPrec _ (Verbatim text) = ('\n':) . (T.unpack text ++)
showsPrec _ (Verbatim byteString) = ('\n':) . (T.unpack (decodeUtf8 byteString) ++)

View File

@ -112,13 +112,13 @@ spec = parallel $ do
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.B.rb")
let parser = parserForFilepath (path (fst sourceBlobs))
output <- diffFiles parser toc sourceBlobs
concatOutputs (pure output) `shouldBe` ("{\"changes\":{\"ruby/methods.A.rb -> ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}" :: Text)
concatOutputs (pure output) `shouldBe` ("{\"changes\":{\"ruby/methods.A.rb -> ruby/methods.B.rb\":[{\"span\":{\"start\":[1,1],\"end\":[2,4]},\"category\":\"Method\",\"term\":\"self.foo\",\"changeType\":\"added\"},{\"span\":{\"start\":[4,1],\"end\":[6,4]},\"category\":\"Method\",\"term\":\"bar\",\"changeType\":\"modified\"},{\"span\":{\"start\":[4,1],\"end\":[5,4]},\"category\":\"Method\",\"term\":\"baz\",\"changeType\":\"removed\"}]},\"errors\":{}}" )
it "encodes to final JSON if there are parse errors" $ do
sourceBlobs <- blobsForPaths (both "ruby/methods.A.rb" "ruby/methods.X.rb")
let parser = parserForFilepath (path (fst sourceBlobs))
output <- diffFiles parser toc sourceBlobs
concatOutputs (pure output) `shouldBe` ("{\"changes\":{},\"errors\":{\"ruby/methods.A.rb -> ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"def bar\\nen\\n\"}]}}" :: Text)
concatOutputs (pure output) `shouldBe` ("{\"changes\":{},\"errors\":{\"ruby/methods.A.rb -> ruby/methods.X.rb\":[{\"span\":{\"start\":[1,1],\"end\":[3,1]},\"error\":\"def bar\\nen\\n\"}]}}" )
type Diff' = SyntaxDiff String '[Range, Category, SourceSpan]
type Term' = SyntaxTerm String '[Range, Category, SourceSpan]