1
1
mirror of https://github.com/github/semantic.git synced 2024-11-24 00:42:33 +03:00

diffFiles :: Parser -> Renderer T.Text b -> (Source Char, Source Char) -> IO b

This commit is contained in:
Matt Diephouse 2016-02-16 16:09:58 -05:00
parent ac053940fe
commit 5127376e68
6 changed files with 93 additions and 89 deletions

View File

@ -1,16 +1,9 @@
module DiffOutput where
import Source
import Term
import Control.Comonad.Cofree
import qualified Data.Text as T
import Diff
import Syntax
import Range
import Renderer
import Diffing
import qualified Data.ByteString.Char8 as B1
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
import Parser
import Source
import Split
import Unified
import System.Directory
@ -18,7 +11,6 @@ import System.FilePath
import qualified System.IO as IO
import qualified Data.Text.Lazy.IO as TextIO
import qualified PatchOutput
import qualified Parsers as P
import Rainbow
-- | The available types of diff rendering.
@ -26,40 +18,15 @@ data Format = Unified | Split | Patch
data DiffArguments = DiffArguments { format :: Format, output :: Maybe FilePath, outputPath :: FilePath }
parserForFilepath :: FilePath -> P.Parser
parserForFilepath = P.parserForType . T.pack . takeExtension
-- | Replace every string leaf with leaves of the words in the string.
breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
breakDownLeavesByWord source = cata replaceIn
where
replaceIn info@(Info range categories) (Leaf _) | ranges <- rangesAndWordsInSource range, length ranges > 1 = info :< (Indexed $ makeLeaf categories <$> ranges)
replaceIn info syntax = info :< syntax
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (Source.toList $ slice range source)
makeLeaf categories (range, substring) = Info range categories :< Leaf (T.pack substring)
-- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char)
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
return $ Convert.toUnicode converter text
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
-- | Return a renderer from the command-line arguments that will print the diff.
printDiff :: DiffArguments -> Renderer T.Text (IO ())
printDiff arguments diff sources = case format arguments of
Unified -> put $ unified diff sources
printDiff :: Parser -> DiffArguments -> (Source Char, Source Char) -> IO ()
printDiff parser arguments sources = case format arguments of
Unified -> put =<< diffFiles parser unified sources
where
put chunks = do
renderer <- byteStringMakerFromEnvironment
B1.putStr $ mconcat $ chunksToByteStrings renderer chunks
Split -> put (output arguments) $ split diff sources
Split -> put (output arguments) =<< diffFiles parser split sources
where
put Nothing rendered = TextIO.putStr rendered
put (Just path) rendered = do
@ -68,4 +35,4 @@ printDiff arguments diff sources = case format arguments of
then path </> (takeFileName outputPath -<.> ".html")
else path
IO.withFile outputPath IO.WriteMode (flip TextIO.hPutStr rendered)
Patch -> putStr $ PatchOutput.patch diff sources
Patch -> putStr =<< diffFiles parser PatchOutput.patch sources

View File

@ -1,9 +1,9 @@
{-# LANGUAGE RecordWildCards #-}
module Main where
import Interpreter
import Options.Applicative
import Data.Bifunctor.Join
import Diffing
import Options.Applicative
import qualified DiffOutput as DO
data Arguments = Arguments { format :: DO.Format, output :: Maybe FilePath, sourceA :: FilePath, sourceB :: FilePath }
@ -20,12 +20,8 @@ arguments = Arguments
main :: IO ()
main = do
arguments <- execParser opts
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
sources <- sequence $ DO.readAndTranscodeFile <$> Join (sourceAPath, sourceBPath)
let parse = DO.parserForFilepath sourceAPath
terms <- sequence $ parse <$> sources
let replaceLeaves = DO.breakDownLeavesByWord <$> sources
DO.printDiff (args arguments) (uncurry diffTerms $ runJoin $ replaceLeaves <*> terms) (runJoin sources)
sources <- sequence $ readAndTranscodeFile <$> Join (sourceA arguments, sourceB arguments)
DO.printDiff (parserForFilepath $ sourceA arguments) (args arguments) (runJoin sources)
where opts = info (helper <*> arguments)
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
args Arguments{..} = DO.DiffArguments { format = format, output = output, outputPath = sourceA }

View File

@ -1,32 +0,0 @@
module Parsers (parserForType, Parser, lineByLineParser) where
import Diff
import Language
import Range
import Parser
import Source hiding ((++))
import Syntax
import TreeSitter
import Control.Comonad.Cofree
import qualified Data.Text as T
import Data.Foldable
-- | Return a parser based on the file extension (including the ".").
parserForType :: T.Text -> Parser
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C ts_language_c
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
_ -> lineByLineParser
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser
lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> leaves
where
lines = actualLines input
root syntax = Info (Range 0 $ length input) mempty :< syntax
leaf charIndex line = Info (Range charIndex $ charIndex + T.length line) mempty :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum ++ [ leaf charIndex (toText line) ]
, charIndex + length line)
toText = T.pack . Source.toString

View File

@ -1,7 +1,7 @@
{-# LANGUAGE RecordWildCards #-}
module Main where
import Interpreter
import Diffing
import Source
import Options.Applicative
import qualified Data.ByteString.Char8 as B1
@ -36,10 +36,7 @@ main = do
let shas = Join (shaA, shaB)
forM_ filepaths $ \filepath -> do
sources <- sequence $ fetchFromGitRepo gitDir filepath <$> shas
let parse = DO.parserForFilepath filepath
terms <- sequence $ parse <$> sources
let replaceLeaves = DO.breakDownLeavesByWord <$> sources
DO.printDiff (args arguments filepath) (uncurry diffTerms $ runJoin $ replaceLeaves <*> terms) (runJoin sources)
DO.printDiff (parserForFilepath filepath) (args arguments filepath) (runJoin sources)
where opts = info (helper <*> arguments)
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
args Arguments{..} filepath = DO.DiffArguments { format = format, output = output, outputPath = filepath }
@ -60,4 +57,4 @@ fetchFromGitRepo repoPath path sha = join $ withRepository lgFactory repoPath $
blob <- lookupBlob blobEntryOid
let (BlobString s) = blobContents blob
return s
return $ DO.transcode bytestring
return $ transcode bytestring

View File

@ -17,6 +17,7 @@ library
, Syntax
, Operation
, Algorithm
, Diffing
, Interpreter
, Language
, Line
@ -35,15 +36,18 @@ library
, TreeSitter
, Source
build-depends: base >= 4.8 && < 5
, bifunctors
, blaze-html
, bytestring
, c-storable-deriving
, containers
, filepath
, free
, mtl
, rainbow
, semigroups
, text >= 1.2.1.3
, text-icu
, tree-sitter-parsers
, vector
default-language: Haskell2010
@ -53,7 +57,7 @@ library
executable semantic-diff-exe
hs-source-dirs: app
main-is: Main.hs
other-modules: Parsers
other-modules: DiffOutput
if os(darwin)
ghc-options: -threaded -rtsopts -with-rtsopts=-N -static
else
@ -88,7 +92,7 @@ executable semantic-diff-exe
executable semantic-diff
hs-source-dirs: app
main-is: SemanticDiff.hs
other-modules: Parsers
other-modules: DiffOutput
if os(darwin)
ghc-options: -threaded -rtsopts -with-rtsopts=-N -static
else

72
src/Diffing.hs Normal file
View File

@ -0,0 +1,72 @@
module Diffing where
import Diff
import Interpreter
import Language
import Parser
import Range
import Renderer
import Source hiding ((++))
import Syntax
import Term
import TreeSitter
import Control.Comonad.Cofree
import Data.Bifunctor.Join
import qualified Data.ByteString.Char8 as B1
import Data.Foldable
import qualified Data.Text as T
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
import System.FilePath
-- | Return a parser based on the file extension (including the ".").
parserForType :: T.Text -> Parser
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C ts_language_c
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
_ -> lineByLineParser
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser
lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> leaves
where
lines = actualLines input
root syntax = Info (Range 0 $ length input) mempty :< syntax
leaf charIndex line = Info (Range charIndex $ charIndex + T.length line) mempty :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum ++ [ leaf charIndex (toText line) ]
, charIndex + length line)
toText = T.pack . Source.toString
parserForFilepath :: FilePath -> Parser
parserForFilepath = parserForType . T.pack . takeExtension
-- | Replace every string leaf with leaves of the words in the string.
breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
breakDownLeavesByWord source = cata replaceIn
where
replaceIn info@(Info range categories) (Leaf _) | ranges <- rangesAndWordsInSource range, length ranges > 1 = info :< (Indexed $ makeLeaf categories <$> ranges)
replaceIn info syntax = info :< syntax
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (Source.toList $ slice range source)
makeLeaf categories (range, substring) = Info range categories :< Leaf (T.pack substring)
-- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char)
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
return $ Convert.toUnicode converter text
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
diffFiles :: Parser -> Renderer T.Text b -> (Source Char, Source Char) -> IO b
diffFiles parser renderer sources = do
terms <- sequence $ parser <$> Join sources
let replaceLeaves = breakDownLeavesByWord <$> Join sources
return $ renderer (uncurry diffTerms $ runJoin $ replaceLeaves <*> terms) sources