From 5127376e68757b456e15c24e20ab52789e5fe74b Mon Sep 17 00:00:00 2001 From: Matt Diephouse Date: Tue, 16 Feb 2016 16:09:58 -0500 Subject: [PATCH] diffFiles :: Parser -> Renderer T.Text b -> (Source Char, Source Char) -> IO b --- app/DiffOutput.hs | 49 +++++------------------------- app/Main.hs | 12 +++----- app/Parsers.hs | 32 -------------------- app/SemanticDiff.hs | 9 ++---- semantic-diff.cabal | 8 +++-- src/Diffing.hs | 72 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 93 insertions(+), 89 deletions(-) delete mode 100644 app/Parsers.hs create mode 100644 src/Diffing.hs diff --git a/app/DiffOutput.hs b/app/DiffOutput.hs index 4f229f598..c199dd988 100644 --- a/app/DiffOutput.hs +++ b/app/DiffOutput.hs @@ -1,16 +1,9 @@ module DiffOutput where -import Source -import Term -import Control.Comonad.Cofree -import qualified Data.Text as T -import Diff -import Syntax -import Range -import Renderer +import Diffing import qualified Data.ByteString.Char8 as B1 -import qualified Data.Text.ICU.Detect as Detect -import qualified Data.Text.ICU.Convert as Convert +import Parser +import Source import Split import Unified import System.Directory @@ -18,7 +11,6 @@ import System.FilePath import qualified System.IO as IO import qualified Data.Text.Lazy.IO as TextIO import qualified PatchOutput -import qualified Parsers as P import Rainbow -- | The available types of diff rendering. @@ -26,40 +18,15 @@ data Format = Unified | Split | Patch data DiffArguments = DiffArguments { format :: Format, output :: Maybe FilePath, outputPath :: FilePath } -parserForFilepath :: FilePath -> P.Parser -parserForFilepath = P.parserForType . T.pack . takeExtension - --- | Replace every string leaf with leaves of the words in the string. -breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info -breakDownLeavesByWord source = cata replaceIn - where - replaceIn info@(Info range categories) (Leaf _) | ranges <- rangesAndWordsInSource range, length ranges > 1 = info :< (Indexed $ makeLeaf categories <$> ranges) - replaceIn info syntax = info :< syntax - rangesAndWordsInSource range = rangesAndWordsFrom (start range) (Source.toList $ slice range source) - makeLeaf categories (range, substring) = Info range categories :< Leaf (T.pack substring) - --- | Transcode a file to a unicode source. -transcode :: B1.ByteString -> IO (Source Char) -transcode text = fromText <$> do - match <- Detect.detectCharset text - converter <- Convert.open match Nothing - return $ Convert.toUnicode converter text - --- | Read the file and convert it to Unicode. -readAndTranscodeFile :: FilePath -> IO (Source Char) -readAndTranscodeFile path = do - text <- B1.readFile path - transcode text - -- | Return a renderer from the command-line arguments that will print the diff. -printDiff :: DiffArguments -> Renderer T.Text (IO ()) -printDiff arguments diff sources = case format arguments of - Unified -> put $ unified diff sources +printDiff :: Parser -> DiffArguments -> (Source Char, Source Char) -> IO () +printDiff parser arguments sources = case format arguments of + Unified -> put =<< diffFiles parser unified sources where put chunks = do renderer <- byteStringMakerFromEnvironment B1.putStr $ mconcat $ chunksToByteStrings renderer chunks - Split -> put (output arguments) $ split diff sources + Split -> put (output arguments) =<< diffFiles parser split sources where put Nothing rendered = TextIO.putStr rendered put (Just path) rendered = do @@ -68,4 +35,4 @@ printDiff arguments diff sources = case format arguments of then path (takeFileName outputPath -<.> ".html") else path IO.withFile outputPath IO.WriteMode (flip TextIO.hPutStr rendered) - Patch -> putStr $ PatchOutput.patch diff sources + Patch -> putStr =<< diffFiles parser PatchOutput.patch sources diff --git a/app/Main.hs b/app/Main.hs index f07dd19c0..d79112474 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,9 +1,9 @@ {-# LANGUAGE RecordWildCards #-} module Main where -import Interpreter -import Options.Applicative import Data.Bifunctor.Join +import Diffing +import Options.Applicative import qualified DiffOutput as DO data Arguments = Arguments { format :: DO.Format, output :: Maybe FilePath, sourceA :: FilePath, sourceB :: FilePath } @@ -20,12 +20,8 @@ arguments = Arguments main :: IO () main = do arguments <- execParser opts - let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments) - sources <- sequence $ DO.readAndTranscodeFile <$> Join (sourceAPath, sourceBPath) - let parse = DO.parserForFilepath sourceAPath - terms <- sequence $ parse <$> sources - let replaceLeaves = DO.breakDownLeavesByWord <$> sources - DO.printDiff (args arguments) (uncurry diffTerms $ runJoin $ replaceLeaves <*> terms) (runJoin sources) + sources <- sequence $ readAndTranscodeFile <$> Join (sourceA arguments, sourceB arguments) + DO.printDiff (parserForFilepath $ sourceA arguments) (args arguments) (runJoin sources) where opts = info (helper <*> arguments) (fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically") args Arguments{..} = DO.DiffArguments { format = format, output = output, outputPath = sourceA } diff --git a/app/Parsers.hs b/app/Parsers.hs deleted file mode 100644 index ab8f0bd84..000000000 --- a/app/Parsers.hs +++ /dev/null @@ -1,32 +0,0 @@ -module Parsers (parserForType, Parser, lineByLineParser) where - -import Diff -import Language -import Range -import Parser -import Source hiding ((++)) -import Syntax -import TreeSitter -import Control.Comonad.Cofree -import qualified Data.Text as T -import Data.Foldable - --- | Return a parser based on the file extension (including the "."). -parserForType :: T.Text -> Parser -parserForType mediaType = case languageForType mediaType of - Just C -> treeSitterParser C ts_language_c - Just JavaScript -> treeSitterParser JavaScript ts_language_javascript - _ -> lineByLineParser - --- | A fallback parser that treats a file simply as rows of strings. -lineByLineParser :: Parser -lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of - (leaves, _) -> leaves - where - lines = actualLines input - root syntax = Info (Range 0 $ length input) mempty :< syntax - leaf charIndex line = Info (Range charIndex $ charIndex + T.length line) mempty :< Leaf line - annotateLeaves (accum, charIndex) line = - (accum ++ [ leaf charIndex (toText line) ] - , charIndex + length line) - toText = T.pack . Source.toString diff --git a/app/SemanticDiff.hs b/app/SemanticDiff.hs index 6371e1ce9..bc1dab66d 100644 --- a/app/SemanticDiff.hs +++ b/app/SemanticDiff.hs @@ -1,7 +1,7 @@ {-# LANGUAGE RecordWildCards #-} module Main where -import Interpreter +import Diffing import Source import Options.Applicative import qualified Data.ByteString.Char8 as B1 @@ -36,10 +36,7 @@ main = do let shas = Join (shaA, shaB) forM_ filepaths $ \filepath -> do sources <- sequence $ fetchFromGitRepo gitDir filepath <$> shas - let parse = DO.parserForFilepath filepath - terms <- sequence $ parse <$> sources - let replaceLeaves = DO.breakDownLeavesByWord <$> sources - DO.printDiff (args arguments filepath) (uncurry diffTerms $ runJoin $ replaceLeaves <*> terms) (runJoin sources) + DO.printDiff (parserForFilepath filepath) (args arguments filepath) (runJoin sources) where opts = info (helper <*> arguments) (fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically") args Arguments{..} filepath = DO.DiffArguments { format = format, output = output, outputPath = filepath } @@ -60,4 +57,4 @@ fetchFromGitRepo repoPath path sha = join $ withRepository lgFactory repoPath $ blob <- lookupBlob blobEntryOid let (BlobString s) = blobContents blob return s - return $ DO.transcode bytestring + return $ transcode bytestring diff --git a/semantic-diff.cabal b/semantic-diff.cabal index 4fe8662cd..f56bc6c6d 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -17,6 +17,7 @@ library , Syntax , Operation , Algorithm + , Diffing , Interpreter , Language , Line @@ -35,15 +36,18 @@ library , TreeSitter , Source build-depends: base >= 4.8 && < 5 + , bifunctors , blaze-html , bytestring , c-storable-deriving , containers + , filepath , free , mtl , rainbow , semigroups , text >= 1.2.1.3 + , text-icu , tree-sitter-parsers , vector default-language: Haskell2010 @@ -53,7 +57,7 @@ library executable semantic-diff-exe hs-source-dirs: app main-is: Main.hs - other-modules: Parsers + other-modules: DiffOutput if os(darwin) ghc-options: -threaded -rtsopts -with-rtsopts=-N -static else @@ -88,7 +92,7 @@ executable semantic-diff-exe executable semantic-diff hs-source-dirs: app main-is: SemanticDiff.hs - other-modules: Parsers + other-modules: DiffOutput if os(darwin) ghc-options: -threaded -rtsopts -with-rtsopts=-N -static else diff --git a/src/Diffing.hs b/src/Diffing.hs new file mode 100644 index 000000000..3b25f8171 --- /dev/null +++ b/src/Diffing.hs @@ -0,0 +1,72 @@ +module Diffing where + +import Diff +import Interpreter +import Language +import Parser +import Range +import Renderer +import Source hiding ((++)) +import Syntax +import Term +import TreeSitter + +import Control.Comonad.Cofree +import Data.Bifunctor.Join +import qualified Data.ByteString.Char8 as B1 +import Data.Foldable +import qualified Data.Text as T +import qualified Data.Text.ICU.Detect as Detect +import qualified Data.Text.ICU.Convert as Convert +import System.FilePath + +-- | Return a parser based on the file extension (including the "."). +parserForType :: T.Text -> Parser +parserForType mediaType = case languageForType mediaType of + Just C -> treeSitterParser C ts_language_c + Just JavaScript -> treeSitterParser JavaScript ts_language_javascript + _ -> lineByLineParser + +-- | A fallback parser that treats a file simply as rows of strings. +lineByLineParser :: Parser +lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of + (leaves, _) -> leaves + where + lines = actualLines input + root syntax = Info (Range 0 $ length input) mempty :< syntax + leaf charIndex line = Info (Range charIndex $ charIndex + T.length line) mempty :< Leaf line + annotateLeaves (accum, charIndex) line = + (accum ++ [ leaf charIndex (toText line) ] + , charIndex + length line) + toText = T.pack . Source.toString + +parserForFilepath :: FilePath -> Parser +parserForFilepath = parserForType . T.pack . takeExtension + +-- | Replace every string leaf with leaves of the words in the string. +breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info +breakDownLeavesByWord source = cata replaceIn + where + replaceIn info@(Info range categories) (Leaf _) | ranges <- rangesAndWordsInSource range, length ranges > 1 = info :< (Indexed $ makeLeaf categories <$> ranges) + replaceIn info syntax = info :< syntax + rangesAndWordsInSource range = rangesAndWordsFrom (start range) (Source.toList $ slice range source) + makeLeaf categories (range, substring) = Info range categories :< Leaf (T.pack substring) + +-- | Transcode a file to a unicode source. +transcode :: B1.ByteString -> IO (Source Char) +transcode text = fromText <$> do + match <- Detect.detectCharset text + converter <- Convert.open match Nothing + return $ Convert.toUnicode converter text + +-- | Read the file and convert it to Unicode. +readAndTranscodeFile :: FilePath -> IO (Source Char) +readAndTranscodeFile path = do + text <- B1.readFile path + transcode text + +diffFiles :: Parser -> Renderer T.Text b -> (Source Char, Source Char) -> IO b +diffFiles parser renderer sources = do + terms <- sequence $ parser <$> Join sources + let replaceLeaves = breakDownLeavesByWord <$> Join sources + return $ renderer (uncurry diffTerms $ runJoin $ replaceLeaves <*> terms) sources