1
1
mirror of https://github.com/github/semantic.git synced 2024-12-27 08:54:14 +03:00

Merge pull request #407 from github/git-diff

Diff a list of files in a given repository
This commit is contained in:
Rob Rix 2016-02-01 16:40:45 -05:00
commit 0dc4927c5e
8 changed files with 199 additions and 58 deletions

3
.gitmodules vendored
View File

@ -4,3 +4,6 @@
[submodule "vendor/text-icu"]
path = vendor/text-icu
url = https://github.com/joshvera/text-icu
[submodule "vendor/gitlib"]
path = vendor/gitlib
url = https://github.com/jwiegley/gitlib

72
app/DiffOutput.hs Normal file
View File

@ -0,0 +1,72 @@
module DiffOutput where
import Source
import Term
import Control.Comonad.Cofree
import qualified Data.Text as T
import Diff
import Syntax
import Range
import qualified Data.ByteString.Char8 as B1
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
import Split
import Unified
import System.Directory
import System.FilePath
import qualified System.IO as IO
import qualified Data.Text.Lazy.IO as TextIO
import qualified PatchOutput
import Interpreter
import qualified Parsers as P
import Rainbow
-- | The available types of diff rendering.
data Format = Unified | Split | Patch
data DiffArguments = DiffArguments { format :: Format, output :: Maybe FilePath, outputPath :: FilePath }
parserForFilepath :: FilePath -> P.Parser
parserForFilepath = P.parserForType . T.pack . takeExtension
-- | Replace every string leaf with leaves of the words in the string.
breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
breakDownLeavesByWord source = cata replaceIn
where
replaceIn info@(Info range categories) (Leaf _) | ranges <- rangesAndWordsInSource range, length ranges > 1 = info :< (Indexed $ makeLeaf categories <$> ranges)
replaceIn info syntax = info :< syntax
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (Source.toList $ slice range source)
makeLeaf categories (range, substring) = Info range categories :< Leaf (T.pack substring)
-- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char)
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
return $ Convert.toUnicode converter text
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
printDiff :: DiffArguments -> (Source Char, Source Char) -> (Term T.Text Info, Term T.Text Info) -> IO ()
printDiff arguments (aSource, bSource) (aTerm, bTerm) = case format arguments of
Unified -> put $ unified diff (aSource, bSource)
where put chunks = do
renderer <- byteStringMakerFromEnvironment
B1.putStr $ mconcat $ chunksToByteStrings renderer chunks
Split -> do
rendered <- split diff (aSource, bSource)
case output arguments of
Just path -> do
isDir <- doesDirectoryExist path
let outputPath = if isDir
then path </> (takeFileName outputPath -<.> ".html")
else path
IO.withFile outputPath IO.WriteMode (write rendered)
Nothing -> TextIO.putStr rendered
Patch -> putStr $ PatchOutput.patch diff (aSource, bSource)
where diff = diffTerms aTerm bTerm
write rendered h = TextIO.hPutStr h rendered

View File

@ -1,4 +1,5 @@
module Main where
{-# LANGUAGE RecordWildCards #-}
module Main where
import Categorizable
import Diff
@ -7,7 +8,6 @@ import qualified Parsers as P
import Syntax
import Range
import qualified PatchOutput
import Renderer
import Split
import Term
import Unified
@ -23,20 +23,15 @@ import qualified System.IO as IO
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
import Data.Bifunctor.Join
import Rainbow
import qualified DiffOutput as DO
-- | The available types of diff rendering.
data Format = Unified | Split | Patch
data Arguments = Arguments { format :: DO.Format, output :: Maybe FilePath, sourceA :: FilePath, sourceB :: FilePath }
-- | The command line arguments to the application.
data Arguments = Arguments { format :: Format, output :: Maybe FilePath, sourceA :: FilePath, sourceB :: FilePath }
-- | A parser for the application's command-line arguments.
arguments :: Parser Arguments
arguments = Arguments
<$> (flag Split Unified (long "unified" <> help "output a unified diff")
<|> flag Split Patch (long "patch" <> help "output a patch(1)-compatible diff")
<|> flag' Split (long "split" <> help "output a split diff"))
<$> (flag DO.Split DO.Unified (long "unified" <> help "output a unified diff")
<|> flag DO.Split DO.Patch (long "patch" <> help "output a patch(1)-compatible diff")
<|> flag' DO.Split (long "split" <> help "output a split diff"))
<*> optional (strOption (long "output" <> short 'o' <> help "output directory for split diffs, defaulting to stdout if unspecified"))
<*> strArgument (metavar "FILE a")
<*> strArgument (metavar "FILE b")
@ -45,50 +40,11 @@ main :: IO ()
main = do
arguments <- execParser opts
let (sourceAPath, sourceBPath) = (sourceA arguments, sourceB arguments)
sources <- sequence $ readAndTranscodeFile <$> Join (sourceAPath, sourceBPath)
let parse = (P.parserForType . T.pack . takeExtension) sourceAPath
sources <- sequence $ DO.readAndTranscodeFile <$> Join (sourceAPath, sourceBPath)
let parse = DO.parserForFilepath sourceAPath
terms <- sequence $ parse <$> sources
let replaceLeaves = breakDownLeavesByWord <$> sources
printDiff arguments (uncurry diff $ runJoin $ replaceLeaves <*> terms) (runJoin sources)
let replaceLeaves = DO.breakDownLeavesByWord <$> sources
DO.printDiff (args arguments) (runJoin sources) (runJoin $ replaceLeaves <*> terms)
where opts = info (helper <*> arguments)
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
-- | Diff two terms.
diff :: (Eq a, Eq annotation, Categorizable annotation) => Term a annotation -> Term a annotation -> Diff a annotation
diff = interpret comparable
-- | Return a renderer from the command-line arguments that will print the diff.
printDiff :: Arguments -> Renderer T.Text (IO ())
printDiff arguments diff sources = case format arguments of
Unified -> put $ unified diff sources
where
put chunks = do
renderer <- byteStringMakerFromEnvironment
B1.putStr $ mconcat $ chunksToByteStrings renderer chunks
Split -> put (output arguments) =<< split diff sources
where
put Nothing rendered = TextIO.putStr rendered
put (Just path) rendered = do
isDir <- doesDirectoryExist path
let outputPath = if isDir
then path </> (takeFileName (sourceB arguments) -<.> ".html")
else path
IO.withFile outputPath IO.WriteMode (flip TextIO.hPutStr rendered)
Patch -> putStr $ PatchOutput.patch diff sources
-- | Replace every string leaf with leaves of the words in the string.
breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
breakDownLeavesByWord source = cata replaceIn
where
replaceIn info@(Info range categories) (Leaf _) | ranges <- rangesAndWordsInSource range, length ranges > 1 = info :< (Indexed $ makeLeaf categories <$> ranges)
replaceIn info syntax = info :< syntax
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toList $ slice range source)
makeLeaf categories (range, substring) = Info range categories :< Leaf (T.pack substring)
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = fromText <$> do
text <- B1.readFile path
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
return $ Convert.toUnicode converter text
args Arguments{..} = DO.DiffArguments { format = format, output = output, outputPath = sourceA }

View File

@ -1,4 +1,4 @@
module Parsers where
module Parsers (parserForType, Parser, lineByLineParser) where
import Diff
import Range
@ -24,3 +24,4 @@ lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([
(accum ++ [ leaf charIndex (toText line) ]
, charIndex + length line)
toText = T.pack . Source.toString

64
app/SemanticDiff.hs Normal file
View File

@ -0,0 +1,64 @@
{-# LANGUAGE RecordWildCards #-}
module Main where
import Interpreter
import Renderer
import Source
import Options.Applicative
import qualified Data.ByteString.Char8 as B1
import qualified Data.Text as T
import Data.Bifunctor.Join
import Git.Libgit2
import Git.Types
import Git.Repository
import Data.Tagged
import Control.Monad.Reader
import System.Environment
import qualified DiffOutput as DO
-- | The command line arguments to the application.
data Arguments = Arguments { format :: DO.Format, output :: Maybe FilePath, shaA :: String, shaB :: String, filepaths :: [FilePath] }
-- | A parser for the application's command-line arguments.
arguments :: Parser Arguments
arguments = Arguments
<$> (flag DO.Split DO.Unified (long "unified" <> help "output a unified diff")
<|> flag DO.Split DO.Patch (long "patch" <> help "output a patch(1)-compatible diff")
<|> flag' DO.Split (long "split" <> help "output a split diff"))
<*> optional (strOption (long "output" <> short 'o' <> help "output directory for split diffs, defaulting to stdout if unspecified"))
<*> strArgument (metavar "SHA_A")
<*> strArgument (metavar "SHA_B")
<*> many (strArgument (metavar "FILE"))
main :: IO ()
main = do
gitDir <- getEnv "GIT_DIR"
arguments@Arguments{..} <- execParser opts
let shas = Join (shaA, shaB)
forM_ filepaths $ \filepath -> do
sources <- sequence $ fetchFromGitRepo gitDir filepath <$> shas
let parse = DO.parserForFilepath filepath
terms <- sequence $ parse <$> sources
let replaceLeaves = DO.breakDownLeavesByWord <$> sources
DO.printDiff (args arguments filepath) (runJoin sources) (runJoin $ replaceLeaves <*> terms)
where opts = info (helper <*> arguments)
(fullDesc <> progDesc "Diff some things" <> header "semantic-diff - diff semantically")
args Arguments{..} filepath = DO.DiffArguments { format = format, output = output, outputPath = filepath }
-- | Returns a file source given an absolute repo path, a relative file path, and the sha to look up.
fetchFromGitRepo :: FilePath -> FilePath -> String -> IO (Source Char)
fetchFromGitRepo repoPath path sha = join $ withRepository lgFactory repoPath $ do
object <- unTagged <$> parseObjOid (T.pack sha)
commitIHope <- lookupObject object
commit <- case commitIHope of
(CommitObj commit) -> return commit
_ -> error "Expected commit SHA"
tree <- lookupTree (commitTree commit)
entry <- treeEntry tree (B1.pack path)
bytestring <- case entry of
Nothing -> return mempty
Just BlobEntry {..} -> do
blob <- lookupBlob blobEntryOid
let (BlobString s) = blobContents blob
return s
return $ DO.transcode bytestring

View File

@ -71,6 +71,45 @@ executable semantic-diff-exe
, semantic-diff
, text >= 1.2.1.3
, text-icu
, gitlib
, gitlib-libgit2
, tagged
, mtl
default-language: Haskell2010
default-extensions: OverloadedStrings
if os(darwin)
extra-libraries: stdc++ icuuc icudata icui18n
if os(darwin)
extra-lib-dirs: /usr/local/opt/icu4c/lib
if os(darwin)
include-dirs: /usr/local/opt/icu4c/include
executable semantic-diff
hs-source-dirs: app
main-is: SemanticDiff.hs
other-modules: Parsers
if os(darwin)
ghc-options: -threaded -rtsopts -with-rtsopts=-N -static
else
ghc-options: -threaded -rtsopts -with-rtsopts=-N -static -pgml=script/g++
cc-options: -DU_STATIC_IMPLEMENTATION=1
cpp-options: -DU_STATIC_IMPLEMENTATION=1
build-depends: base
, bifunctors
, bytestring
, containers
, directory
, filepath
, free
, optparse-applicative
, semantic-diff
, text >= 1.2.1.3
, text-icu
, gitlib
, gitlib-libgit2
, rainbow
, tagged
, mtl
default-language: Haskell2010
default-extensions: OverloadedStrings
if os(darwin)

View File

@ -1,4 +1,4 @@
module Interpreter (interpret, Comparable) where
module Interpreter (interpret, Comparable, diffTerms) where
import Prelude hiding (lookup)
import Algorithm
@ -8,6 +8,7 @@ import Patch
import SES
import Syntax
import Term
import Categorizable
import Control.Monad.Free
import Control.Comonad.Cofree hiding (unwrap)
import qualified Data.OrderedMap as Map
@ -20,6 +21,10 @@ import Data.Maybe
-- | Returns whether two terms are comparable
type Comparable a annotation = Term a annotation -> Term a annotation -> Bool
-- | Diff two terms, given the default Categorizable.comparable function.
diffTerms :: (Eq a, Eq annotation, Categorizable annotation) => Term a annotation -> Term a annotation -> Diff a annotation
diffTerms = interpret comparable
-- | Diff two terms, given a function that determines whether two terms can be compared.
interpret :: (Eq a, Eq annotation) => Comparable a annotation -> Term a annotation -> Term a annotation -> Diff a annotation
interpret comparable a b = fromMaybe (Pure $ Replace a b) $ constructAndRun comparable a b

1
vendor/gitlib vendored Submodule

@ -0,0 +1 @@
Subproject commit b0c3ad9a7d453fce30364b4a277799c5e2f26947