1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 06:41:45 +03:00
semantic/src/SemanticDiff.hs
2016-10-12 14:51:41 -04:00

165 lines
7.2 KiB
Haskell

{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-warnings-deprecations -fno-warn-incomplete-patterns #-}
module SemanticDiff (main, fetchDiff, fetchDiffs) where
import Arguments
import Prologue hiding ((<>), fst, snd)
import Data.String
import Data.Functor.Both
import Data.Version (showVersion)
import Text.Regex
import Diffing
import Git.Libgit2
import Git.Repository
import Git.Blob
import Git.Types
import Git.Libgit2.Backend
import Options.Applicative hiding (action)
import System.Timeout as Timeout
import Data.List ((\\))
import qualified Diffing as D
import qualified Git
import qualified Paths_semantic_diff as Library (version)
import qualified Renderer as R
import qualified Source
import qualified Control.Concurrent.Async.Pool as Async
import GHC.Conc (numCapabilities)
import Development.GitRev
main :: IO ()
main = do
args@Arguments{..} <- programArguments =<< execParser argumentsParser
case diffMode of
PathDiff paths -> diffPaths args paths
CommitDiff -> diffCommits args
-- | A parser for the application's command-line arguments.
argumentsParser :: ParserInfo CmdLineOptions
argumentsParser = info (version <*> helper <*> argumentsP)
(fullDesc <> progDesc "Set the GIT_DIR environment variable to specify the git repository. Set GIT_ALTERNATE_OBJECT_DIRECTORIES to specify location of alternates."
<> header "semantic-diff - Show semantic changes between commits")
where
argumentsP :: Parser CmdLineOptions
argumentsP = CmdLineOptions
<$> (flag R.Split R.Patch (long "patch" <> help "output a patch(1)-compatible diff")
<|> flag R.Split R.JSON (long "json" <> help "output a json diff")
<|> flag' R.Split (long "split" <> help "output a split diff")
<|> flag' R.Summary (long "summary" <> help "output a diff summary"))
<*> optional (option auto (long "timeout" <> help "timeout for per-file diffs in seconds, defaults to 7 seconds"))
<*> optional (strOption (long "output" <> short 'o' <> help "output directory for split diffs, defaults to stdout if unspecified"))
<*> switch (long "no-index" <> help "compare two paths on the filesystem")
<*> some (argument (eitherReader parseShasAndFiles) (metavar "SHA_A..SHAB FILES..."))
<*> switch (long "development" <> short 'd' <> help "set development mode which prevents timeout behavior by default")
where
parseShasAndFiles :: String -> Either String ExtraArg
parseShasAndFiles s = case matchRegex regex s of
Just ["", sha2] -> Right . ShaPair $ both Nothing (Just sha2)
Just [sha1, sha2] -> Right . ShaPair $ Just <$> both sha1 sha2
_ -> Right $ FileArg s
where regex = mkRegexWithOpts "([0-9a-f]{40})\\.\\.([0-9a-f]{40})" True False
versionString :: String
versionString = "semantic-diff version " <> showVersion Library.version <> " (" <> $(gitHash) <> ")"
version :: Parser (a -> a)
version = infoOption versionString (long "version" <> short 'V' <> help "output the version of the program")
-- | Compare changes between two commits.
diffCommits :: Arguments -> IO ()
diffCommits args@Arguments{..} = do
ts <- fetchTerms args
writeToOutput output (maybe mempty R.concatOutputs ts)
where fetchTerms args = if developmentMode
then Just <$> fetchDiffs args
else Timeout.timeout timeoutInMicroseconds (fetchDiffs args)
-- | Compare two paths on the filesystem (compariable to git diff --no-index).
diffPaths :: Arguments -> Both FilePath -> IO ()
diffPaths args@Arguments{..} paths = do
sources <- sequence $ readAndTranscodeFile <$> paths
let sourceBlobs = Source.SourceBlob <$> sources <*> pure mempty <*> paths <*> pure (Just Source.defaultPlainBlob)
D.printDiff (parserForFilepath (fst paths)) (diffArgs args) sourceBlobs
where
diffArgs Arguments{..} = R.DiffArguments { format = format, output = output }
fetchDiffs :: Arguments -> IO [R.Output]
fetchDiffs args@Arguments{..} = do
paths <- case(filePaths, shaRange) of
([], Join (Just a, Just b)) -> pathsToDiff args (both a b)
(ps, _) -> pure ps
Async.withTaskGroup numCapabilities $ \p ->
Async.mapTasks p (fetchDiff args <$> paths)
fetchDiff :: Arguments -> FilePath -> IO R.Output
fetchDiff args@Arguments{..} filepath = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
lift $ runReaderT (fetchDiff' args filepath) repo
fetchDiff' :: Arguments -> FilePath -> ReaderT LgRepo IO R.Output
fetchDiff' Arguments{..} filepath = do
sourcesAndOids <- sequence $ traverse (getSourceBlob filepath) <$> shaRange
let sources = fromMaybe (Source.emptySourceBlob filepath) <$> sourcesAndOids
let sourceBlobs = Source.idOrEmptySourceBlob <$> sources
let textDiff = D.textDiff (parserForFilepath filepath) diffArguments sourceBlobs
text <- fetchText textDiff
truncatedPatch <- liftIO $ D.truncatedDiff diffArguments sourceBlobs
pure $ fromMaybe truncatedPatch text
where
diffArguments = R.DiffArguments { format = format, output = output }
fetchText textDiff = if developmentMode
then liftIO $ Just <$> textDiff
else liftIO $ Timeout.timeout timeoutInMicroseconds textDiff
pathsToDiff :: Arguments -> Both String -> IO [FilePath]
pathsToDiff Arguments{..} shas = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
lift $ runReaderT (pathsToDiff' shas) repo
-- | Returns a list of relative file paths that have changed between the given commit shas.
pathsToDiff' :: Both String -> ReaderT LgRepo IO [FilePath]
pathsToDiff' shas = do
entries <- blobEntriesToDiff shas
pure $ (\(p, _, _) -> toS p) <$> entries
-- | Returns a list of blob entries that have changed between the given commits shas.
blobEntriesToDiff :: Both String -> ReaderT LgRepo IO [(TreeFilePath, Git.BlobOid LgRepo, BlobKind)]
blobEntriesToDiff shas = do
a <- blobEntries (fst shas)
b <- blobEntries (snd shas)
pure $ (a \\ b) <> (b \\ a)
where blobEntries sha = treeForCommitSha sha >>= treeBlobEntries
-- | Returns a Git.Tree for a commit sha
treeForCommitSha :: String -> ReaderT LgRepo IO (Git.Tree LgRepo)
treeForCommitSha sha = do
object <- parseObjOid (toS sha)
commit <- lookupCommit object
lookupTree (commitTree commit)
-- | Returns a SourceBlob given a relative file path, and the sha to look up.
getSourceBlob :: FilePath -> String -> ReaderT LgRepo IO Source.SourceBlob
getSourceBlob path sha = do
tree <- treeForCommitSha sha
entry <- treeEntry tree (toS path)
(bytestring, oid, mode) <- case entry of
Nothing -> pure (mempty, mempty, Nothing)
Just (BlobEntry entryOid entryKind) -> do
blob <- lookupBlob entryOid
let (BlobString s) = blobContents blob
let oid = renderObjOid $ blobOid blob
pure (s, oid, Just entryKind)
s <- liftIO $ transcode bytestring
pure $ Source.SourceBlob s (toS oid) path (toSourceKind <$> mode)
where
toSourceKind :: Git.BlobKind -> Source.SourceKind
toSourceKind (Git.PlainBlob mode) = Source.PlainBlob mode
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode