sapling/hgext3rd/crdump.py
Mateusz Kwapich 0db7fe6449 crdump: fix the binary file dumping on windows
Summary:
A bug corrupted dumped binary files by adding extra \r characters on Windows.

From the Python manual:



>On Windows, 'b' appended to the mode opens the file in binary mode, so there are also modes like 'rb', 'wb', and 'r+b'. Python on Windows makes a distinction between text and binary files; the end-of-line characters in text files are automatically altered slightly when data is read or written. This behind-the-scenes modification to file data is fine for ASCII text files, but it’ll corrupt binary data like that in JPEG or EXE files. Be very careful to use binary mode when reading and writing such files. On Unix, it doesn’t hurt to append a 'b' to the mode, so you can use it platform-independently for all binary files.


Test Plan: the crdump tests should now pass on windows as well (didn't actually run them on Windows because I don't really know how)

Reviewers: #mercurial, rmcelroy, ikostia, medson

Reviewed By: medson

Subscribers: mjpieters, medson

Differential Revision: https://phabricator.intern.facebook.com/D6151931

Tasks: T23027803

Signature: 6151931:1508964509:65806961cbde26884cd45def17767135ceeac05f
2017-10-25 13:59:24 -07:00

188 lines
5.9 KiB
Python

# crdump.py - dump changesets information to filesystem
#
from __future__ import absolute_import
import json, re, shutil, tempfile
from os import path
from mercurial import (
error,
extensions,
phases,
registrar,
scmutil,
)
from mercurial.i18n import _
from mercurial.node import hex
# Matches the "Differential Revision: <url>/D<number>" field of a commit
# message and captures the numeric part of the Differential ID as group "id"
# (also reachable as group 1).
#
# The pattern is deliberately not anchored, so the field may appear anywhere
# in the description.  No re.LOCALE flag is passed: the pattern contains no
# locale-sensitive constructs (\w, \b, case-insensitive matching), so the flag
# never changed what is matched, and Python 3 rejects it for str patterns.
DIFFERENTIAL_REGEX = re.compile(
    r'Differential Revision: http.+?/'  # field label, then URL up to a slash
    r'D(?P<id>[0-9]+)'  # Differential ID, just numeric part
)
# Module-level command table for this extension.
cmdtable = {}
# Decorator built from the table by Mercurial's registrar; it is applied to
# ``crdump`` below to declare the ``debugcrdump`` command.
command = registrar.command(cmdtable)
@command('debugcrdump', [
    ('r', 'rev', [], _("revisions to dump")),
    # We use 1<<15 for "as much context as possible"
    ('U', 'unified',
     1 << 15, _('number of lines of context to show'), _('NUM')),
    ],
    _('hg debugcrdump [OPTION]... [-r] [REV]'))
def crdump(ui, repo, *revs, **opts):
    """
    Dump the info about the revisions in format that's friendly for sending the
    patches for code review.

    The output is a JSON dictionary holding the output directory and a list
    with one dictionary for each specified revision: ::

        {
          "output_directory": an output directory for all temporary files
          "commits": [
            {
              "node": commit hash,
              "date": date in format [unixtime, timezone offset],
              "desc": commit message,
              "patch_file": path to file containing patch in unified diff
                            format relative to output_directory,
              "files": list of files touched by commit,
              "binary_files": [
                {
                  "file_name": path to file relative to repo root,
                  "old_file": path to file (relative to output_directory)
                              with a dump of the old version of the file,
                  "new_file": path to file (relative to output_directory)
                              with a dump of the new version of the file,
                },
                ...
              ],
              "user": commit author,
              "p1": {
                "node": hash,
                "differential_revision": xxxx
              },
              "public_base": {
                "node": public base commit hash,
                "svnrev": svn revision of public base (if hgsvn repo),
              }
            },
            ...
          ]
        }

    "differential_revision" is only emitted when the first parent is not
    public, and "public_base" only when a public ancestor was found.

    Revisions come from the positional arguments plus every ``-r`` value;
    ``error.Abort`` is raised when none are given.  The output directory is
    created with ``tempfile.mkdtemp`` and is left in place on success; it is
    removed only when dumping fails.
    """
    revs = list(revs)
    # Tolerate programmatic callers that do not pass the 'rev' option at all.
    revs.extend(opts.get('rev') or [])

    if not revs:
        raise error.Abort(_('revisions must be specified'))
    revs = scmutil.revrange(repo, revs)

    # Fall back to the default of the -U option so that ``contextlines`` is
    # always bound, even when 'unified' is missing from ``opts``.
    contextlines = opts.get('unified', 1 << 15)

    cdata = []
    outdir = tempfile.mkdtemp(suffix='hg.crdump')
    try:
        for rev in revs:
            ctx = repo[rev]
            p1ctx = ctx.parents()[0]
            rdata = {
                'node': hex(ctx.node()),
                # An explicit list stays JSON-serializable even where
                # ``map`` returns a lazy iterator.
                'date': [int(part) for part in ctx.date()],
                'desc': ctx.description(),
                'files': ctx.files(),
                'p1': {
                    'node': p1ctx.hex(),
                },
                'user': ctx.user(),
            }
            if p1ctx.phase() != phases.public:
                # we need this only if parent is in the same draft stack
                rdata['p1']['differential_revision'] = \
                    phabricatorrevision(p1ctx)

            pbctx = publicbase(repo, ctx)
            if pbctx:
                rdata['public_base'] = {
                    'node': hex(pbctx.node()),
                }
                try:
                    hgsubversion = extensions.find('hgsubversion')
                    svnrev = hgsubversion.util.getsvnrev(pbctx)
                    # There are no abstractions in hgsubversion for doing
                    # it see hgsubversion/svncommands.py:267
                    rdata['public_base']['svnrev'] = \
                        svnrev.split('@')[1] if svnrev else None
                except KeyError:
                    # Best effort: extensions.find raises KeyError when
                    # hgsubversion is not loaded, in which case "svnrev" is
                    # simply left out.
                    pass

            rdata['patch_file'] = dumppatch(ui, repo, ctx, outdir,
                                            contextlines)
            rdata['binary_files'] = dumpbinaryfiles(ui, repo, ctx, outdir)
            cdata.append(rdata)

        ui.write(json.dumps({
            'output_directory': outdir,
            'commits': cdata,
        }, sort_keys=True, indent=4, separators=(',', ': ')))
        ui.write('\n')
    except Exception:
        # Nobody learns about the directory when the dump fails, so remove
        # it.  A bare ``raise`` keeps the original traceback intact.
        shutil.rmtree(outdir)
        raise
def dumppatch(ui, repo, ctx, outdir, contextlines):
    """Write the diff of ``ctx`` to ``<hex node>.patch`` inside ``outdir``.

    The diff is requested in git format with ``contextlines`` lines of
    context and with binary content disabled.  The file is opened in binary
    mode so the chunks are written unmodified.  ``ui`` and ``repo`` are not
    used.

    Returns the patch file name (relative to ``outdir``).
    """
    patchname = '%s.patch' % hex(ctx.node())
    diffchunks = ctx.diff(git=True, unified=contextlines, binary=False)
    with open(path.join(outdir, patchname), 'wb') as out:
        out.writelines(diffchunks)
    return patchname
def dumpfctx(outdir, fctx):
    """Dump the content of ``fctx`` into ``outdir`` and return the file name.

    The file is named after the hex file node.  When a file with that name
    already exists in ``outdir`` it is left untouched (presumably it holds
    the same content, since the name is derived from the file node).  Data is
    written in binary mode so it is not altered on any platform.
    """
    name = '%s' % hex(fctx.filenode())
    target = path.join(outdir, name)
    if path.isfile(target):
        # Already dumped under this name; do not write it again.
        return name
    with open(target, 'wb') as out:
        out.write(fctx.data())
    return name
def dumpbinaryfiles(ui, repo, ctx, outdir):
    """Dump both versions of every binary file touched by ``ctx``.

    For each file listed by ``ctx.files()`` the version in ``ctx`` and the
    version in its first parent are looked up.  When either one is binary,
    every version that exists is dumped into ``outdir`` via ``dumpfctx``.
    ``ui`` and ``repo`` are not used.

    Returns a list of dictionaries with the keys ``file_name``, ``old_file``
    and ``new_file``; a dump entry is ``None`` when that version of the file
    does not exist.
    """
    parent = ctx.parents()[0]
    dumped = []
    for fname in ctx.files():
        newfctx = ctx[fname] if fname in ctx else None
        oldfctx = parent[fname] if fname in parent else None

        # if one of the versions is binary file the whole change will show
        # up as binary in diff output so we need to dump both versions
        newbinary = newfctx and newfctx.isbinary()
        oldbinary = oldfctx and oldfctx.isbinary()
        if not (newbinary or oldbinary):
            continue

        newfile = dumpfctx(outdir, newfctx) if newfctx else None
        oldfile = dumpfctx(outdir, oldfctx) if oldfctx else None
        dumped.append({
            'file_name': fname,
            'old_file': oldfile,
            'new_file': newfile,
        })
    return dumped
def phabricatorrevision(ctx):
    """Return the numeric Differential ID from the description of ``ctx``.

    The description is searched with ``DIFFERENTIAL_REGEX``; an empty string
    is returned when it contains no match.
    """
    found = DIFFERENTIAL_REGEX.search(ctx.description())
    if not found:
        return ''
    return found.group(1)
def publicbase(repo, ctx):
    """Return the context selected by ``last(::REV & public())`` for ``ctx``.

    ``REV`` is ``ctx.rev()``.  ``None`` is returned when the revset yields no
    revision.
    """
    candidates = repo.revs('last(::%d & public())', ctx.rev())
    if len(candidates) == 0:
        return None
    return repo[candidates.first()]