# crdump.py - dump changesets information to filesystem
#
from __future__ import absolute_import

import json, re, shutil, tempfile

from os import path

from mercurial import (
    error,
    extensions,
    obsutil,
    phases,
    registrar,
    scmutil,
)

from mercurial.i18n import _
from mercurial.node import hex

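# Matches the Phabricator trailer in a commit message, e.g. (URL illustrative):
#   Differential Revision: https://phabricator.example.com/D12345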
DIFFERENTIAL_REGEX = re.compile(
    'Differential Revision: http.+?/'  # Line start, URL
    'D(?P<id>[0-9]+)',  # Differential ID, just numeric part
    flags=re.LOCALE,
)

cmdtable = {}
command = registrar.command(cmdtable)

@command('debugcrdump', [
    ('r', 'rev', [], _('revisions to dump')),
    # We use 1<<15 for "as much context as possible"
    ('U', 'unified', 1 << 15,
     _('number of lines of context to show'), _('NUM')),
    ('l', 'lfs', False, _('provide sha256 for lfs files instead of dumping')),
    ('', 'obsolete', False,
     _('add obsolete markers related to the given revisions')),
], _('hg debugcrdump [OPTION]... [-r] [REV]'))
def crdump(ui, repo, *revs, **opts):
    """
    Dump information about revisions in a format that is friendly for
    sending patches for code review.

    The output is a JSON object whose "commits" list holds a dictionary
    for each specified revision: ::

        {
          "output_directory": an output directory for all temporary files
          "commits": [
            {
              "node": commit hash,
              "date": date in format [unixtime, timezone offset],
              "desc": commit message,
              "patch_file": path to file containing patch in unified diff
                            format relative to output_directory,
              "files": list of files touched by commit,
              "binary_files": [
                {
                  "filename": path to file relative to repo root,
                  "old_file": path to file (relative to output_directory)
                              with a dump of the old version of the file,
                  "new_file": path to file (relative to output_directory)
                              with a dump of the new version of the file,
                },
                ...
              ],
              "user": commit author,
              "p1": {
                "node": parent commit hash,
                "differential_revision": xxxx
              },
              "public_base": {
                "node": public base commit hash,
                "svnrev": svn revision of public base (if hgsvn repo),
              },
              "obsolete": {
                "date": [
                  time,
                  timezone
                ],
                "flag": marker's flags,
                "metadata": {
                  "operation": changes made,
                  "user": user name
                },
                "prednode": predecessor commit hash,
                "succnodes": [
                  successor commit hashes
                ]
              }
            },
            ...
          ]
        }
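
    Example invocation (illustrative; any revset works for -r)::

        hg debugcrdump -r . --obsolete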
"""
revs = list(revs)
revs.extend(opts['rev'])
if not revs:
raise error.Abort(_('revisions must be specified'))
revs = scmutil.revrange(repo, revs)
if 'unified' in opts:
contextlines = opts['unified']
    cdata = []
    outdir = tempfile.mkdtemp(suffix='hg.crdump')
    try:
        lfs = None
        if opts['lfs']:
            try:
                lfs = extensions.find('lfs')
            except KeyError:
                pass  # lfs extension is not enabled

        for rev in revs:
            ctx = repo[rev]
            rdata = {
                'node': hex(ctx.node()),
                'date': map(int, ctx.date()),
                'desc': ctx.description(),
                'files': ctx.files(),
                'p1': {
                    'node': ctx.parents()[0].hex(),
                },
                'user': ctx.user(),
                'bookmarks': ctx.bookmarks(),
            }
            if ctx.parents()[0].phase() != phases.public:
                # we need this only if the parent is in the same draft stack
                rdata['p1']['differential_revision'] = \
                    phabricatorrevision(ctx.parents()[0])

            if opts['obsolete']:
                markers = obsutil.getmarkers(repo, [ctx.node()])
                obsolete = dumpmarkers(markers)
                if obsolete:
                    rdata['obsolete'] = obsolete

            pbctx = publicbase(repo, ctx)
            if pbctx:
                rdata['public_base'] = {
                    'node': hex(pbctx.node()),
                }
                try:
                    hgsubversion = extensions.find('hgsubversion')
                    svnrev = hgsubversion.util.getsvnrev(pbctx)
                    # There is no abstraction in hgsubversion for doing this;
                    # see hgsubversion/svncommands.py:267
                    rdata['public_base']['svnrev'] = \
                        svnrev.split('@')[1] if svnrev else None
                except KeyError:
                    pass
            rdata['patch_file'] = dumppatch(ui, repo, ctx, outdir,
                                            contextlines)
            rdata['binary_files'] = dumpbinaryfiles(ui, repo, ctx, outdir,
                                                    lfs)
            cdata.append(rdata)

        ui.write(json.dumps({
            'output_directory': outdir,
            'commits': cdata,
        }, sort_keys=True, indent=4, separators=(',', ': ')))
        ui.write('\n')
    except Exception:
        # Clean up the temp directory, then re-raise with the original
        # traceback (a bare `raise` instead of `raise e`).
        shutil.rmtree(outdir)
        raise

def dumppatch(ui, repo, ctx, outdir, contextlines):
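    """Write ctx's patch (git unified-diff format) into outdir.

    Binary hunks are excluded here; binary files are handled separately
    by dumpbinaryfiles(). Returns the patch file name relative to outdir.
    """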
    chunks = ctx.diff(git=True, unified=contextlines, binary=False)
    patchfile = '%s.patch' % hex(ctx.node())
    with open(path.join(outdir, patchfile), 'wb') as f:
        for chunk in chunks:
            f.write(chunk)
    return patchfile

def dumpfctx(outdir, fctx):
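    """Dump the contents of a file revision into outdir.

    The dump file is named after the filenode hash, so an existing dump
    for the same filenode is reused. Returns the name relative to outdir.
    """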
    outfile = '%s' % hex(fctx.filenode())
    writepath = path.join(outdir, outfile)
    if not path.isfile(writepath):
        with open(writepath, 'wb') as f:
            f.write(fctx.data())
    return outfile

def _getfilesizeandsha256(flog, ctx, fname, lfs):
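    """Return (sha256, size) for fname if it is stored in lfs.

    Returns (None, None) when the file is absent from the revision or is
    not an lfs object.
    """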
    try:
        fnode = ctx.filenode(fname)
    except error.ManifestLookupError:
        return None, None
    if lfs.wrapper._islfs(flog, node=fnode):  # file was uploaded to lfs
        rawtext = flog.revision(fnode, raw=True)
        gitlfspointer = lfs.pointer.deserialize(rawtext)
        sha256_hash = gitlfspointer.oid()
        filesize = gitlfspointer.size()
        return sha256_hash, filesize
    return None, None

def dumpbinaryfiles(ui, repo, ctx, outdir, lfs):
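    """Collect metadata for the binary files touched by ctx.

    Non-lfs binary files are dumped into outdir via dumpfctx(); files
    stored in lfs are reported by sha256 and size instead of being dumped.
    """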
    binaryfiles = []
    pctx = ctx.parents()[0]
    with ui.configoverride({('remotefilelog', 'dolfsprefetch'): False}):
        for fname in ctx.files():
            oldfile = newfile = None
            dump = False
            oldfilesha256 = newfilesha256 = None
            oldfilesize = newfilesize = None
            if lfs is not None:
                flog = repo.file(fname)
                newfilesha256, newfilesize = _getfilesizeandsha256(
                    flog, ctx, fname, lfs)
                oldfilesha256, oldfilesize = _getfilesizeandsha256(
                    flog, pctx, fname, lfs)

            # if one of the versions is a binary file which is not in lfs,
            # the whole change will show up as binary in the diff output
            if not newfilesha256:
                fctx = ctx[fname] if fname in ctx else None
                if fctx and fctx.isbinary():
                    dump = True
            if not oldfilesha256:
                pfctx = pctx[fname] if fname in pctx else None
                if pfctx and pfctx.isbinary():
                    dump = True

            if dump:
                if not newfilesha256 and fctx:
                    newfile = dumpfctx(outdir, fctx)
                if not oldfilesha256 and pfctx:
                    oldfile = dumpfctx(outdir, pfctx)

            if lfs is None and dump:
                binaryfiles.append({
                    'file_name': fname,
                    'old_file': oldfile,
                    'new_file': newfile,
                })
            elif newfile or newfilesha256 or oldfile or oldfilesha256:
                binaryfiles.append({
                    'file_name': fname,
                    'old_file': oldfile,
                    'new_file': newfile,
                    'new_file_sha256': newfilesha256,
                    'old_file_sha256': oldfilesha256,
                    'new_file_size': newfilesize,
                    'old_file_size': oldfilesize,
                })
    return binaryfiles

def phabricatorrevision(ctx):
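    """Extract the Differential Revision id from ctx's commit message."""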
    match = DIFFERENTIAL_REGEX.search(ctx.description())
    return match.group(1) if match else ''

def publicbase(repo, ctx):
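    """Return the most recent public ancestor of ctx, or None."""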
    base = repo.revs('last(::%d & public())', ctx.rev())
    if len(base):
        return repo[base.first()]
    return None

def dumpmarkers(rawmarkers):
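    """Convert raw obsolete markers into JSON-serializable dictionaries."""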
    markers = []
    for rm in rawmarkers:
        marker = {
            'date': rm.date(),
            'flag': rm.flags(),
            'metadata': rm.metadata(),
            'prednode': hex(rm.prednode()),
        }
        if rm.succnodes():
            marker['succnodes'] = map(hex, rm.succnodes())
        if rm.parentnodes():
            marker['parents'] = map(hex, rm.parentnodes())
        markers.append(marker)
    return markers