mirror of
https://github.com/facebook/sapling.git
synced 2024-10-09 16:31:02 +03:00
dc07b62633
Summary: Previously we can only answer the "path" information when the revision is in the linelog revmap, and the code would crash if a revision is in a side branch, and the user requests path information. This diff fixes it. Besides, this diff improves rename handling. For example, given the following chart: ``` o---o -o file name: a / o---o- file name: b ^ ^ ^ 1 2 3 revisions ``` Depending on the position of the `main branch` reference, fastannotate may or may not use linelog: - main branch is at rev 2, annotate a -r 3 will not take advantage of linelog (fallback to slow annotate) - main branch is at rev 3, annotate a -r 2 will not take advantage of linelog This is not ideal, but seems to be the best we can do for now. Test Plan: Added a new test, updated existing relevant tests. Some debug messages are changed to reflect internals more precisely. Reviewers: #sourcecontrol, stash Reviewed By: stash Subscribers: stash, mjpieters Differential Revision: https://phabricator.intern.facebook.com/D4010964 Signature: t1:4010964:1476458201:79875d96399d023d0000d0c4bb8b8d40ea43eef0
230 lines
7.7 KiB
Python
230 lines
7.7 KiB
Python
# Copyright 2016-present Facebook. All Rights Reserved.
|
|
#
|
|
# revmap: trivial hg hash - linelog rev bidirectional map
|
|
#
|
|
# This software may be used and distributed according to the terms of the
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
import bisect
|
|
import os
|
|
import struct
|
|
|
|
from fastannotate import error
|
|
|
|
# the revmap file format is straightforward:
|
|
#
|
|
# 8 bytes: header
|
|
# 1 byte : flag for linelog revision 1
|
|
# ? bytes: (optional) '\0'-terminated path string
|
|
# only exists if (flag & renameflag) != 0
|
|
# 20 bytes: hg hash for linelog revision 1
|
|
# 1 byte : flag for linelog revision 2
|
|
# ? bytes: (optional) '\0'-terminated path string
|
|
# 20 bytes: hg hash for linelog revision 2
|
|
# ....
|
|
#
|
|
# the implementation is kinda stupid: __init__ loads the whole revmap.
|
|
# no laziness. benchmark shows loading 10000 revisions is about 0.015
|
|
# seconds, which looks enough for our use-case. if this implementation
|
|
# becomes a bottleneck, we can change it to lazily read the file
|
|
# from the end.
|
|
|
|
# whether the changeset is in the side branch. i.e. not in the linear main
|
|
# branch but only got referenced by lines in merge changesets.
|
|
sidebranchflag = 1
|
|
|
|
# whether the changeset changes the file path (ie. is a rename)
|
|
renameflag = 2
|
|
|
|
# len(mercurial.node.nullid)
|
|
_hshlen = 20
|
|
|
|
class revmap(object):
|
|
"""trivial hg bin hash - linelog rev bidirectional map
|
|
|
|
also stores a flag (uint8) for each revision, and track renames.
|
|
"""
|
|
|
|
HEADER = b'REVMAP1\0'
|
|
|
|
def __init__(self, path=None):
|
|
"""create or load the revmap, optionally associate to a file
|
|
|
|
if path is None, the revmap is entirely in-memory. the caller is
|
|
responsible for locking. concurrent writes to a same file is unsafe.
|
|
the caller needs to make sure one file is associated to at most one
|
|
revmap object at a time."""
|
|
self.path = path
|
|
self._rev2hsh = [None]
|
|
self._rev2flag = [None]
|
|
self._hsh2rev = {}
|
|
# since rename does not happen frequently, do not store path for every
|
|
# revision. self._renamerevs can be used for bisecting.
|
|
self._renamerevs = [0]
|
|
self._renamepaths = ['']
|
|
self._lastmaxrev = -1
|
|
if path:
|
|
if os.path.exists(path):
|
|
self._load()
|
|
else:
|
|
# write the header so "append" can do incremental updates
|
|
self.flush()
|
|
|
|
def copyfrom(self, rhs):
|
|
"""copy the map data from another revmap. do not affect self.path"""
|
|
self._rev2hsh = rhs._rev2hsh[:]
|
|
self._rev2flag = rhs._rev2flag[:]
|
|
self._hsh2rev = rhs._hsh2rev.copy()
|
|
self._renamerevs = rhs._renamerevs[:]
|
|
self._renamepaths = rhs._renamepaths[:]
|
|
self._lastmaxrev = -1
|
|
|
|
@property
|
|
def maxrev(self):
|
|
"""return max linelog revision number"""
|
|
return len(self._rev2hsh) - 1
|
|
|
|
def append(self, hsh, sidebranch=False, path=None, flush=False):
|
|
"""add a binary hg hash and return the mapped linelog revision.
|
|
if flush is True, incrementally update the file.
|
|
"""
|
|
assert hsh not in self._hsh2rev
|
|
assert len(hsh) == _hshlen
|
|
idx = len(self._rev2hsh)
|
|
flag = 0
|
|
if sidebranch:
|
|
flag |= sidebranchflag
|
|
if path is not None and path != self._renamepaths[-1]:
|
|
flag |= renameflag
|
|
self._renamerevs.append(idx)
|
|
self._renamepaths.append(path)
|
|
self._rev2hsh.append(hsh)
|
|
self._rev2flag.append(flag)
|
|
self._hsh2rev[hsh] = idx
|
|
if flush:
|
|
self.flush()
|
|
return idx
|
|
|
|
def rev2hsh(self, rev):
|
|
"""convert linelog revision to hg hash. return None if not found."""
|
|
if rev > self.maxrev or rev < 0:
|
|
return None
|
|
return self._rev2hsh[rev]
|
|
|
|
def rev2flag(self, rev):
|
|
"""get the flag (uint8) for a given linelog revision.
|
|
return None if revision does not exist.
|
|
"""
|
|
if rev > self.maxrev or rev < 0:
|
|
return None
|
|
return self._rev2flag[rev]
|
|
|
|
def rev2path(self, rev):
|
|
"""get the path for a given linelog revision.
|
|
return None if revision does not exist.
|
|
"""
|
|
if rev > self.maxrev or rev < 0:
|
|
return None
|
|
idx = bisect.bisect_right(self._renamerevs, rev) - 1
|
|
return self._renamepaths[idx]
|
|
|
|
def hsh2rev(self, hsh):
|
|
"""convert hg hash to linelog revision. return None if not found."""
|
|
return self._hsh2rev.get(hsh)
|
|
|
|
def clear(self, flush=False):
|
|
"""make the map empty. if flush is True, write to disk"""
|
|
# rev 0 is reserved, real rev starts from 1
|
|
self._rev2hsh = [None]
|
|
self._rev2flag = [None]
|
|
self._hsh2rev = {}
|
|
self._rev2path = ['']
|
|
self._lastmaxrev = -1
|
|
if flush:
|
|
self.flush()
|
|
|
|
def flush(self):
|
|
"""write the state down to the file"""
|
|
if not self.path:
|
|
return
|
|
if self._lastmaxrev == -1: # write the entire file
|
|
with open(self.path, 'wb') as f:
|
|
f.write(self.HEADER)
|
|
for i in xrange(1, len(self._rev2hsh)):
|
|
self._writerev(i, f)
|
|
else: # append incrementally
|
|
with open(self.path, 'ab') as f:
|
|
for i in xrange(self._lastmaxrev + 1, len(self._rev2hsh)):
|
|
self._writerev(i, f)
|
|
self._lastmaxrev = self.maxrev
|
|
|
|
def _load(self):
|
|
"""load state from file"""
|
|
if not self.path:
|
|
return
|
|
# use local variables in a loop. CPython uses LOAD_FAST for them,
|
|
# which is faster than both LOAD_CONST and LOAD_GLOBAL.
|
|
flaglen = 1
|
|
hshlen = _hshlen
|
|
with open(self.path, 'rb') as f:
|
|
if f.read(len(self.HEADER)) != self.HEADER:
|
|
raise error.CorruptedFileError()
|
|
self.clear(flush=False)
|
|
while True:
|
|
buf = f.read(flaglen)
|
|
if not buf:
|
|
break
|
|
flag = ord(buf)
|
|
rev = len(self._rev2hsh)
|
|
if flag & renameflag:
|
|
path = self._readcstr(f)
|
|
self._renamerevs.append(rev)
|
|
self._renamepaths.append(path)
|
|
hsh = f.read(hshlen)
|
|
if len(hsh) != hshlen:
|
|
raise error.CorruptedFileError()
|
|
self._hsh2rev[hsh] = rev
|
|
self._rev2flag.append(flag)
|
|
self._rev2hsh.append(hsh)
|
|
self._lastmaxrev = self.maxrev
|
|
|
|
def _writerev(self, rev, f):
|
|
"""append a revision data to file"""
|
|
flag = self._rev2flag[rev]
|
|
hsh = self._rev2hsh[rev]
|
|
f.write(struct.pack('B', flag))
|
|
if flag & renameflag:
|
|
path = self.rev2path(rev)
|
|
assert path is not None
|
|
f.write(path + '\0')
|
|
f.write(hsh)
|
|
|
|
@staticmethod
|
|
def _readcstr(f):
|
|
"""read a C-language-like '\0'-terminated string"""
|
|
buf = ''
|
|
while True:
|
|
ch = f.read(1)
|
|
if not ch: # unexpected eof
|
|
raise error.CorruptedFileError()
|
|
if ch == '\0':
|
|
break
|
|
buf += ch
|
|
return buf
|
|
|
|
def __contains__(self, f):
|
|
"""(fctx or (node, path)) -> bool.
|
|
test if (node, path) is in the map, and is not in a side branch.
|
|
f can be either a tuple of (node, path), or a fctx.
|
|
"""
|
|
if isinstance(f, tuple): # f: (node, path)
|
|
hsh, path = f
|
|
else: # f: fctx
|
|
hsh, path = f.node(), f.path()
|
|
rev = self.hsh2rev(hsh)
|
|
if rev is None:
|
|
return False
|
|
if path is not None and path != self.rev2path(rev):
|
|
return False
|
|
return (self.rev2flag(rev) & sidebranchflag) == 0
|