fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
# Copyright 2016-present Facebook. All Rights Reserved.
|
|
|
|
#
|
|
|
|
# revmap: trivial hg hash - linelog rev bidirectional map
|
|
|
|
#
|
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
|
2016-09-19 12:51:54 +03:00
|
|
|
import bisect
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
import os
|
|
|
|
import struct
|
|
|
|
|
2016-09-08 16:47:49 +03:00
|
|
|
from fastannotate import error
|
|
|
|
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
# the revmap file format is straightforward:
|
|
|
|
#
|
|
|
|
# 8 bytes: header
|
|
|
|
# 1 byte : flag for linelog revision 1
|
2016-09-19 12:51:54 +03:00
|
|
|
# ? bytes: (optional) '\0'-terminated path string
|
|
|
|
# only exists if (flag & renameflag) != 0
|
|
|
|
# 20 bytes: hg hash for linelog revision 1
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
# 1 byte : flag for linelog revision 2
|
2016-09-19 12:51:54 +03:00
|
|
|
# ? bytes: (optional) '\0'-terminated path string
|
|
|
|
# 20 bytes: hg hash for linelog revision 2
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
# ....
|
|
|
|
#
|
|
|
|
# the implementation is kinda stupid: __init__ loads the whole revmap.
|
|
|
|
# no laziness. benchmark shows loading 10000 revisions is about 0.015
|
|
|
|
# seconds, which looks enough for our use-case. if this implementation
|
|
|
|
# becomes a bottleneck, we can change it to lazily read the file
|
|
|
|
# from the end.
|
|
|
|
|
2016-09-07 17:05:55 +03:00
|
|
|
# Per-revision flag bits. Each revision's flag is stored as a single byte
# in the revmap file (see the file format description above).

# whether the changeset is in the side branch. i.e. not in the linear main
# branch but only got referenced by lines in merge changesets.
sidebranchflag = 1

# whether the changeset changes the file path (ie. is a rename)
renameflag = 2

# len(mercurial.node.nullid)
_hshlen = 20
|
|
|
|
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
class revmap(object):
|
|
|
|
"""trivial hg bin hash - linelog rev bidirectional map
|
|
|
|
|
2016-09-19 12:51:54 +03:00
|
|
|
also stores a flag (uint8) for each revision, and track renames.
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
"""
|
|
|
|
|
2016-09-19 12:51:54 +03:00
|
|
|
HEADER = b'REVMAP1\0'
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
|
|
|
|
def __init__(self, path=None):
    """create or load the revmap, optionally associate to a file

    if path is None, the revmap is entirely in-memory. otherwise, an
    existing file at path is loaded, and a missing file is created with
    just the header.

    the caller is responsible for locking. concurrent writes to the same
    file are unsafe. the caller needs to make sure one file is associated
    to at most one revmap object at a time.
    """
    self.path = path
    # rev 0 is reserved (see clear()), so index 0 holds a placeholder
    self._rev2hsh = [None]
    self._rev2flag = [None]
    self._hsh2rev = {}
    # since rename does not happen frequently, do not store path for every
    # revision. self._renamerevs can be used for bisecting.
    self._renamerevs = [0]
    self._renamepaths = ['']
    # maxrev as of the last write to disk. -1 never equals maxrev, so the
    # next flush() is not skipped as "nothing changed".
    self._lastmaxrev = -1
    if path:
        if os.path.exists(path):
            self._load()
        else:
            # write the header so "append" can do incremental updates
            self.flush()
|
|
|
|
|
2016-09-11 18:44:57 +03:00
|
|
|
def copyfrom(self, rhs):
    """copy the map data from another revmap. do not affect self.path

    the containers are copied (shallowly), so later appends to either
    revmap do not show up in the other one.
    """
    self._rev2hsh = rhs._rev2hsh[:]
    self._rev2flag = rhs._rev2flag[:]
    self._hsh2rev = rhs._hsh2rev.copy()
    self._renamerevs = rhs._renamerevs[:]
    self._renamepaths = rhs._renamepaths[:]
    # the file at self.path (if any) no longer matches the in-memory state,
    # so make sure the next flush() is not skipped.
    self._lastmaxrev = -1
|
|
|
|
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
@property
def maxrev(self):
    """return max linelog revision number

    index 0 of self._rev2hsh is a reserved placeholder, so an empty map
    has maxrev 0.
    """
    return len(self._rev2hsh) - 1
|
|
|
|
|
2016-09-19 12:51:54 +03:00
|
|
|
def append(self, hsh, sidebranch=False, path=None, flush=False):
    """add a binary hg hash and return the mapped linelog revision.

    hsh must be a _hshlen-byte hash that is not in the map yet.
    if sidebranch is True, the revision is marked with sidebranchflag.
    if path is given and differs from the most recently recorded path,
    the revision is marked with renameflag and the new path is recorded.
    if flush is True, update the file on disk as well.
    """
    assert hsh not in self._hsh2rev
    assert len(hsh) == _hshlen
    idx = len(self._rev2hsh)
    flag = 0
    if sidebranch:
        flag |= sidebranchflag
    if path is not None and path != self._renamepaths[-1]:
        flag |= renameflag
        self._renamerevs.append(idx)
        self._renamepaths.append(path)
    self._rev2hsh.append(hsh)
    self._rev2flag.append(flag)
    self._hsh2rev[hsh] = idx
    if flush and self.path:
        if self._lastmaxrev == idx - 1:
            # the file is exactly one revision behind: incremental update.
            # binary append mode, matching the 'wb' used by flush().
            with open(self.path, 'ab') as f:
                self._writerev(idx, f)
            self._lastmaxrev = self.maxrev
        else:
            # earlier revisions were never written (append with
            # flush=False, copyfrom, ...). appending only this record
            # would shift revision numbers on disk, so rewrite the file.
            self._lastmaxrev = -1
            self.flush()
    return idx
|
|
|
|
|
|
|
|
def rev2hsh(self, rev):
    """convert linelog revision to hg hash. return None if not found.

    rev 0 is the reserved placeholder slot and also yields None.
    """
    # reject negative revs explicitly: they would otherwise index from
    # the end of the list.
    if rev > self.maxrev or rev < 0:
        return None
    return self._rev2hsh[rev]
|
|
|
|
|
|
|
|
def rev2flag(self, rev):
    """get the flag (uint8) for a given linelog revision.

    return None if revision does not exist. rev 0 is the reserved
    placeholder slot and also yields None.
    """
    # reject negative revs explicitly: they would otherwise index from
    # the end of the list.
    if rev > self.maxrev or rev < 0:
        return None
    return self._rev2flag[rev]
|
|
|
|
|
2016-09-19 12:51:54 +03:00
|
|
|
def rev2path(self, rev):
    """get the path for a given linelog revision.

    return None if revision does not exist.
    """
    if rev > self.maxrev or rev < 0:
        return None
    # self._renamerevs is in increasing order (append() only ever adds the
    # newest rev), so bisecting finds the last rename at or before rev.
    idx = bisect.bisect_right(self._renamerevs, rev) - 1
    return self._renamepaths[idx]
|
|
|
|
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
def hsh2rev(self, hsh):
    """convert hg hash to linelog revision. return None if not found."""
    return self._hsh2rev.get(hsh)
|
|
|
|
|
|
|
|
def clear(self, flush=False):
    """make the map empty. if flush is True, write to disk"""
    # rev 0 is reserved, real rev starts from 1
    self._rev2hsh = [None]
    self._rev2flag = [None]
    self._hsh2rev = {}
    # rename tracking must be reset too, otherwise paths recorded before
    # the clear would leak into revisions appended afterwards.
    self._renamerevs = [0]
    self._renamepaths = ['']
    # NOTE(review): nothing in the visible code reads _rev2path; it is kept
    # only in case something outside this view relies on it.
    self._rev2path = ['']
    # the file on disk (if any) is now out of date. -1 never equals maxrev,
    # so the next flush() is not skipped as "nothing changed".
    self._lastmaxrev = -1
    if flush:
        self.flush()
|
|
|
|
|
|
|
|
def flush(self):
    """write the state down to the file

    rewrites the whole file: the header followed by every revision from 1
    to maxrev. does nothing for an in-memory revmap (no path), or when
    maxrev equals the maxrev recorded at the last write.
    """
    if not self.path or self.maxrev == self._lastmaxrev:  # nothing changed
        return
    with open(self.path, 'wb') as f:
        f.write(self.HEADER)
        # rev 0 is a reserved placeholder and is not stored in the file.
        # range (not xrange) so this also runs on Python 3.
        for i in range(1, len(self._rev2hsh)):
            self._writerev(i, f)
    self._lastmaxrev = self.maxrev
|
fastannotate: implement a simple revision map
Summary:
To use linelog, we need to be able to translate between hg commit hashes and
linelog revision numbers. This diff implements such a revmap using the most
direct way.
The revmap also contains an extra "flag" for each revision, which will be used
to mark if the revision is in the main branch or not, to handle merge commits.
Test Plan:
`import revmap` from IPython and test its interface manually. Also have a
simple script to get some idea about its perf with 10000 revisions:
```
import contextlib, time, os, random, revmap, sys
@contextlib.contextmanager
def benchmark(msg):
sys.stderr.write('%s: ' % msg)
t1 = time.time()
yield
t2 = time.time()
sys.stderr.write('%f seconds\n' % (t2 - t1))
def randomid():
return ''.join([chr(random.randint(0,255)) for _ in xrange(0, 20)])
rm = revmap.revmap('revmap1')
with benchmark('insert 10000 random revisions'): # ~0.3 seconds
for i in xrange(0, 10000):
rm.append(randomid(), flag=1, flush=False)
with benchmark('writing to disk'): # 0.02 seconds
rm.flush()
os.rename('revmap1', 'revmap2')
with benchmark('loading'): # ~0.015 seconds
rm = revmap.revmap('revmap2')
```
Reviewers: ttung, #sourcecontrol, ikostia
Reviewed By: ikostia
Subscribers: ikostia, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3709706
Signature: t1:3709706:1471936489:0bbe35ed39a2af3f06e1000c4f9674149ad43995
2016-08-23 18:24:13 +03:00
|
|
|
|
|
|
|
def _load(self):
    """rebuild the in-memory state from the file at self.path

    Returns immediately when self.path is empty. Otherwise the file header
    is checked against self.HEADER, the current state is cleared (without
    flushing) and records are read until end of file. Each record is one
    flag byte, an optional '\\0'-terminated path (only when the rename flag
    bit is set) and a hash of _hshlen bytes.

    Raises error.CorruptedFileError if the header does not match or a hash
    is shorter than expected.
    """
    if not self.path:
        return
    # keep the sizes in local variables for the loop. CPython uses LOAD_FAST
    # for them, which is faster than both LOAD_CONST and LOAD_GLOBAL.
    flagsize = 1
    hashsize = _hshlen
    with open(self.path, 'rb') as fp:
        header = fp.read(len(self.HEADER))
        if header != self.HEADER:
            raise error.CorruptedFileError()
        self.clear(flush=False)
        rawflag = fp.read(flagsize)
        while rawflag:
            flag = ord(rawflag)
            # the revision being loaded gets the next free index
            rev = len(self._rev2hsh)
            if flag & renameflag:
                # the path sits between the flag byte and the hash
                renamedpath = self._readcstr(fp)
                self._renamerevs.append(rev)
                self._renamepaths.append(renamedpath)
            node = fp.read(hashsize)
            if len(node) != hashsize:
                # truncated record
                raise error.CorruptedFileError()
            self._hsh2rev[node] = rev
            self._rev2flag.append(flag)
            self._rev2hsh.append(node)
            rawflag = fp.read(flagsize)
    self._lastmaxrev = self.maxrev
|
2016-09-07 17:05:55 +03:00
|
|
|
|
2016-09-19 12:51:54 +03:00
|
|
|
def _writerev(self, rev, f):
    """write the on-disk record of revision rev to the file object f

    The record is the flag packed as one unsigned byte, then (only when the
    rename flag bit is set) the path from self.rev2path(rev) followed by
    '\\0', then the hash.
    """
    revflag, revhash = self._rev2flag[rev], self._rev2hsh[rev]
    write = f.write
    write(struct.pack('B', revflag))
    if revflag & renameflag:
        renamedpath = self.rev2path(rev)
        # a revision flagged as a rename is expected to have a path
        assert renamedpath is not None
        write(renamedpath + '\0')
    write(revhash)
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def _readcstr(f):
|
|
|
|
"""read a C-language-like '\0'-terminated string"""
|
|
|
|
buf = ''
|
|
|
|
while True:
|
|
|
|
ch = f.read(1)
|
|
|
|
if not ch: # unexpected eof
|
|
|
|
raise error.CorruptedFileError()
|
|
|
|
if ch == '\0':
|
|
|
|
break
|
|
|
|
buf += ch
|
|
|
|
return buf
|
|
|
|
|
2016-09-07 17:05:55 +03:00
|
|
|
def __contains__(self, f):
    """(fctx or node) -> bool.

    A str argument is used as the hash directly; anything else is asked
    for its hash through f.node(). The result is True only when the hash
    maps to a revision and that revision's flag does not have the
    side-branch bit set.
    """
    node = f if isinstance(f, str) else f.node()
    rev = self.hsh2rev(node)
    if rev is not None:
        return (self.rev2flag(rev) & sidebranchflag) == 0
    # unknown hash
    return False
|