mirror of
https://github.com/facebook/sapling.git
synced 2025-01-07 14:10:42 +03:00
Improved binary diff from Christopher Li
This version is more intelligent and efficient: it combines neighboring inserts, replaces, and deletes. It passes the test of converting the kernel repo, but doesn't appear to substantially affect compression or performance.
This commit is contained in:
parent
dbf12abbe4
commit
b43cebfe40
@ -19,28 +19,25 @@ def textdiff(a, b):
|
|||||||
|
|
||||||
def sortdiff(a, b):
|
def sortdiff(a, b):
|
||||||
la = lb = 0
|
la = lb = 0
|
||||||
|
lena = len(a)
|
||||||
|
lenb = len(b)
|
||||||
while 1:
|
while 1:
|
||||||
if la >= len(a) or lb >= len(b): break
|
am, bm, = la, lb
|
||||||
if b[lb] < a[la]:
|
while lb < lenb and la < len and a[la] == b[lb] :
|
||||||
si = lb
|
|
||||||
while lb < len(b) and b[lb] < a[la] : lb += 1
|
|
||||||
yield "insert", la, la, si, lb
|
|
||||||
elif a[la] < b[lb]:
|
|
||||||
si = la
|
|
||||||
while la < len(a) and a[la] < b[lb]: la += 1
|
|
||||||
yield "delete", si, la, lb, lb
|
|
||||||
else:
|
|
||||||
la += 1
|
la += 1
|
||||||
lb += 1
|
lb += 1
|
||||||
|
if la>am: yield (am, bm, la-am)
|
||||||
if lb < len(b):
|
while lb < lenb and b[lb] < a[la]: lb += 1
|
||||||
yield "insert", la, la, lb, len(b)
|
if lb>=lenb: break
|
||||||
|
while la < lena and b[lb] > a[la]: la += 1
|
||||||
if la < len(a):
|
if la>=lena: break
|
||||||
yield "delete", la, len(a), lb, lb
|
yield (lena, lenb, 0)
|
||||||
|
|
||||||
def diff(a, b, sorted=0):
|
def diff(a, b, sorted=0):
|
||||||
|
if not a:
|
||||||
|
s = "".join(b)
|
||||||
|
return s and (struct.pack(">lll", 0, 0, len(s)) + s)
|
||||||
|
|
||||||
bin = []
|
bin = []
|
||||||
p = [0]
|
p = [0]
|
||||||
for i in a: p.append(p[-1] + len(i))
|
for i in a: p.append(p[-1] + len(i))
|
||||||
@ -48,13 +45,16 @@ def diff(a, b, sorted=0):
|
|||||||
if sorted:
|
if sorted:
|
||||||
d = sortdiff(a, b)
|
d = sortdiff(a, b)
|
||||||
else:
|
else:
|
||||||
d = difflib.SequenceMatcher(None, a, b).get_opcodes()
|
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
|
||||||
|
la = 0
|
||||||
for o, m, n, s, t in d:
|
lb = 0
|
||||||
if o == 'equal': continue
|
for am, bm, size in d:
|
||||||
s = "".join(b[s:t])
|
s = "".join(b[lb:bm])
|
||||||
bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s)
|
if am > la or s:
|
||||||
|
bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
|
||||||
|
la = am + size
|
||||||
|
lb = bm + size
|
||||||
|
|
||||||
return "".join(bin)
|
return "".join(bin)
|
||||||
|
|
||||||
def patchtext(bin):
|
def patchtext(bin):
|
||||||
|
Loading…
Reference in New Issue
Block a user