Improved binary diff from Christopher Li

This version is more intelligent and efficient because it combines
neighboring inserts, replaces, and deletes. It passes the test of converting
the kernel repo, but doesn't appear to substantially affect compression or
performance.
This commit is contained in:
mpm@selenic.com 2005-05-27 19:38:34 -08:00
parent dbf12abbe4
commit b43cebfe40

View File

@ -19,28 +19,25 @@ def textdiff(a, b):
def sortdiff(a, b): def sortdiff(a, b):
la = lb = 0 la = lb = 0
lena = len(a)
lenb = len(b)
while 1: while 1:
if la >= len(a) or lb >= len(b): break am, bm, = la, lb
if b[lb] < a[la]: while lb < lenb and la < len and a[la] == b[lb] :
si = lb
while lb < len(b) and b[lb] < a[la] : lb += 1
yield "insert", la, la, si, lb
elif a[la] < b[lb]:
si = la
while la < len(a) and a[la] < b[lb]: la += 1
yield "delete", si, la, lb, lb
else:
la += 1 la += 1
lb += 1 lb += 1
if la>am: yield (am, bm, la-am)
if lb < len(b): while lb < lenb and b[lb] < a[la]: lb += 1
yield "insert", la, la, lb, len(b) if lb>=lenb: break
while la < lena and b[lb] > a[la]: la += 1
if la < len(a): if la>=lena: break
yield "delete", la, len(a), lb, lb yield (lena, lenb, 0)
def diff(a, b, sorted=0): def diff(a, b, sorted=0):
if not a:
s = "".join(b)
return s and (struct.pack(">lll", 0, 0, len(s)) + s)
bin = [] bin = []
p = [0] p = [0]
for i in a: p.append(p[-1] + len(i)) for i in a: p.append(p[-1] + len(i))
@ -48,13 +45,16 @@ def diff(a, b, sorted=0):
if sorted: if sorted:
d = sortdiff(a, b) d = sortdiff(a, b)
else: else:
d = difflib.SequenceMatcher(None, a, b).get_opcodes() d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
la = 0
for o, m, n, s, t in d: lb = 0
if o == 'equal': continue for am, bm, size in d:
s = "".join(b[s:t]) s = "".join(b[lb:bm])
bin.append(struct.pack(">lll", p[m], p[n], len(s)) + s) if am > la or s:
bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
la = am + size
lb = bm + size
return "".join(bin) return "".join(bin)
def patchtext(bin): def patchtext(bin):