bdiff: tweaks for large files

- adjust the common line threshold to 0.1%
  this speeds up a delta of 7M lines of source from 10m to 40s
  (the new threshold rule is sketched below)
- adjust the scaling of the hash array down a bit, as it was raising peak
  memory usage significantly (see the allocation sketch after the diff)
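
As a rough illustration of the first tweak, here is the new threshold rule in
isolation. This is a hypothetical standalone demo, not part of bdiff.c; only
the constants come from this change. Lines occurring more than t times in the
second file are treated as too popular to anchor a match.

#include <stdio.h>

static int threshold(int bn)
{
	/* new rule: 0.1% of lines once a file has 4000+ lines;
	 * below that, bn + 1 disables the cutoff entirely */
	return (bn >= 4000) ? bn / 1000 : bn + 1;
}

int main(void)
{
	printf("bn=1000    -> t=%d\n", threshold(1000));    /* 1001 (disabled) */
	printf("bn=4000    -> t=%d\n", threshold(4000));    /* 4 */
	printf("bn=7000000 -> t=%d\n", threshold(7000000)); /* 7000 */
	return 0;
}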
Matt Mackall 2007-10-11 00:46:56 -05:00
parent 81a40fb9b2
commit 0c1648fb37


@@ -106,19 +106,19 @@ int inline cmp(struct line *a, struct line *b)
 static int equatelines(struct line *a, int an, struct line *b, int bn)
 {
-	int i, j, buckets = 1, t;
-	int scale = 32;
-	struct pos *h;
+	int i, j, buckets = 1, t, scale;
+	struct pos *h = NULL;
 
 	/* build a hash table of the next highest power of 2 */
 	while (buckets < bn + 1)
 		buckets *= 2;
 
 	/* try to allocate a large hash table to avoid collisions */
-	do {
-		scale /= 2;
+	for (scale = 4; scale; scale /= 2) {
 		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
-	} while (!h && scale != 1);
+		if (h)
+			break;
+	}
 
 	if (!h)
 		return 0;
 
@@ -147,7 +147,7 @@ static int equatelines(struct line *a, int an, struct line *b, int bn)
 	}
 
 	/* compute popularity threshold */
-	t = (bn >= 200) ? bn / 100 : bn + 1;
+	t = (bn >= 4000) ? bn / 1000 : bn + 1;
 
 	/* match items in a to their equivalence class in b */
 	for (i = 0; i < an; i++) {
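
For reference, a sketch of the new allocation strategy from the first hunk in
isolation: the table is tried at 4x, 2x, then 1x the bucket count, so the
largest single request drops from 16 * buckets (the old loop started at
scale 32 and halved before the first malloc) to 4 * buckets. The
alloc_hash_table wrapper and the main driver are hypothetical; struct pos and
the fallback loop mirror the patched bdiff.c.

#include <stdlib.h>

struct pos {
	int pos, len;
};

static struct pos *alloc_hash_table(int buckets)
{
	struct pos *h = NULL;
	int scale;

	/* try a large table first to avoid collisions, then back off */
	for (scale = 4; scale; scale /= 2) {
		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
		if (h)
			break;
	}
	return h; /* NULL only if even the 1x allocation failed */
}

int main(void)
{
	struct pos *h = alloc_hash_table(1 << 20);
	free(h);
	return 0;
}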