bdiff: tweaks for large files

- adjust the common line threshold to 0.1%
  this speeds up a delta of 7M lines of source from 10m to 40s
  (the new threshold rule is sketched below)
- adjust the scaling of the hash array down a bit, as it was raising peak
  memory usage significantly (see the allocation sketch after the diff)
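
As a rough illustration of the first tweak, here is the new threshold rule in
isolation. This is a hypothetical standalone demo, not part of bdiff.c; only
the constants come from this change. Lines occurring more than t times in the
second file are treated as too popular to anchor a match.

#include <stdio.h>

static int threshold(int bn)
{
	/* new rule: 0.1% of lines once a file has 4000+ lines;
	 * below that, bn + 1 disables the cutoff entirely */
	return (bn >= 4000) ? bn / 1000 : bn + 1;
}

int main(void)
{
	printf("bn=1000    -> t=%d\n", threshold(1000));    /* 1001 (disabled) */
	printf("bn=4000    -> t=%d\n", threshold(4000));    /* 4 */
	printf("bn=7000000 -> t=%d\n", threshold(7000000)); /* 7000 */
	return 0;
}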
Matt Mackall 2007-10-11 00:46:56 -05:00
parent 81a40fb9b2
commit 0c1648fb37


@@ -106,19 +106,19 @@ int inline cmp(struct line *a, struct line *b)
 static int equatelines(struct line *a, int an, struct line *b, int bn)
 {
-	int i, j, buckets = 1, t;
-	int scale = 32;
-	struct pos *h;
+	int i, j, buckets = 1, t, scale;
+	struct pos *h = NULL;
 
 	/* build a hash table of the next highest power of 2 */
 	while (buckets < bn + 1)
 		buckets *= 2;
 
 	/* try to allocate a large hash table to avoid collisions */
-	do {
-		scale /= 2;
+	for (scale = 4; scale; scale /= 2) {
 		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
-	} while (!h && scale != 1);
+		if (h)
+			break;
+	}
 
 	if (!h)
 		return 0;
 
@@ -147,7 +147,7 @@ static int equatelines(struct line *a, int an, struct line *b, int bn)
 	}
 
 	/* compute popularity threshold */
-	t = (bn >= 200) ? bn / 100 : bn + 1;
+	t = (bn >= 4000) ? bn / 1000 : bn + 1;
 
 	/* match items in a to their equivalence class in b */
 	for (i = 0; i < an; i++) {
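
For reference, a sketch of the new allocation strategy from the first hunk in
isolation: the table is tried at 4x, 2x, then 1x the bucket count, so the
largest single request drops from 16 * buckets (the old loop started at
scale 32 and halved before the first malloc) to 4 * buckets. The
alloc_hash_table wrapper and the main driver are hypothetical; struct pos and
the fallback loop mirror the patched bdiff.c.

#include <stdlib.h>

struct pos {
	int pos, len;
};

static struct pos *alloc_hash_table(int buckets)
{
	struct pos *h = NULL;
	int scale;

	/* try a large table first to avoid collisions, then back off */
	for (scale = 4; scale; scale /= 2) {
		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
		if (h)
			break;
	}
	return h; /* NULL only if even the 1x allocation failed */
}

int main(void)
{
	struct pos *h = alloc_hash_table(1 << 20);
	free(h);
	return 0;
}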