Add a new `fingerprint` field to `TextSummary`

The fingerprint is calculated in `Rope` using the `bromberg_sl2`
homomorphic hash function: each chunk is hashed individually, and the
chunk fingerprints are then composed into a single fingerprint for the
entire rope. Because the hash is homomorphic, the composed fingerprint
is equivalent to hashing all of the rope's bytes at once.
This commit is contained in:
Antonio Scandurra 2022-06-17 11:45:26 +02:00
parent cef85f5d84
commit c31a233aad
6 changed files with 33 additions and 22 deletions

20
Cargo.lock generated
View File

@ -561,6 +561,18 @@ dependencies = [
"workspace",
]
[[package]]
name = "bromberg_sl2"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ed88064f69518b7e3ea50ecfc1b61d43f19248618a377b95ae5c8b611134d4d"
dependencies = [
"digest 0.9.0",
"lazy_static",
"rayon",
"seq-macro",
]
[[package]]
name = "bstr"
version = "0.2.17"
@ -4156,6 +4168,12 @@ dependencies = [
"pest",
]
[[package]]
name = "seq-macro"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9f47faea3cad316faa914d013d24f471cd90bfca1a0c70f05a3f42c6441e99"
[[package]]
name = "serde"
version = "1.0.137"
@ -4806,9 +4824,11 @@ version = "0.1.0"
dependencies = [
"anyhow",
"arrayvec 0.7.2",
"bromberg_sl2",
"clock",
"collections",
"ctor",
"digest 0.9.0",
"env_logger",
"gpui",
"lazy_static",

View File

@ -370,22 +370,10 @@ impl FoldMap {
if fold.end > fold.start {
let output_text = "";
let chars = output_text.chars().count() as u32;
let lines = Point::new(0, output_text.len() as u32);
let lines_utf16 =
PointUtf16::new(0, output_text.encode_utf16().count() as u32);
new_transforms.push(
Transform {
summary: TransformSummary {
output: TextSummary {
bytes: output_text.len(),
lines,
lines_utf16,
first_line_chars: chars,
last_line_chars: chars,
longest_row: 0,
longest_row_chars: chars,
},
output: TextSummary::from(output_text),
input: new_buffer.text_summary_for_range(fold.start..fold.end),
},
output_text: Some(output_text),

View File

@ -1923,15 +1923,7 @@ impl MultiBufferSnapshot {
);
if range.end > end_before_newline {
summary.add_assign(&D::from_text_summary(&TextSummary {
bytes: 1,
lines: Point::new(1 as u32, 0),
lines_utf16: PointUtf16::new(1 as u32, 0),
first_line_chars: 0,
last_line_chars: 0,
longest_row: 0,
longest_row_chars: 0,
}));
summary.add_assign(&D::from_text_summary(&TextSummary::from("\n")));
}
cursor.next(&());

View File

@ -16,6 +16,8 @@ collections = { path = "../collections" }
sum_tree = { path = "../sum_tree" }
anyhow = "1.0.38"
arrayvec = "0.7.1"
digest = { version = "0.9", features = ["std"] }
bromberg_sl2 = "0.6"
lazy_static = "1.4"
log = { version = "0.4.16", features = ["kv_unstable_serde"] }
parking_lot = "0.11"

View File

@ -2,6 +2,7 @@ use crate::PointUtf16;
use super::Point;
use arrayvec::ArrayString;
use bromberg_sl2::HashMatrix;
use smallvec::SmallVec;
use std::{cmp, fmt, io, mem, ops::Range, str};
use sum_tree::{Bias, Dimension, SumTree};
@ -725,6 +726,7 @@ pub struct TextSummary {
pub last_line_chars: u32,
pub longest_row: u32,
pub longest_row_chars: u32,
pub fingerprint: HashMatrix,
}
impl<'a> From<&'a str> for TextSummary {
@ -764,6 +766,7 @@ impl<'a> From<&'a str> for TextSummary {
last_line_chars,
longest_row,
longest_row_chars,
fingerprint: bromberg_sl2::hash_strict(text.as_bytes()),
}
}
}
@ -810,6 +813,7 @@ impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
self.bytes += other.bytes;
self.lines += other.lines;
self.lines_utf16 += other.lines_utf16;
self.fingerprint = self.fingerprint * other.fingerprint;
}
}

View File

@ -226,6 +226,7 @@ fn test_text_summary_for_range() {
last_line_chars: 0,
longest_row: 0,
longest_row_chars: 1,
fingerprint: bromberg_sl2::hash_strict(b"b\n")
}
);
assert_eq!(
@ -238,6 +239,7 @@ fn test_text_summary_for_range() {
last_line_chars: 0,
longest_row: 2,
longest_row_chars: 4,
fingerprint: bromberg_sl2::hash_strict(b"b\nefg\nhklm\n")
}
);
assert_eq!(
@ -250,6 +252,7 @@ fn test_text_summary_for_range() {
last_line_chars: 1,
longest_row: 3,
longest_row_chars: 6,
fingerprint: bromberg_sl2::hash_strict(b"ab\nefg\nhklm\nnopqrs\nt")
}
);
assert_eq!(
@ -262,6 +265,7 @@ fn test_text_summary_for_range() {
last_line_chars: 3,
longest_row: 3,
longest_row_chars: 6,
fingerprint: bromberg_sl2::hash_strict(b"ab\nefg\nhklm\nnopqrs\ntuv")
}
);
assert_eq!(
@ -274,6 +278,7 @@ fn test_text_summary_for_range() {
last_line_chars: 3,
longest_row: 1,
longest_row_chars: 6,
fingerprint: bromberg_sl2::hash_strict(b"hklm\nnopqrs\ntuv")
}
);
}