Revert "language: Remove buffer fingerprinting (#9007)"

This reverts commit 6f2f61c9b1.
This commit is contained in:
Conrad Irwin 2024-03-21 14:10:18 -06:00
parent 35e3935e8f
commit caed275fbf
12 changed files with 122 additions and 22 deletions

18
Cargo.lock generated
View File

@ -1567,6 +1567,17 @@ dependencies = [
"workspace",
]
[[package]]
name = "bromberg_sl2"
version = "0.6.0"
source = "git+https://github.com/zed-industries/bromberg_sl2?rev=950bc5482c216c395049ae33ae4501e08975f17f#950bc5482c216c395049ae33ae4501e08975f17f"
dependencies = [
"digest 0.9.0",
"lazy_static",
"rayon",
"seq-macro",
]
[[package]]
name = "bstr"
version = "1.6.2"
@ -7746,6 +7757,7 @@ name = "rope"
version = "0.1.0"
dependencies = [
"arrayvec 0.7.4",
"bromberg_sl2",
"gpui",
"log",
"rand 0.8.5",
@ -8325,6 +8337,12 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
[[package]]
name = "seq-macro"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9f47faea3cad316faa914d013d24f471cd90bfca1a0c70f05a3f42c6441e99"
[[package]]
name = "serde"
version = "1.0.196"

View File

@ -1347,11 +1347,13 @@ impl RandomizedTest for ProjectCollaborationTest {
client.username
);
let host_is_dirty = host_buffer.read_with(host_cx, |b, _| b.is_dirty());
let guest_is_dirty = guest_buffer.read_with(client_cx, |b, _| b.is_dirty());
let host_saved_version_fingerprint =
host_buffer.read_with(host_cx, |b, _| b.saved_version_fingerprint());
let guest_saved_version_fingerprint =
guest_buffer.read_with(client_cx, |b, _| b.saved_version_fingerprint());
assert_eq!(
guest_is_dirty, host_is_dirty,
"guest {} dirty state does not match host's for path {path:?} in project {project_id}",
guest_saved_version_fingerprint, host_saved_version_fingerprint,
"guest {} saved fingerprint does not match host's for path {path:?} in project {project_id}",
client.username
);

View File

@ -1263,6 +1263,7 @@ mod tests {
&self,
_: BufferId,
_: &clock::Global,
_: language::RopeFingerprint,
_: language::LineEnding,
_: Option<std::time::SystemTime>,
_: &mut AppContext,

View File

@ -730,8 +730,9 @@ impl Item for Editor {
buffer
.update(&mut cx, |buffer, cx| {
let version = buffer.saved_version().clone();
let fingerprint = buffer.saved_version_fingerprint();
let mtime = buffer.saved_mtime();
buffer.did_save(version, mtime, cx);
buffer.did_save(version, fingerprint, mtime, cx);
})
.ok();
}

View File

@ -45,9 +45,9 @@ use text::operation_queue::OperationQueue;
use text::*;
pub use text::{
Anchor, Bias, Buffer as TextBuffer, BufferId, BufferSnapshot as TextBufferSnapshot, Edit,
OffsetRangeExt, OffsetUtf16, Patch, Point, PointUtf16, Rope, Selection, SelectionGoal,
Subscription, TextDimension, TextSummary, ToOffset, ToOffsetUtf16, ToPoint, ToPointUtf16,
Transaction, TransactionId, Unclipped,
OffsetRangeExt, OffsetUtf16, Patch, Point, PointUtf16, Rope, RopeFingerprint, Selection,
SelectionGoal, Subscription, TextDimension, TextSummary, ToOffset, ToOffsetUtf16, ToPoint,
ToPointUtf16, Transaction, TransactionId, Unclipped,
};
use theme::SyntaxTheme;
#[cfg(any(test, feature = "test-support"))]
@ -87,6 +87,8 @@ pub struct Buffer {
/// The version vector when this buffer was last loaded from
/// or saved to disk.
saved_version: clock::Global,
/// A hash of the current contents of the buffer's file.
file_fingerprint: RopeFingerprint,
transaction_depth: usize,
was_dirty_before_starting_transaction: Option<bool>,
reload_task: Option<Task<Result<()>>>,
@ -405,6 +407,7 @@ pub trait LocalFile: File {
&self,
buffer_id: BufferId,
version: &clock::Global,
fingerprint: RopeFingerprint,
line_ending: LineEnding,
mtime: Option<SystemTime>,
cx: &mut AppContext,
@ -574,6 +577,7 @@ impl Buffer {
.ok_or_else(|| anyhow!("missing line_ending"))?,
));
this.saved_version = proto::deserialize_version(&message.saved_version);
this.file_fingerprint = proto::deserialize_fingerprint(&message.saved_version_fingerprint)?;
this.saved_mtime = message.saved_mtime.map(|time| time.into());
Ok(this)
}
@ -587,6 +591,7 @@ impl Buffer {
diff_base: self.diff_base.as_ref().map(|h| h.to_string()),
line_ending: proto::serialize_line_ending(self.line_ending()) as i32,
saved_version: proto::serialize_version(&self.saved_version),
saved_version_fingerprint: proto::serialize_fingerprint(self.file_fingerprint),
saved_mtime: self.saved_mtime.map(|time| time.into()),
}
}
@ -666,6 +671,7 @@ impl Buffer {
Self {
saved_mtime,
saved_version: buffer.version(),
file_fingerprint: buffer.as_rope().fingerprint(),
reload_task: None,
transaction_depth: 0,
was_dirty_before_starting_transaction: None,
@ -740,6 +746,11 @@ impl Buffer {
&self.saved_version
}
/// The fingerprint of the buffer's text when the buffer was last saved or reloaded from disk.
pub fn saved_version_fingerprint(&self) -> RopeFingerprint {
self.file_fingerprint
}
/// The mtime of the buffer's file when the buffer was last saved or reloaded from disk.
pub fn saved_mtime(&self) -> Option<SystemTime> {
self.saved_mtime
@ -772,11 +783,13 @@ impl Buffer {
pub fn did_save(
&mut self,
version: clock::Global,
fingerprint: RopeFingerprint,
mtime: Option<SystemTime>,
cx: &mut ModelContext<Self>,
) {
self.saved_version = version;
self.has_conflict = false;
self.file_fingerprint = fingerprint;
self.saved_mtime = mtime;
cx.emit(Event::Saved);
cx.notify();
@ -808,7 +821,13 @@ impl Buffer {
this.apply_diff(diff, cx);
tx.send(this.finalize_last_transaction().cloned()).ok();
this.has_conflict = false;
this.did_reload(this.version(), this.line_ending(), new_mtime, cx);
this.did_reload(
this.version(),
this.as_rope().fingerprint(),
this.line_ending(),
new_mtime,
cx,
);
} else {
if !diff.edits.is_empty()
|| this
@ -819,7 +838,13 @@ impl Buffer {
this.has_conflict = true;
}
this.did_reload(prev_version, this.line_ending(), this.saved_mtime, cx);
this.did_reload(
prev_version,
Rope::text_fingerprint(&new_text),
this.line_ending(),
this.saved_mtime,
cx,
);
}
this.reload_task.take();
@ -832,17 +857,20 @@ impl Buffer {
pub fn did_reload(
&mut self,
version: clock::Global,
fingerprint: RopeFingerprint,
line_ending: LineEnding,
mtime: Option<SystemTime>,
cx: &mut ModelContext<Self>,
) {
self.saved_version = version;
self.file_fingerprint = fingerprint;
self.text.set_line_ending(line_ending);
self.saved_mtime = mtime;
if let Some(file) = self.file.as_ref().and_then(|f| f.as_local()) {
file.buffer_reloaded(
self.remote_id(),
&self.saved_version,
self.file_fingerprint,
self.line_ending(),
self.saved_mtime,
cx,

View File

@ -13,6 +13,17 @@ use text::*;
pub use proto::{BufferState, Operation};
/// Serializes a [`RopeFingerprint`] to be sent over RPC.
pub fn serialize_fingerprint(fingerprint: RopeFingerprint) -> String {
fingerprint.to_hex()
}
/// Deserializes a [`RopeFingerprint`] from the RPC representation.
pub fn deserialize_fingerprint(fingerprint: &str) -> Result<RopeFingerprint> {
RopeFingerprint::from_hex(fingerprint)
.map_err(|error| anyhow!("invalid fingerprint: {}", error))
}
/// Deserializes a `[text::LineEnding]` from the RPC representation.
pub fn deserialize_line_ending(message: proto::LineEnding) -> text::LineEnding {
match message {

View File

@ -37,8 +37,8 @@ use language::{
language_settings::{language_settings, FormatOnSave, Formatter, InlayHintKind},
markdown, point_to_lsp,
proto::{
deserialize_anchor, deserialize_line_ending, deserialize_version, serialize_anchor,
serialize_version, split_operations,
deserialize_anchor, deserialize_fingerprint, deserialize_line_ending, deserialize_version,
serialize_anchor, serialize_version, split_operations,
},
range_from_lsp, Bias, Buffer, BufferSnapshot, CachedLspAdapter, Capability, CodeAction,
CodeLabel, Completion, Diagnostic, DiagnosticEntry, DiagnosticSet, Diff, Documentation,
@ -7922,6 +7922,7 @@ impl Project {
buffer_id: buffer_id.into(),
version: serialize_version(buffer.saved_version()),
mtime: buffer.saved_mtime().map(|time| time.into()),
fingerprint: language::proto::serialize_fingerprint(buffer.saved_version_fingerprint()),
})
}
@ -8014,6 +8015,9 @@ impl Project {
buffer_id: buffer_id.into(),
version: language::proto::serialize_version(buffer.saved_version()),
mtime: buffer.saved_mtime().map(|time| time.into()),
fingerprint: language::proto::serialize_fingerprint(
buffer.saved_version_fingerprint(),
),
line_ending: language::proto::serialize_line_ending(
buffer.line_ending(),
) as i32,
@ -8890,6 +8894,7 @@ impl Project {
_: Arc<Client>,
mut cx: AsyncAppContext,
) -> Result<()> {
let fingerprint = deserialize_fingerprint(&envelope.payload.fingerprint)?;
let version = deserialize_version(&envelope.payload.version);
let buffer_id = BufferId::new(envelope.payload.buffer_id)?;
let mtime = envelope.payload.mtime.map(|time| time.into());
@ -8906,7 +8911,7 @@ impl Project {
});
if let Some(buffer) = buffer {
buffer.update(cx, |buffer, cx| {
buffer.did_save(version, mtime, cx);
buffer.did_save(version, fingerprint, mtime, cx);
});
}
Ok(())
@ -8921,6 +8926,7 @@ impl Project {
) -> Result<()> {
let payload = envelope.payload;
let version = deserialize_version(&payload.version);
let fingerprint = deserialize_fingerprint(&payload.fingerprint)?;
let line_ending = deserialize_line_ending(
proto::LineEnding::from_i32(payload.line_ending)
.ok_or_else(|| anyhow!("missing line ending"))?,
@ -8940,7 +8946,7 @@ impl Project {
});
if let Some(buffer) = buffer {
buffer.update(cx, |buffer, cx| {
buffer.did_reload(version, line_ending, mtime, cx);
buffer.did_reload(version, fingerprint, line_ending, mtime, cx);
});
}
Ok(())

View File

@ -3118,7 +3118,12 @@ async fn test_buffer_is_dirty(cx: &mut gpui::TestAppContext) {
&[language::Event::Edited, language::Event::DirtyChanged]
);
events.lock().clear();
buffer.did_save(buffer.version(), buffer.file().unwrap().mtime(), cx);
buffer.did_save(
buffer.version(),
buffer.as_rope().fingerprint(),
buffer.file().unwrap().mtime(),
cx,
);
});
// after saving, the buffer is not dirty, and emits a saved event.

View File

@ -13,6 +13,7 @@ path = "src/rope.rs"
[dependencies]
arrayvec = "0.7.1"
bromberg_sl2 = { git = "https://github.com/zed-industries/bromberg_sl2", rev = "950bc5482c216c395049ae33ae4501e08975f17f" }
log.workspace = true
smallvec.workspace = true
sum_tree.workspace = true

View File

@ -4,6 +4,7 @@ mod point_utf16;
mod unclipped;
use arrayvec::ArrayString;
use bromberg_sl2::HashMatrix;
use smallvec::SmallVec;
use std::{
cmp, fmt, io, mem,
@ -24,6 +25,12 @@ const CHUNK_BASE: usize = 6;
#[cfg(not(test))]
const CHUNK_BASE: usize = 16;
/// Type alias to [`HashMatrix`], an implementation of a homomorphic hash function. Two [`Rope`] instances
/// containing the same text will produce the same fingerprint. This hash function is special in that
/// it allows us to hash individual chunks and aggregate them up the [`Rope`]'s tree, with the resulting
/// hash being equivalent to hashing all the text contained in the [`Rope`] at once.
pub type RopeFingerprint = HashMatrix;
#[derive(Clone, Default)]
pub struct Rope {
chunks: SumTree<Chunk>,
@ -34,6 +41,10 @@ impl Rope {
Self::default()
}
pub fn text_fingerprint(text: &str) -> RopeFingerprint {
bromberg_sl2::hash_strict(text.as_bytes())
}
pub fn append(&mut self, rope: Rope) {
let mut chunks = rope.chunks.cursor::<()>();
chunks.next(&());
@ -412,6 +423,10 @@ impl Rope {
self.clip_point(Point::new(row, u32::MAX), Bias::Left)
.column
}
pub fn fingerprint(&self) -> RopeFingerprint {
self.chunks.summary().fingerprint
}
}
impl<'a> From<&'a str> for Rope {
@ -962,12 +977,14 @@ impl sum_tree::Item for Chunk {
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct ChunkSummary {
text: TextSummary,
fingerprint: RopeFingerprint,
}
impl<'a> From<&'a str> for ChunkSummary {
fn from(text: &'a str) -> Self {
Self {
text: TextSummary::from(text),
fingerprint: Rope::text_fingerprint(text),
}
}
}
@ -977,6 +994,7 @@ impl sum_tree::Summary for ChunkSummary {
fn add_summary(&mut self, summary: &Self, _: &()) {
self.text += &summary.text;
self.fingerprint = self.fingerprint * summary.fingerprint;
}
}

View File

@ -674,7 +674,7 @@ message BufferSaved {
uint64 buffer_id = 2;
repeated VectorClockEntry version = 3;
Timestamp mtime = 4;
reserved 5;
string fingerprint = 5;
}
message BufferReloaded {
@ -682,7 +682,7 @@ message BufferReloaded {
uint64 buffer_id = 2;
repeated VectorClockEntry version = 3;
Timestamp mtime = 4;
reserved 5;
string fingerprint = 5;
LineEnding line_ending = 6;
}
@ -1502,7 +1502,7 @@ message BufferState {
optional string diff_base = 4;
LineEnding line_ending = 5;
repeated VectorClockEntry saved_version = 6;
reserved 7;
string saved_version_fingerprint = 7;
Timestamp saved_mtime = 8;
}

View File

@ -31,8 +31,12 @@ use gpui::{
use ignore::IgnoreStack;
use itertools::Itertools;
use language::{
proto::{deserialize_version, serialize_line_ending, serialize_version},
Buffer, Capability, DiagnosticEntry, File as _, LineEnding, PointUtf16, Rope, Unclipped,
proto::{
deserialize_fingerprint, deserialize_version, serialize_fingerprint, serialize_line_ending,
serialize_version,
},
Buffer, Capability, DiagnosticEntry, File as _, LineEnding, PointUtf16, Rope, RopeFingerprint,
Unclipped,
};
use lsp::{DiagnosticSeverity, LanguageServerId};
use parking_lot::Mutex;
@ -1147,6 +1151,7 @@ impl LocalWorktree {
}
let text = buffer.as_rope().clone();
let fingerprint = text.fingerprint();
let version = buffer.version();
let save = self.write_file(path.as_ref(), text, buffer.line_ending(), cx);
let fs = Arc::clone(&self.fs);
@ -1209,11 +1214,12 @@ impl LocalWorktree {
buffer_id,
version: serialize_version(&version),
mtime: mtime.map(|time| time.into()),
fingerprint: serialize_fingerprint(fingerprint),
})?;
}
buffer_handle.update(&mut cx, |buffer, cx| {
buffer.did_save(version.clone(), mtime, cx);
buffer.did_save(version.clone(), fingerprint, mtime, cx);
})?;
Ok(())
@ -1614,10 +1620,11 @@ impl RemoteWorktree {
})
.await?;
let version = deserialize_version(&response.version);
let fingerprint = deserialize_fingerprint(&response.fingerprint)?;
let mtime = response.mtime.map(|mtime| mtime.into());
buffer_handle.update(&mut cx, |buffer, cx| {
buffer.did_save(version.clone(), mtime, cx);
buffer.did_save(version.clone(), fingerprint, mtime, cx);
})?;
Ok(())
@ -2999,6 +3006,7 @@ impl language::LocalFile for File {
&self,
buffer_id: BufferId,
version: &clock::Global,
fingerprint: RopeFingerprint,
line_ending: LineEnding,
mtime: Option<SystemTime>,
cx: &mut AppContext,
@ -3012,6 +3020,7 @@ impl language::LocalFile for File {
buffer_id: buffer_id.into(),
version: serialize_version(version),
mtime: mtime.map(|time| time.into()),
fingerprint: serialize_fingerprint(fingerprint),
line_ending: serialize_line_ending(line_ending) as i32,
})
.log_err();