From caed275fbf58af9c3ac89ef11229d0a6bd13016f Mon Sep 17 00:00:00 2001 From: Conrad Irwin Date: Thu, 21 Mar 2024 14:10:18 -0600 Subject: [PATCH] Revert "language: Remove buffer fingerprinting (#9007)" This reverts commit 6f2f61c9b1bf7a7285ba2a27d10c4c7a6022e670. --- Cargo.lock | 18 +++++++++ .../random_project_collaboration_tests.rs | 10 +++-- crates/copilot/src/copilot.rs | 1 + crates/editor/src/items.rs | 3 +- crates/language/src/buffer.rs | 38 ++++++++++++++++--- crates/language/src/proto.rs | 11 ++++++ crates/project/src/project.rs | 14 +++++-- crates/project/src/project_tests.rs | 7 +++- crates/rope/Cargo.toml | 1 + crates/rope/src/rope.rs | 18 +++++++++ crates/rpc/proto/zed.proto | 6 +-- crates/worktree/src/worktree.rs | 17 +++++++-- 12 files changed, 122 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 04bfc860bf..c6bd889ea7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1567,6 +1567,17 @@ dependencies = [ "workspace", ] +[[package]] +name = "bromberg_sl2" +version = "0.6.0" +source = "git+https://github.com/zed-industries/bromberg_sl2?rev=950bc5482c216c395049ae33ae4501e08975f17f#950bc5482c216c395049ae33ae4501e08975f17f" +dependencies = [ + "digest 0.9.0", + "lazy_static", + "rayon", + "seq-macro", +] + [[package]] name = "bstr" version = "1.6.2" @@ -7746,6 +7757,7 @@ name = "rope" version = "0.1.0" dependencies = [ "arrayvec 0.7.4", + "bromberg_sl2", "gpui", "log", "rand 0.8.5", @@ -8325,6 +8337,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +[[package]] +name = "seq-macro" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9f47faea3cad316faa914d013d24f471cd90bfca1a0c70f05a3f42c6441e99" + [[package]] name = "serde" version = "1.0.196" diff --git a/crates/collab/src/tests/random_project_collaboration_tests.rs b/crates/collab/src/tests/random_project_collaboration_tests.rs index d0dafa8299..008aed8880 100644 --- a/crates/collab/src/tests/random_project_collaboration_tests.rs +++ b/crates/collab/src/tests/random_project_collaboration_tests.rs @@ -1347,11 +1347,13 @@ impl RandomizedTest for ProjectCollaborationTest { client.username ); - let host_is_dirty = host_buffer.read_with(host_cx, |b, _| b.is_dirty()); - let guest_is_dirty = guest_buffer.read_with(client_cx, |b, _| b.is_dirty()); + let host_saved_version_fingerprint = + host_buffer.read_with(host_cx, |b, _| b.saved_version_fingerprint()); + let guest_saved_version_fingerprint = + guest_buffer.read_with(client_cx, |b, _| b.saved_version_fingerprint()); assert_eq!( - guest_is_dirty, host_is_dirty, - "guest {} dirty state does not match host's for path {path:?} in project {project_id}", + guest_saved_version_fingerprint, host_saved_version_fingerprint, + "guest {} saved fingerprint does not match host's for path {path:?} in project {project_id}", client.username ); diff --git a/crates/copilot/src/copilot.rs b/crates/copilot/src/copilot.rs index e1d9d5f977..71d9554ac6 100644 --- a/crates/copilot/src/copilot.rs +++ b/crates/copilot/src/copilot.rs @@ -1263,6 +1263,7 @@ mod tests { &self, _: BufferId, _: &clock::Global, + _: language::RopeFingerprint, _: language::LineEnding, _: Option, _: &mut AppContext, diff --git a/crates/editor/src/items.rs b/crates/editor/src/items.rs index 102e654bba..944f63133d 100644 --- a/crates/editor/src/items.rs +++ b/crates/editor/src/items.rs @@ -730,8 +730,9 @@ impl Item for Editor { buffer .update(&mut cx, |buffer, cx| { let version = buffer.saved_version().clone(); + let fingerprint = buffer.saved_version_fingerprint(); let mtime = buffer.saved_mtime(); - buffer.did_save(version, mtime, cx); + buffer.did_save(version, fingerprint, mtime, cx); }) .ok(); } diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index c473385827..9bbba14474 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -45,9 +45,9 @@ use text::operation_queue::OperationQueue; use text::*; pub use text::{ Anchor, Bias, Buffer as TextBuffer, BufferId, BufferSnapshot as TextBufferSnapshot, Edit, - OffsetRangeExt, OffsetUtf16, Patch, Point, PointUtf16, Rope, Selection, SelectionGoal, - Subscription, TextDimension, TextSummary, ToOffset, ToOffsetUtf16, ToPoint, ToPointUtf16, - Transaction, TransactionId, Unclipped, + OffsetRangeExt, OffsetUtf16, Patch, Point, PointUtf16, Rope, RopeFingerprint, Selection, + SelectionGoal, Subscription, TextDimension, TextSummary, ToOffset, ToOffsetUtf16, ToPoint, + ToPointUtf16, Transaction, TransactionId, Unclipped, }; use theme::SyntaxTheme; #[cfg(any(test, feature = "test-support"))] @@ -87,6 +87,8 @@ pub struct Buffer { /// The version vector when this buffer was last loaded from /// or saved to disk. saved_version: clock::Global, + /// A hash of the current contents of the buffer's file. + file_fingerprint: RopeFingerprint, transaction_depth: usize, was_dirty_before_starting_transaction: Option, reload_task: Option>>, @@ -405,6 +407,7 @@ pub trait LocalFile: File { &self, buffer_id: BufferId, version: &clock::Global, + fingerprint: RopeFingerprint, line_ending: LineEnding, mtime: Option, cx: &mut AppContext, @@ -574,6 +577,7 @@ impl Buffer { .ok_or_else(|| anyhow!("missing line_ending"))?, )); this.saved_version = proto::deserialize_version(&message.saved_version); + this.file_fingerprint = proto::deserialize_fingerprint(&message.saved_version_fingerprint)?; this.saved_mtime = message.saved_mtime.map(|time| time.into()); Ok(this) } @@ -587,6 +591,7 @@ impl Buffer { diff_base: self.diff_base.as_ref().map(|h| h.to_string()), line_ending: proto::serialize_line_ending(self.line_ending()) as i32, saved_version: proto::serialize_version(&self.saved_version), + saved_version_fingerprint: proto::serialize_fingerprint(self.file_fingerprint), saved_mtime: self.saved_mtime.map(|time| time.into()), } } @@ -666,6 +671,7 @@ impl Buffer { Self { saved_mtime, saved_version: buffer.version(), + file_fingerprint: buffer.as_rope().fingerprint(), reload_task: None, transaction_depth: 0, was_dirty_before_starting_transaction: None, @@ -740,6 +746,11 @@ impl Buffer { &self.saved_version } + /// The fingerprint of the buffer's text when the buffer was last saved or reloaded from disk. + pub fn saved_version_fingerprint(&self) -> RopeFingerprint { + self.file_fingerprint + } + /// The mtime of the buffer's file when the buffer was last saved or reloaded from disk. pub fn saved_mtime(&self) -> Option { self.saved_mtime @@ -772,11 +783,13 @@ impl Buffer { pub fn did_save( &mut self, version: clock::Global, + fingerprint: RopeFingerprint, mtime: Option, cx: &mut ModelContext, ) { self.saved_version = version; self.has_conflict = false; + self.file_fingerprint = fingerprint; self.saved_mtime = mtime; cx.emit(Event::Saved); cx.notify(); @@ -808,7 +821,13 @@ impl Buffer { this.apply_diff(diff, cx); tx.send(this.finalize_last_transaction().cloned()).ok(); this.has_conflict = false; - this.did_reload(this.version(), this.line_ending(), new_mtime, cx); + this.did_reload( + this.version(), + this.as_rope().fingerprint(), + this.line_ending(), + new_mtime, + cx, + ); } else { if !diff.edits.is_empty() || this @@ -819,7 +838,13 @@ impl Buffer { this.has_conflict = true; } - this.did_reload(prev_version, this.line_ending(), this.saved_mtime, cx); + this.did_reload( + prev_version, + Rope::text_fingerprint(&new_text), + this.line_ending(), + this.saved_mtime, + cx, + ); } this.reload_task.take(); @@ -832,17 +857,20 @@ impl Buffer { pub fn did_reload( &mut self, version: clock::Global, + fingerprint: RopeFingerprint, line_ending: LineEnding, mtime: Option, cx: &mut ModelContext, ) { self.saved_version = version; + self.file_fingerprint = fingerprint; self.text.set_line_ending(line_ending); self.saved_mtime = mtime; if let Some(file) = self.file.as_ref().and_then(|f| f.as_local()) { file.buffer_reloaded( self.remote_id(), &self.saved_version, + self.file_fingerprint, self.line_ending(), self.saved_mtime, cx, diff --git a/crates/language/src/proto.rs b/crates/language/src/proto.rs index ec26c87c7c..eae72092ba 100644 --- a/crates/language/src/proto.rs +++ b/crates/language/src/proto.rs @@ -13,6 +13,17 @@ use text::*; pub use proto::{BufferState, Operation}; +/// Serializes a [`RopeFingerprint`] to be sent over RPC. +pub fn serialize_fingerprint(fingerprint: RopeFingerprint) -> String { + fingerprint.to_hex() +} + +/// Deserializes a [`RopeFingerprint`] from the RPC representation. +pub fn deserialize_fingerprint(fingerprint: &str) -> Result { + RopeFingerprint::from_hex(fingerprint) + .map_err(|error| anyhow!("invalid fingerprint: {}", error)) +} + /// Deserializes a `[text::LineEnding]` from the RPC representation. pub fn deserialize_line_ending(message: proto::LineEnding) -> text::LineEnding { match message { diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 7d984525f3..44db1aa40d 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -37,8 +37,8 @@ use language::{ language_settings::{language_settings, FormatOnSave, Formatter, InlayHintKind}, markdown, point_to_lsp, proto::{ - deserialize_anchor, deserialize_line_ending, deserialize_version, serialize_anchor, - serialize_version, split_operations, + deserialize_anchor, deserialize_fingerprint, deserialize_line_ending, deserialize_version, + serialize_anchor, serialize_version, split_operations, }, range_from_lsp, Bias, Buffer, BufferSnapshot, CachedLspAdapter, Capability, CodeAction, CodeLabel, Completion, Diagnostic, DiagnosticEntry, DiagnosticSet, Diff, Documentation, @@ -7922,6 +7922,7 @@ impl Project { buffer_id: buffer_id.into(), version: serialize_version(buffer.saved_version()), mtime: buffer.saved_mtime().map(|time| time.into()), + fingerprint: language::proto::serialize_fingerprint(buffer.saved_version_fingerprint()), }) } @@ -8014,6 +8015,9 @@ impl Project { buffer_id: buffer_id.into(), version: language::proto::serialize_version(buffer.saved_version()), mtime: buffer.saved_mtime().map(|time| time.into()), + fingerprint: language::proto::serialize_fingerprint( + buffer.saved_version_fingerprint(), + ), line_ending: language::proto::serialize_line_ending( buffer.line_ending(), ) as i32, @@ -8890,6 +8894,7 @@ impl Project { _: Arc, mut cx: AsyncAppContext, ) -> Result<()> { + let fingerprint = deserialize_fingerprint(&envelope.payload.fingerprint)?; let version = deserialize_version(&envelope.payload.version); let buffer_id = BufferId::new(envelope.payload.buffer_id)?; let mtime = envelope.payload.mtime.map(|time| time.into()); @@ -8906,7 +8911,7 @@ impl Project { }); if let Some(buffer) = buffer { buffer.update(cx, |buffer, cx| { - buffer.did_save(version, mtime, cx); + buffer.did_save(version, fingerprint, mtime, cx); }); } Ok(()) @@ -8921,6 +8926,7 @@ impl Project { ) -> Result<()> { let payload = envelope.payload; let version = deserialize_version(&payload.version); + let fingerprint = deserialize_fingerprint(&payload.fingerprint)?; let line_ending = deserialize_line_ending( proto::LineEnding::from_i32(payload.line_ending) .ok_or_else(|| anyhow!("missing line ending"))?, @@ -8940,7 +8946,7 @@ impl Project { }); if let Some(buffer) = buffer { buffer.update(cx, |buffer, cx| { - buffer.did_reload(version, line_ending, mtime, cx); + buffer.did_reload(version, fingerprint, line_ending, mtime, cx); }); } Ok(()) diff --git a/crates/project/src/project_tests.rs b/crates/project/src/project_tests.rs index 987ef4356c..ecdb46f603 100644 --- a/crates/project/src/project_tests.rs +++ b/crates/project/src/project_tests.rs @@ -3118,7 +3118,12 @@ async fn test_buffer_is_dirty(cx: &mut gpui::TestAppContext) { &[language::Event::Edited, language::Event::DirtyChanged] ); events.lock().clear(); - buffer.did_save(buffer.version(), buffer.file().unwrap().mtime(), cx); + buffer.did_save( + buffer.version(), + buffer.as_rope().fingerprint(), + buffer.file().unwrap().mtime(), + cx, + ); }); // after saving, the buffer is not dirty, and emits a saved event. diff --git a/crates/rope/Cargo.toml b/crates/rope/Cargo.toml index f3ae40ffc6..b1ce369416 100644 --- a/crates/rope/Cargo.toml +++ b/crates/rope/Cargo.toml @@ -13,6 +13,7 @@ path = "src/rope.rs" [dependencies] arrayvec = "0.7.1" +bromberg_sl2 = { git = "https://github.com/zed-industries/bromberg_sl2", rev = "950bc5482c216c395049ae33ae4501e08975f17f" } log.workspace = true smallvec.workspace = true sum_tree.workspace = true diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index 6b44b84494..77e86e3031 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -4,6 +4,7 @@ mod point_utf16; mod unclipped; use arrayvec::ArrayString; +use bromberg_sl2::HashMatrix; use smallvec::SmallVec; use std::{ cmp, fmt, io, mem, @@ -24,6 +25,12 @@ const CHUNK_BASE: usize = 6; #[cfg(not(test))] const CHUNK_BASE: usize = 16; +/// Type alias to [`HashMatrix`], an implementation of a homomorphic hash function. Two [`Rope`] instances +/// containing the same text will produce the same fingerprint. This hash function is special in that +/// it allows us to hash individual chunks and aggregate them up the [`Rope`]'s tree, with the resulting +/// hash being equivalent to hashing all the text contained in the [`Rope`] at once. +pub type RopeFingerprint = HashMatrix; + #[derive(Clone, Default)] pub struct Rope { chunks: SumTree, @@ -34,6 +41,10 @@ impl Rope { Self::default() } + pub fn text_fingerprint(text: &str) -> RopeFingerprint { + bromberg_sl2::hash_strict(text.as_bytes()) + } + pub fn append(&mut self, rope: Rope) { let mut chunks = rope.chunks.cursor::<()>(); chunks.next(&()); @@ -412,6 +423,10 @@ impl Rope { self.clip_point(Point::new(row, u32::MAX), Bias::Left) .column } + + pub fn fingerprint(&self) -> RopeFingerprint { + self.chunks.summary().fingerprint + } } impl<'a> From<&'a str> for Rope { @@ -962,12 +977,14 @@ impl sum_tree::Item for Chunk { #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct ChunkSummary { text: TextSummary, + fingerprint: RopeFingerprint, } impl<'a> From<&'a str> for ChunkSummary { fn from(text: &'a str) -> Self { Self { text: TextSummary::from(text), + fingerprint: Rope::text_fingerprint(text), } } } @@ -977,6 +994,7 @@ impl sum_tree::Summary for ChunkSummary { fn add_summary(&mut self, summary: &Self, _: &()) { self.text += &summary.text; + self.fingerprint = self.fingerprint * summary.fingerprint; } } diff --git a/crates/rpc/proto/zed.proto b/crates/rpc/proto/zed.proto index d5cf33f2e3..b69c7bbae7 100644 --- a/crates/rpc/proto/zed.proto +++ b/crates/rpc/proto/zed.proto @@ -674,7 +674,7 @@ message BufferSaved { uint64 buffer_id = 2; repeated VectorClockEntry version = 3; Timestamp mtime = 4; - reserved 5; + string fingerprint = 5; } message BufferReloaded { @@ -682,7 +682,7 @@ message BufferReloaded { uint64 buffer_id = 2; repeated VectorClockEntry version = 3; Timestamp mtime = 4; - reserved 5; + string fingerprint = 5; LineEnding line_ending = 6; } @@ -1502,7 +1502,7 @@ message BufferState { optional string diff_base = 4; LineEnding line_ending = 5; repeated VectorClockEntry saved_version = 6; - reserved 7; + string saved_version_fingerprint = 7; Timestamp saved_mtime = 8; } diff --git a/crates/worktree/src/worktree.rs b/crates/worktree/src/worktree.rs index fb37e6b313..2ad7436096 100644 --- a/crates/worktree/src/worktree.rs +++ b/crates/worktree/src/worktree.rs @@ -31,8 +31,12 @@ use gpui::{ use ignore::IgnoreStack; use itertools::Itertools; use language::{ - proto::{deserialize_version, serialize_line_ending, serialize_version}, - Buffer, Capability, DiagnosticEntry, File as _, LineEnding, PointUtf16, Rope, Unclipped, + proto::{ + deserialize_fingerprint, deserialize_version, serialize_fingerprint, serialize_line_ending, + serialize_version, + }, + Buffer, Capability, DiagnosticEntry, File as _, LineEnding, PointUtf16, Rope, RopeFingerprint, + Unclipped, }; use lsp::{DiagnosticSeverity, LanguageServerId}; use parking_lot::Mutex; @@ -1147,6 +1151,7 @@ impl LocalWorktree { } let text = buffer.as_rope().clone(); + let fingerprint = text.fingerprint(); let version = buffer.version(); let save = self.write_file(path.as_ref(), text, buffer.line_ending(), cx); let fs = Arc::clone(&self.fs); @@ -1209,11 +1214,12 @@ impl LocalWorktree { buffer_id, version: serialize_version(&version), mtime: mtime.map(|time| time.into()), + fingerprint: serialize_fingerprint(fingerprint), })?; } buffer_handle.update(&mut cx, |buffer, cx| { - buffer.did_save(version.clone(), mtime, cx); + buffer.did_save(version.clone(), fingerprint, mtime, cx); })?; Ok(()) @@ -1614,10 +1620,11 @@ impl RemoteWorktree { }) .await?; let version = deserialize_version(&response.version); + let fingerprint = deserialize_fingerprint(&response.fingerprint)?; let mtime = response.mtime.map(|mtime| mtime.into()); buffer_handle.update(&mut cx, |buffer, cx| { - buffer.did_save(version.clone(), mtime, cx); + buffer.did_save(version.clone(), fingerprint, mtime, cx); })?; Ok(()) @@ -2999,6 +3006,7 @@ impl language::LocalFile for File { &self, buffer_id: BufferId, version: &clock::Global, + fingerprint: RopeFingerprint, line_ending: LineEnding, mtime: Option, cx: &mut AppContext, @@ -3012,6 +3020,7 @@ impl language::LocalFile for File { buffer_id: buffer_id.into(), version: serialize_version(version), mtime: mtime.map(|time| time.into()), + fingerprint: serialize_fingerprint(fingerprint), line_ending: serialize_line_ending(line_ending) as i32, }) .log_err();