diff --git a/Cargo.lock b/Cargo.lock index a2e0e61f24..1f7e017431 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -420,7 +420,6 @@ dependencies = [ "serde_json", "serde_json_lenient", "settings", - "similar", "smol", "telemetry_events", "terminal", diff --git a/crates/assistant/Cargo.toml b/crates/assistant/Cargo.toml index ca6b4085fc..524f7f2bef 100644 --- a/crates/assistant/Cargo.toml +++ b/crates/assistant/Cargo.toml @@ -65,7 +65,6 @@ semantic_index.workspace = true serde.workspace = true serde_json.workspace = true settings.workspace = true -similar.workspace = true smol.workspace = true telemetry_events.workspace = true terminal.workspace = true diff --git a/crates/assistant/src/inline_assistant.rs b/crates/assistant/src/inline_assistant.rs index 8a33ec9069..3e5ac1ba68 100644 --- a/crates/assistant/src/inline_assistant.rs +++ b/crates/assistant/src/inline_assistant.rs @@ -1,6 +1,6 @@ use crate::{ humanize_token_count, prompts::generate_content_prompt, AssistantPanel, AssistantPanelEvent, - Hunk, ModelSelector, StreamingDiff, + CharOperation, LineDiff, LineOperation, ModelSelector, StreamingDiff, }; use anyhow::{anyhow, Context as _, Result}; use client::{telemetry::Telemetry, ErrorExt}; @@ -35,7 +35,6 @@ use multi_buffer::MultiBufferRow; use parking_lot::Mutex; use rope::Rope; use settings::Settings; -use similar::TextDiff; use smol::future::FutureExt; use std::{ cmp, @@ -2033,8 +2032,6 @@ pub enum CodegenStatus { #[derive(Default)] struct Diff { - task: Option>, - should_update: bool, deleted_row_ranges: Vec<(Anchor, RangeInclusive)>, inserted_row_ranges: Vec>, } @@ -2264,7 +2261,7 @@ impl Codegen { async move { let chunks = stream.await; let generate = async { - let (mut hunks_tx, mut hunks_rx) = mpsc::channel(1); + let (mut diff_tx, mut diff_rx) = mpsc::channel(1); let diff: Task> = cx.background_executor().spawn(async move { let mut response_latency = None; @@ -2273,6 +2270,7 @@ impl Codegen { let chunks = StripInvalidSpans::new(chunks?); futures::pin_mut!(chunks); let mut diff = StreamingDiff::new(selected_text.to_string()); + let mut line_diff = LineDiff::default(); let mut new_text = String::new(); let mut base_indent = None; @@ -2323,12 +2321,22 @@ impl Codegen { } if line_indent.is_some() { - hunks_tx.send(diff.push_new(&new_text)).await?; + let char_ops = diff.push_new(&new_text); + line_diff + .push_char_operations(&char_ops, &selected_text); + diff_tx + .send((char_ops, line_diff.line_operations())) + .await?; new_text.clear(); } if lines.peek().is_some() { - hunks_tx.send(diff.push_new("\n")).await?; + let char_ops = diff.push_new("\n"); + line_diff + .push_char_operations(&char_ops, &selected_text); + diff_tx + .send((char_ops, line_diff.line_operations())) + .await?; if line_indent.is_none() { // Don't write out the leading indentation in empty lines on the next line // This is the case where the above if statement didn't clear the buffer @@ -2339,8 +2347,14 @@ impl Codegen { } } } - hunks_tx.send(diff.push_new(&new_text)).await?; - hunks_tx.send(diff.finish()).await?; + + let mut char_ops = diff.push_new(&new_text); + char_ops.extend(diff.finish()); + line_diff.push_char_operations(&char_ops, &selected_text); + line_diff.finish(&selected_text); + diff_tx + .send((char_ops, line_diff.line_operations())) + .await?; anyhow::Ok(()) }; @@ -2363,7 +2377,7 @@ impl Codegen { Ok(()) }); - while let Some(hunks) = hunks_rx.next().await { + while let Some((char_ops, line_diff)) = diff_rx.next().await { this.update(&mut cx, |this, cx| { this.last_equal_ranges.clear(); @@ -2373,27 +2387,29 @@ impl Codegen { buffer.start_transaction(cx); buffer.edit( - hunks.into_iter().filter_map(|hunk| match hunk { - Hunk::Insert { text } => { - let edit_start = snapshot.anchor_after(edit_start); - Some((edit_start..edit_start, text)) - } - Hunk::Remove { len } => { - let edit_end = edit_start + len; - let edit_range = snapshot.anchor_after(edit_start) - ..snapshot.anchor_before(edit_end); - edit_start = edit_end; - Some((edit_range, String::new())) - } - Hunk::Keep { len } => { - let edit_end = edit_start + len; - let edit_range = snapshot.anchor_after(edit_start) - ..snapshot.anchor_before(edit_end); - edit_start = edit_end; - this.last_equal_ranges.push(edit_range); - None - } - }), + char_ops + .into_iter() + .filter_map(|operation| match operation { + CharOperation::Insert { text } => { + let edit_start = snapshot.anchor_after(edit_start); + Some((edit_start..edit_start, text)) + } + CharOperation::Delete { bytes } => { + let edit_end = edit_start + bytes; + let edit_range = snapshot.anchor_after(edit_start) + ..snapshot.anchor_before(edit_end); + edit_start = edit_end; + Some((edit_range, String::new())) + } + CharOperation::Keep { bytes } => { + let edit_end = edit_start + bytes; + let edit_range = snapshot.anchor_after(edit_start) + ..snapshot.anchor_before(edit_end); + edit_start = edit_end; + this.last_equal_ranges.push(edit_range); + None + } + }), None, cx, ); @@ -2421,7 +2437,8 @@ impl Codegen { } } - this.update_diff(edit_range.clone(), cx); + this.update_diff(edit_range.clone(), line_diff, cx); + cx.notify(); })?; } @@ -2468,102 +2485,63 @@ impl Codegen { }); } - fn update_diff(&mut self, edit_range: Range, cx: &mut ModelContext) { - if self.diff.task.is_some() { - self.diff.should_update = true; - } else { - self.diff.should_update = false; + fn update_diff( + &mut self, + edit_range: Range, + line_operations: Vec, + cx: &mut ModelContext, + ) { + let old_snapshot = self.snapshot.clone(); + let old_range = edit_range.to_point(&old_snapshot); + let new_snapshot = self.buffer.read(cx).snapshot(cx); + let new_range = edit_range.to_point(&new_snapshot); - let old_snapshot = self.snapshot.clone(); - let old_range = edit_range.to_point(&old_snapshot); - let new_snapshot = self.buffer.read(cx).snapshot(cx); - let new_range = edit_range.to_point(&new_snapshot); + let mut old_row = old_range.start.row; + let mut new_row = new_range.start.row; - self.diff.task = Some(cx.spawn(|this, mut cx| async move { - let (deleted_row_ranges, inserted_row_ranges) = cx - .background_executor() - .spawn(async move { - let old_text = old_snapshot - .text_for_range( - Point::new(old_range.start.row, 0) - ..Point::new( - old_range.end.row, - old_snapshot.line_len(MultiBufferRow(old_range.end.row)), - ), - ) - .collect::(); - let new_text = new_snapshot - .text_for_range( - Point::new(new_range.start.row, 0) - ..Point::new( - new_range.end.row, - new_snapshot.line_len(MultiBufferRow(new_range.end.row)), - ), - ) - .collect::(); + self.diff.deleted_row_ranges.clear(); + self.diff.inserted_row_ranges.clear(); + for operation in line_operations { + match operation { + LineOperation::Keep { lines } => { + old_row += lines; + new_row += lines; + } + LineOperation::Delete { lines } => { + let old_end_row = old_row + lines - 1; + let new_row = new_snapshot.anchor_before(Point::new(new_row, 0)); - let mut old_row = old_range.start.row; - let mut new_row = new_range.start.row; - let diff = TextDiff::from_lines(old_text.as_str(), new_text.as_str()); - - let mut deleted_row_ranges: Vec<(Anchor, RangeInclusive)> = Vec::new(); - let mut inserted_row_ranges = Vec::new(); - for change in diff.iter_all_changes() { - let line_count = change.value().lines().count() as u32; - match change.tag() { - similar::ChangeTag::Equal => { - old_row += line_count; - new_row += line_count; - } - similar::ChangeTag::Delete => { - let old_end_row = old_row + line_count - 1; - let new_row = - new_snapshot.anchor_before(Point::new(new_row, 0)); - - if let Some((_, last_deleted_row_range)) = - deleted_row_ranges.last_mut() - { - if *last_deleted_row_range.end() + 1 == old_row { - *last_deleted_row_range = - *last_deleted_row_range.start()..=old_end_row; - } else { - deleted_row_ranges - .push((new_row, old_row..=old_end_row)); - } - } else { - deleted_row_ranges.push((new_row, old_row..=old_end_row)); - } - - old_row += line_count; - } - similar::ChangeTag::Insert => { - let new_end_row = new_row + line_count - 1; - let start = new_snapshot.anchor_before(Point::new(new_row, 0)); - let end = new_snapshot.anchor_before(Point::new( - new_end_row, - new_snapshot.line_len(MultiBufferRow(new_end_row)), - )); - inserted_row_ranges.push(start..=end); - new_row += line_count; - } - } + if let Some((_, last_deleted_row_range)) = + self.diff.deleted_row_ranges.last_mut() + { + if *last_deleted_row_range.end() + 1 == old_row { + *last_deleted_row_range = *last_deleted_row_range.start()..=old_end_row; + } else { + self.diff + .deleted_row_ranges + .push((new_row, old_row..=old_end_row)); } - - (deleted_row_ranges, inserted_row_ranges) - }) - .await; - - this.update(&mut cx, |this, cx| { - this.diff.deleted_row_ranges = deleted_row_ranges; - this.diff.inserted_row_ranges = inserted_row_ranges; - this.diff.task = None; - if this.diff.should_update { - this.update_diff(edit_range, cx); + } else { + self.diff + .deleted_row_ranges + .push((new_row, old_row..=old_end_row)); } - cx.notify(); - }) - .ok(); - })); + + old_row += lines; + } + LineOperation::Insert { lines } => { + let new_end_row = new_row + lines - 1; + let start = new_snapshot.anchor_before(Point::new(new_row, 0)); + let end = new_snapshot.anchor_before(Point::new( + new_end_row, + new_snapshot.line_len(MultiBufferRow(new_end_row)), + )); + self.diff.inserted_row_ranges.push(start..=end); + new_row += lines; + } + } + + cx.notify(); } } } diff --git a/crates/assistant/src/streaming_diff.rs b/crates/assistant/src/streaming_diff.rs index cba7758dde..8383e081d5 100644 --- a/crates/assistant/src/streaming_diff.rs +++ b/crates/assistant/src/streaming_diff.rs @@ -1,5 +1,6 @@ -use collections::HashMap; use ordered_float::OrderedFloat; +use rope::{Point, Rope, TextSummary}; +use std::collections::{BTreeSet, HashMap}; use std::{ cmp, fmt::{self, Debug}, @@ -64,11 +65,11 @@ impl Debug for Matrix { } } -#[derive(Debug)] -pub enum Hunk { +#[derive(Debug, Clone)] +pub enum CharOperation { Insert { text: String }, - Remove { len: usize }, - Keep { len: usize }, + Delete { bytes: usize }, + Keep { bytes: usize }, } pub struct StreamingDiff { @@ -103,7 +104,7 @@ impl StreamingDiff { } } - pub fn push_new(&mut self, text: &str) -> Vec { + pub fn push_new(&mut self, text: &str) -> Vec { self.new.extend(text.chars()); self.scores.resize(self.old.len() + 1, self.new.len() + 1); @@ -145,7 +146,7 @@ impl StreamingDiff { hunks } - fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec { + fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec { let mut pending_insert: Option> = None; let mut hunks = Vec::new(); let mut i = old_text_ix; @@ -185,22 +186,22 @@ impl StreamingDiff { } } else { if let Some(range) = pending_insert.take() { - hunks.push(Hunk::Insert { + hunks.push(CharOperation::Insert { text: self.new[range].iter().collect(), }); } let char_len = self.old[i - 1].len_utf8(); if prev_i == i - 1 && prev_j == j { - if let Some(Hunk::Remove { len }) = hunks.last_mut() { + if let Some(CharOperation::Delete { bytes: len }) = hunks.last_mut() { *len += char_len; } else { - hunks.push(Hunk::Remove { len: char_len }) + hunks.push(CharOperation::Delete { bytes: char_len }) } - } else if let Some(Hunk::Keep { len }) = hunks.last_mut() { + } else if let Some(CharOperation::Keep { bytes: len }) = hunks.last_mut() { *len += char_len; } else { - hunks.push(Hunk::Keep { len: char_len }) + hunks.push(CharOperation::Keep { bytes: char_len }) } } @@ -209,7 +210,7 @@ impl StreamingDiff { } if let Some(range) = pending_insert.take() { - hunks.push(Hunk::Insert { + hunks.push(CharOperation::Insert { text: self.new[range].iter().collect(), }); } @@ -218,74 +219,853 @@ impl StreamingDiff { hunks } - pub fn finish(self) -> Vec { + pub fn finish(self) -> Vec { self.backtrack(self.old.len(), self.new.len()) } } -#[cfg(test)] -mod tests { - use std::env; +#[derive(Debug, Clone, PartialEq)] +pub enum LineOperation { + Insert { lines: u32 }, + Delete { lines: u32 }, + Keep { lines: u32 }, +} - use super::*; - use rand::prelude::*; +#[derive(Debug, Default)] +pub struct LineDiff { + inserted_newline_at_end: bool, + /// The extent of kept and deleted text. + old_end: Point, + /// The extent of kept and inserted text. + new_end: Point, + /// Deleted rows, expressed in terms of the old text. + deleted_rows: BTreeSet, + /// Inserted rows, expressed in terms of the new text. + inserted_rows: BTreeSet, + buffered_insert: String, + /// After deleting a newline, we buffer deletion until we keep or insert a character. + buffered_delete: usize, +} - #[gpui::test(iterations = 100)] - fn test_random_diffs(mut rng: StdRng) { - let old_text_len = env::var("OLD_TEXT_LEN") - .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable")) - .unwrap_or(10); - let new_text_len = env::var("NEW_TEXT_LEN") - .map(|i| i.parse().expect("invalid `NEW_TEXT_LEN` variable")) - .unwrap_or(10); - - let old = util::RandomCharIter::new(&mut rng) - .take(old_text_len) - .collect::(); - log::info!("old text: {:?}", old); - - let mut diff = StreamingDiff::new(old.clone()); - let mut hunks = Vec::new(); - let mut new_len = 0; - let mut new = String::new(); - while new_len < new_text_len { - let new_chunk_len = rng.gen_range(1..=new_text_len - new_len); - let new_chunk = util::RandomCharIter::new(&mut rng) - .take(new_len) - .collect::(); - log::info!("new chunk: {:?}", new_chunk); - new_len += new_chunk_len; - new.push_str(&new_chunk); - let new_hunks = diff.push_new(&new_chunk); - log::info!("hunks: {:?}", new_hunks); - hunks.extend(new_hunks); +impl LineDiff { + pub fn push_char_operations<'a>( + &mut self, + operations: impl IntoIterator, + old_text: &Rope, + ) { + for operation in operations { + self.push_char_operation(operation, old_text); } - let final_hunks = diff.finish(); - log::info!("final hunks: {:?}", final_hunks); - hunks.extend(final_hunks); + } - log::info!("new text: {:?}", new); - let mut old_ix = 0; - let mut new_ix = 0; - let mut patched = String::new(); - for hunk in hunks { - match hunk { - Hunk::Keep { len } => { - assert_eq!(&old[old_ix..old_ix + len], &new[new_ix..new_ix + len]); - patched.push_str(&old[old_ix..old_ix + len]); - old_ix += len; - new_ix += len; + pub fn push_char_operation(&mut self, operation: &CharOperation, old_text: &Rope) { + match operation { + CharOperation::Insert { text } => { + self.flush_delete(old_text); + + if is_line_start(self.old_end) { + if let Some(newline_ix) = text.rfind('\n') { + let (prefix, suffix) = text.split_at(newline_ix + 1); + self.buffered_insert.push_str(prefix); + self.flush_insert(old_text); + self.buffered_insert.push_str(suffix); + } else { + self.buffered_insert.push_str(&text); + } + } else { + self.buffered_insert.push_str(&text); + if !text.ends_with('\n') { + self.flush_insert(old_text); + } } - Hunk::Remove { len } => { - old_ix += len; + } + CharOperation::Delete { bytes } => { + self.buffered_delete += bytes; + + let common_suffix_len = self.trim_buffered_end(old_text); + self.flush_insert(old_text); + + if common_suffix_len > 0 || !is_line_end(self.old_end, old_text) { + self.flush_delete(old_text); + self.keep(common_suffix_len, old_text); } - Hunk::Insert { text } => { - assert_eq!(text, &new[new_ix..new_ix + text.len()]); - patched.push_str(&text); - new_ix += text.len(); + } + CharOperation::Keep { bytes } => { + self.flush_delete(old_text); + self.flush_insert(old_text); + self.keep(*bytes, old_text); + } + } + } + + fn flush_insert(&mut self, old_text: &Rope) { + if self.buffered_insert.is_empty() { + return; + } + + let new_start = self.new_end; + let lines = TextSummary::from(self.buffered_insert.as_str()).lines; + self.new_end += lines; + + if is_line_start(self.old_end) { + if self.new_end.column == 0 { + self.inserted_rows.extend(new_start.row..self.new_end.row); + } else { + self.deleted_rows.insert(self.old_end.row); + self.inserted_rows.extend(new_start.row..=self.new_end.row); + } + } else if is_line_end(self.old_end, old_text) { + if self.buffered_insert.starts_with('\n') { + self.inserted_rows + .extend(new_start.row + 1..=self.new_end.row); + self.inserted_newline_at_end = true; + } else { + if !self.inserted_newline_at_end { + self.deleted_rows.insert(self.old_end.row); + } + self.inserted_rows.extend(new_start.row..=self.new_end.row); + } + } else { + self.deleted_rows.insert(self.old_end.row); + self.inserted_rows.extend(new_start.row..=self.new_end.row); + } + + self.buffered_insert.clear(); + } + + fn flush_delete(&mut self, old_text: &Rope) { + if self.buffered_delete == 0 { + return; + } + + let old_start = self.old_end; + self.old_end = + old_text.offset_to_point(old_text.point_to_offset(self.old_end) + self.buffered_delete); + + if is_line_end(old_start, old_text) && is_line_end(self.old_end, old_text) { + self.deleted_rows + .extend(old_start.row + 1..=self.old_end.row); + } else if is_line_start(old_start) + && (is_line_start(self.old_end) && self.old_end < old_text.max_point()) + && self.new_end.column == 0 + { + self.deleted_rows.extend(old_start.row..self.old_end.row); + } else { + self.inserted_rows.insert(self.new_end.row); + self.deleted_rows.extend(old_start.row..=self.old_end.row); + } + + self.inserted_newline_at_end = false; + self.buffered_delete = 0; + } + + fn keep(&mut self, bytes: usize, old_text: &Rope) { + if bytes == 0 { + return; + } + + let lines = + old_text.offset_to_point(old_text.point_to_offset(self.old_end) + bytes) - self.old_end; + self.old_end += lines; + self.new_end += lines; + self.inserted_newline_at_end = false; + } + + fn trim_buffered_end(&mut self, old_text: &Rope) -> usize { + let old_start_offset = old_text.point_to_offset(self.old_end); + let old_end_offset = old_start_offset + self.buffered_delete; + + let new_chars = self.buffered_insert.chars().rev(); + let old_chars = old_text + .chunks_in_range(old_start_offset..old_end_offset) + .flat_map(|chunk| chunk.chars().rev()); + + let mut common_suffix_len = 0; + for (new_ch, old_ch) in new_chars.zip(old_chars) { + if new_ch == old_ch { + common_suffix_len += new_ch.len_utf8(); + } else { + break; + } + } + + self.buffered_delete -= common_suffix_len; + self.buffered_insert + .truncate(self.buffered_insert.len() - common_suffix_len); + + common_suffix_len + } + + pub fn finish(&mut self, old_text: &Rope) { + self.flush_insert(old_text); + self.flush_delete(old_text); + + let old_start = self.old_end; + self.old_end = old_text.max_point(); + self.new_end += self.old_end - old_start; + } + + pub fn line_operations(&self) -> Vec { + let mut ops = Vec::new(); + let mut deleted_rows = self.deleted_rows.iter().copied().peekable(); + let mut inserted_rows = self.inserted_rows.iter().copied().peekable(); + let mut old_row = 0; + let mut new_row = 0; + + while deleted_rows.peek().is_some() || inserted_rows.peek().is_some() { + // Check for a run of deleted lines at current old row. + if Some(old_row) == deleted_rows.peek().copied() { + if let Some(LineOperation::Delete { lines }) = ops.last_mut() { + *lines += 1; + } else { + ops.push(LineOperation::Delete { lines: 1 }); + } + old_row += 1; + deleted_rows.next(); + } else if Some(new_row) == inserted_rows.peek().copied() { + if let Some(LineOperation::Insert { lines }) = ops.last_mut() { + *lines += 1; + } else { + ops.push(LineOperation::Insert { lines: 1 }); + } + new_row += 1; + inserted_rows.next(); + } else { + // Keep lines until the next deletion, insertion, or the end of the old text. + let lines_to_next_deletion = inserted_rows + .peek() + .copied() + .unwrap_or(self.new_end.row + 1) + - new_row; + let lines_to_next_insertion = + deleted_rows.peek().copied().unwrap_or(self.old_end.row + 1) - old_row; + let kept_lines = + cmp::max(1, cmp::min(lines_to_next_insertion, lines_to_next_deletion)); + if kept_lines > 0 { + ops.push(LineOperation::Keep { lines: kept_lines }); + old_row += kept_lines; + new_row += kept_lines; } } } - assert_eq!(patched, new); + + if old_row < self.old_end.row + 1 { + ops.push(LineOperation::Keep { + lines: self.old_end.row + 1 - old_row, + }); + } + + ops + } +} + +fn is_line_start(point: Point) -> bool { + point.column == 0 +} + +fn is_line_end(point: Point, text: &Rope) -> bool { + text.line_len(point.row) == point.column +} + +#[cfg(test)] +mod tests { + use super::*; + use rand::prelude::*; + use std::env; + + #[test] + fn test_delete_first_of_two_lines() { + let old_text = "aaaa\nbbbb"; + let char_ops = vec![ + CharOperation::Delete { bytes: 5 }, + CharOperation::Keep { bytes: 4 }, + ]; + let expected_line_ops = vec![ + LineOperation::Delete { lines: 1 }, + LineOperation::Keep { lines: 1 }, + ]; + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &expected_line_ops) + ); + + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!(line_ops, expected_line_ops); + } + + #[test] + fn test_delete_second_of_two_lines() { + let old_text = "aaaa\nbbbb"; + let char_ops = vec![ + CharOperation::Keep { bytes: 5 }, + CharOperation::Delete { bytes: 4 }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Keep { lines: 1 }, + LineOperation::Delete { lines: 1 }, + LineOperation::Insert { lines: 1 } + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_add_new_line() { + let old_text = "aaaa\nbbbb"; + let char_ops = vec![ + CharOperation::Keep { bytes: 9 }, + CharOperation::Insert { + text: "\ncccc".into(), + }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Keep { lines: 2 }, + LineOperation::Insert { lines: 1 } + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_delete_line_in_middle() { + let old_text = "aaaa\nbbbb\ncccc"; + let char_ops = vec![ + CharOperation::Keep { bytes: 5 }, + CharOperation::Delete { bytes: 5 }, + CharOperation::Keep { bytes: 4 }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Keep { lines: 1 }, + LineOperation::Delete { lines: 1 }, + LineOperation::Keep { lines: 1 } + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_replace_line() { + let old_text = "aaaa\nbbbb\ncccc"; + let char_ops = vec![ + CharOperation::Keep { bytes: 5 }, + CharOperation::Delete { bytes: 4 }, + CharOperation::Insert { + text: "BBBB".into(), + }, + CharOperation::Keep { bytes: 5 }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Keep { lines: 1 }, + LineOperation::Delete { lines: 1 }, + LineOperation::Insert { lines: 1 }, + LineOperation::Keep { lines: 1 } + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_multiple_edits_on_different_lines() { + let old_text = "aaaa\nbbbb\ncccc\ndddd"; + let char_ops = vec![ + CharOperation::Insert { text: "A".into() }, + CharOperation::Keep { bytes: 9 }, + CharOperation::Delete { bytes: 5 }, + CharOperation::Keep { bytes: 4 }, + CharOperation::Insert { + text: "\nEEEE".into(), + }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Delete { lines: 1 }, + LineOperation::Insert { lines: 1 }, + LineOperation::Keep { lines: 1 }, + LineOperation::Delete { lines: 2 }, + LineOperation::Insert { lines: 2 }, + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_edit_at_end_of_line() { + let old_text = "aaaa\nbbbb\ncccc"; + let char_ops = vec![ + CharOperation::Keep { bytes: 4 }, + CharOperation::Insert { text: "A".into() }, + CharOperation::Keep { bytes: 10 }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Delete { lines: 1 }, + LineOperation::Insert { lines: 1 }, + LineOperation::Keep { lines: 2 } + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_insert_newline_character() { + let old_text = "aaaabbbb"; + let char_ops = vec![ + CharOperation::Keep { bytes: 4 }, + CharOperation::Insert { text: "\n".into() }, + CharOperation::Keep { bytes: 4 }, + ]; + let new_text = apply_char_operations(old_text, &char_ops); + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Delete { lines: 1 }, + LineOperation::Insert { lines: 2 } + ] + ); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_insert_newline_at_beginning() { + let old_text = "aaaa\nbbbb"; + let char_ops = vec![ + CharOperation::Insert { text: "\n".into() }, + CharOperation::Keep { bytes: 9 }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Insert { lines: 1 }, + LineOperation::Keep { lines: 2 } + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_delete_newline() { + let old_text = "aaaa\nbbbb"; + let char_ops = vec![ + CharOperation::Keep { bytes: 4 }, + CharOperation::Delete { bytes: 1 }, + CharOperation::Keep { bytes: 4 }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Delete { lines: 2 }, + LineOperation::Insert { lines: 1 } + ] + ); + + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_insert_multiple_newlines() { + let old_text = "aaaa\nbbbb"; + let char_ops = vec![ + CharOperation::Keep { bytes: 5 }, + CharOperation::Insert { + text: "\n\n".into(), + }, + CharOperation::Keep { bytes: 4 }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Keep { lines: 1 }, + LineOperation::Insert { lines: 2 }, + LineOperation::Keep { lines: 1 } + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_delete_multiple_newlines() { + let old_text = "aaaa\n\n\nbbbb"; + let char_ops = vec![ + CharOperation::Keep { bytes: 5 }, + CharOperation::Delete { bytes: 2 }, + CharOperation::Keep { bytes: 4 }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Keep { lines: 1 }, + LineOperation::Delete { lines: 2 }, + LineOperation::Keep { lines: 1 } + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_complex_scenario() { + let old_text = "line1\nline2\nline3\nline4"; + let char_ops = vec![ + CharOperation::Keep { bytes: 6 }, + CharOperation::Insert { + text: "inserted\n".into(), + }, + CharOperation::Delete { bytes: 6 }, + CharOperation::Keep { bytes: 5 }, + CharOperation::Insert { + text: "\nnewline".into(), + }, + CharOperation::Keep { bytes: 6 }, + ]; + let line_ops = char_ops_to_line_ops(&old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Keep { lines: 1 }, + LineOperation::Delete { lines: 1 }, + LineOperation::Insert { lines: 1 }, + LineOperation::Keep { lines: 1 }, + LineOperation::Insert { lines: 1 }, + LineOperation::Keep { lines: 1 } + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!(new_text, "line1\ninserted\nline3\nnewline\nline4"); + assert_eq!( + apply_line_operations(old_text, &new_text, &line_ops), + new_text, + ); + } + + #[test] + fn test_cleaning_up_common_suffix() { + let old_text = concat!( + " for y in 0..size.y() {\n", + " let a = 10;\n", + " let b = 20;\n", + " }", + ); + let char_ops = [ + CharOperation::Keep { bytes: 8 }, + CharOperation::Insert { text: "let".into() }, + CharOperation::Insert { + text: " mut".into(), + }, + CharOperation::Insert { text: " y".into() }, + CharOperation::Insert { text: " =".into() }, + CharOperation::Insert { text: " 0".into() }, + CharOperation::Insert { text: ";".into() }, + CharOperation::Insert { text: "\n".into() }, + CharOperation::Insert { + text: " while".into(), + }, + CharOperation::Insert { text: " y".into() }, + CharOperation::Insert { + text: " < size".into(), + }, + CharOperation::Insert { text: ".".into() }, + CharOperation::Insert { text: "y".into() }, + CharOperation::Insert { text: "()".into() }, + CharOperation::Insert { text: " {".into() }, + CharOperation::Insert { text: "\n".into() }, + CharOperation::Delete { bytes: 23 }, + CharOperation::Keep { bytes: 23 }, + CharOperation::Keep { bytes: 1 }, + CharOperation::Keep { bytes: 23 }, + CharOperation::Keep { bytes: 1 }, + CharOperation::Keep { bytes: 8 }, + CharOperation::Insert { + text: " y".into(), + }, + CharOperation::Insert { text: " +=".into() }, + CharOperation::Insert { text: " 1".into() }, + CharOperation::Insert { text: ";".into() }, + CharOperation::Insert { text: "\n".into() }, + CharOperation::Insert { + text: " ".into(), + }, + CharOperation::Keep { bytes: 1 }, + ]; + let line_ops = char_ops_to_line_ops(old_text, &char_ops); + assert_eq!( + line_ops, + vec![ + LineOperation::Delete { lines: 1 }, + LineOperation::Insert { lines: 2 }, + LineOperation::Keep { lines: 2 }, + LineOperation::Delete { lines: 1 }, + LineOperation::Insert { lines: 2 }, + ] + ); + let new_text = apply_char_operations(old_text, &char_ops); + assert_eq!( + new_text, + apply_line_operations(old_text, &new_text, &line_ops) + ); + } + + #[test] + fn test_random_diffs() { + random_test(|mut rng| { + let old_text_len = env::var("OLD_TEXT_LEN") + .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable")) + .unwrap_or(10); + + let old = random_text(&mut rng, old_text_len); + println!("old text: {:?}", old); + + let new = randomly_edit(&old, &mut rng); + println!("new text: {:?}", new); + + let char_operations = random_streaming_diff(&mut rng, &old, &new); + println!("char operations: {:?}", char_operations); + + // Use apply_char_operations to verify the result + let patched = apply_char_operations(&old, &char_operations); + assert_eq!(patched, new); + + // Test char_ops_to_line_ops + let line_ops = char_ops_to_line_ops(&old, &char_operations); + println!("line operations: {:?}", line_ops); + let patched = apply_line_operations(&old, &new, &line_ops); + assert_eq!(patched, new); + }); + } + + fn char_ops_to_line_ops(old_text: &str, char_ops: &[CharOperation]) -> Vec { + let old_rope = Rope::from(old_text); + let mut diff = LineDiff::default(); + for op in char_ops { + diff.push_char_operation(op, &old_rope); + } + diff.finish(&old_rope); + diff.line_operations() + } + + fn random_streaming_diff(rng: &mut impl Rng, old: &str, new: &str) -> Vec { + let mut diff = StreamingDiff::new(old.to_string()); + let mut char_operations = Vec::new(); + let mut new_len = 0; + + while new_len < new.len() { + let mut chunk_len = rng.gen_range(1..=new.len() - new_len); + while !new.is_char_boundary(new_len + chunk_len) { + chunk_len += 1; + } + let chunk = &new[new_len..new_len + chunk_len]; + let new_hunks = diff.push_new(chunk); + char_operations.extend(new_hunks); + new_len += chunk_len; + } + + char_operations.extend(diff.finish()); + char_operations + } + + fn random_test(mut test_fn: F) + where + F: FnMut(StdRng), + { + let iterations = env::var("ITERATIONS") + .map(|i| i.parse().expect("invalid `ITERATIONS` variable")) + .unwrap_or(100); + + let seed: u64 = env::var("SEED") + .map(|s| s.parse().expect("invalid `SEED` variable")) + .unwrap_or(0); + + println!( + "Running test with {} iterations and seed {}", + iterations, seed + ); + + for i in 0..iterations { + println!("Iteration {}", i + 1); + let rng = StdRng::seed_from_u64(seed + i); + test_fn(rng); + } + } + + fn apply_line_operations(old_text: &str, new_text: &str, line_ops: &[LineOperation]) -> String { + let mut result: Vec<&str> = Vec::new(); + + let old_lines: Vec<&str> = old_text.split('\n').collect(); + let new_lines: Vec<&str> = new_text.split('\n').collect(); + let mut old_start = 0_usize; + let mut new_start = 0_usize; + + for op in line_ops { + match op { + LineOperation::Keep { lines } => { + let old_end = old_start + *lines as usize; + result.extend(&old_lines[old_start..old_end]); + old_start = old_end; + new_start += *lines as usize; + } + LineOperation::Delete { lines } => { + old_start += *lines as usize; + } + LineOperation::Insert { lines } => { + let new_end = new_start + *lines as usize; + result.extend(&new_lines[new_start..new_end]); + new_start = new_end; + } + } + } + + result.join("\n") + } + + #[test] + fn test_apply_char_operations() { + let old_text = "Hello, world!"; + let char_ops = vec![ + CharOperation::Keep { bytes: 7 }, + CharOperation::Delete { bytes: 5 }, + CharOperation::Insert { + text: "Rust".to_string(), + }, + CharOperation::Keep { bytes: 1 }, + ]; + let result = apply_char_operations(old_text, &char_ops); + assert_eq!(result, "Hello, Rust!"); + } + + fn random_text(rng: &mut impl Rng, length: usize) -> String { + util::RandomCharIter::new(rng).take(length).collect() + } + + fn randomly_edit(text: &str, rng: &mut impl Rng) -> String { + let mut result = String::from(text); + let edit_count = rng.gen_range(1..=5); + + fn random_char_range(text: &str, rng: &mut impl Rng) -> (usize, usize) { + let mut start = rng.gen_range(0..=text.len()); + while !text.is_char_boundary(start) { + start -= 1; + } + let mut end = rng.gen_range(start..=text.len()); + while !text.is_char_boundary(end) { + end += 1; + } + (start, end) + } + + for _ in 0..edit_count { + match rng.gen_range(0..3) { + 0 => { + // Insert + let (pos, _) = random_char_range(&result, rng); + let insert_len = rng.gen_range(1..=5); + let insert_text: String = random_text(rng, insert_len); + result.insert_str(pos, &insert_text); + } + 1 => { + // Delete + if !result.is_empty() { + let (start, end) = random_char_range(&result, rng); + result.replace_range(start..end, ""); + } + } + 2 => { + // Replace + if !result.is_empty() { + let (start, end) = random_char_range(&result, rng); + let replace_len = end - start; + let replace_text: String = random_text(rng, replace_len); + result.replace_range(start..end, &replace_text); + } + } + _ => unreachable!(), + } + } + + result + } + + fn apply_char_operations(old_text: &str, char_ops: &[CharOperation]) -> String { + let mut result = String::new(); + let mut old_ix = 0; + + for operation in char_ops { + match operation { + CharOperation::Keep { bytes } => { + result.push_str(&old_text[old_ix..old_ix + bytes]); + old_ix += bytes; + } + CharOperation::Delete { bytes } => { + old_ix += bytes; + } + CharOperation::Insert { text } => { + result.push_str(text); + } + } + } + + result } }