Improve streaming git diffs to be less jumpy during inline assistant generation (#15853)

Release Notes:

- Improved streaming git diffs to be less jumpy during inline assistant
generation

---------

Co-authored-by: Antonio Scandurra <antonio@zed.dev>
This commit is contained in:
Kirill Bulatov 2024-08-06 16:13:29 +03:00 committed by GitHub
parent a054a2a9a3
commit be514f23e1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 950 additions and 194 deletions

1
Cargo.lock generated
View File

@ -420,7 +420,6 @@ dependencies = [
"serde_json",
"serde_json_lenient",
"settings",
"similar",
"smol",
"telemetry_events",
"terminal",

View File

@ -65,7 +65,6 @@ semantic_index.workspace = true
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
similar.workspace = true
smol.workspace = true
telemetry_events.workspace = true
terminal.workspace = true

View File

@ -1,6 +1,6 @@
use crate::{
humanize_token_count, prompts::generate_content_prompt, AssistantPanel, AssistantPanelEvent,
Hunk, ModelSelector, StreamingDiff,
CharOperation, LineDiff, LineOperation, ModelSelector, StreamingDiff,
};
use anyhow::{anyhow, Context as _, Result};
use client::{telemetry::Telemetry, ErrorExt};
@ -35,7 +35,6 @@ use multi_buffer::MultiBufferRow;
use parking_lot::Mutex;
use rope::Rope;
use settings::Settings;
use similar::TextDiff;
use smol::future::FutureExt;
use std::{
cmp,
@ -2033,8 +2032,6 @@ pub enum CodegenStatus {
#[derive(Default)]
struct Diff {
task: Option<Task<()>>,
should_update: bool,
deleted_row_ranges: Vec<(Anchor, RangeInclusive<u32>)>,
inserted_row_ranges: Vec<RangeInclusive<Anchor>>,
}
@ -2264,7 +2261,7 @@ impl Codegen {
async move {
let chunks = stream.await;
let generate = async {
let (mut hunks_tx, mut hunks_rx) = mpsc::channel(1);
let (mut diff_tx, mut diff_rx) = mpsc::channel(1);
let diff: Task<anyhow::Result<()>> =
cx.background_executor().spawn(async move {
let mut response_latency = None;
@ -2273,6 +2270,7 @@ impl Codegen {
let chunks = StripInvalidSpans::new(chunks?);
futures::pin_mut!(chunks);
let mut diff = StreamingDiff::new(selected_text.to_string());
let mut line_diff = LineDiff::default();
let mut new_text = String::new();
let mut base_indent = None;
@ -2323,12 +2321,22 @@ impl Codegen {
}
if line_indent.is_some() {
hunks_tx.send(diff.push_new(&new_text)).await?;
let char_ops = diff.push_new(&new_text);
line_diff
.push_char_operations(&char_ops, &selected_text);
diff_tx
.send((char_ops, line_diff.line_operations()))
.await?;
new_text.clear();
}
if lines.peek().is_some() {
hunks_tx.send(diff.push_new("\n")).await?;
let char_ops = diff.push_new("\n");
line_diff
.push_char_operations(&char_ops, &selected_text);
diff_tx
.send((char_ops, line_diff.line_operations()))
.await?;
if line_indent.is_none() {
// Don't write out the leading indentation in empty lines on the next line
// This is the case where the above if statement didn't clear the buffer
@ -2339,8 +2347,14 @@ impl Codegen {
}
}
}
hunks_tx.send(diff.push_new(&new_text)).await?;
hunks_tx.send(diff.finish()).await?;
let mut char_ops = diff.push_new(&new_text);
char_ops.extend(diff.finish());
line_diff.push_char_operations(&char_ops, &selected_text);
line_diff.finish(&selected_text);
diff_tx
.send((char_ops, line_diff.line_operations()))
.await?;
anyhow::Ok(())
};
@ -2363,7 +2377,7 @@ impl Codegen {
Ok(())
});
while let Some(hunks) = hunks_rx.next().await {
while let Some((char_ops, line_diff)) = diff_rx.next().await {
this.update(&mut cx, |this, cx| {
this.last_equal_ranges.clear();
@ -2373,27 +2387,29 @@ impl Codegen {
buffer.start_transaction(cx);
buffer.edit(
hunks.into_iter().filter_map(|hunk| match hunk {
Hunk::Insert { text } => {
let edit_start = snapshot.anchor_after(edit_start);
Some((edit_start..edit_start, text))
}
Hunk::Remove { len } => {
let edit_end = edit_start + len;
let edit_range = snapshot.anchor_after(edit_start)
..snapshot.anchor_before(edit_end);
edit_start = edit_end;
Some((edit_range, String::new()))
}
Hunk::Keep { len } => {
let edit_end = edit_start + len;
let edit_range = snapshot.anchor_after(edit_start)
..snapshot.anchor_before(edit_end);
edit_start = edit_end;
this.last_equal_ranges.push(edit_range);
None
}
}),
char_ops
.into_iter()
.filter_map(|operation| match operation {
CharOperation::Insert { text } => {
let edit_start = snapshot.anchor_after(edit_start);
Some((edit_start..edit_start, text))
}
CharOperation::Delete { bytes } => {
let edit_end = edit_start + bytes;
let edit_range = snapshot.anchor_after(edit_start)
..snapshot.anchor_before(edit_end);
edit_start = edit_end;
Some((edit_range, String::new()))
}
CharOperation::Keep { bytes } => {
let edit_end = edit_start + bytes;
let edit_range = snapshot.anchor_after(edit_start)
..snapshot.anchor_before(edit_end);
edit_start = edit_end;
this.last_equal_ranges.push(edit_range);
None
}
}),
None,
cx,
);
@ -2421,7 +2437,8 @@ impl Codegen {
}
}
this.update_diff(edit_range.clone(), cx);
this.update_diff(edit_range.clone(), line_diff, cx);
cx.notify();
})?;
}
@ -2468,102 +2485,63 @@ impl Codegen {
});
}
fn update_diff(&mut self, edit_range: Range<Anchor>, cx: &mut ModelContext<Self>) {
if self.diff.task.is_some() {
self.diff.should_update = true;
} else {
self.diff.should_update = false;
fn update_diff(
&mut self,
edit_range: Range<Anchor>,
line_operations: Vec<LineOperation>,
cx: &mut ModelContext<Self>,
) {
let old_snapshot = self.snapshot.clone();
let old_range = edit_range.to_point(&old_snapshot);
let new_snapshot = self.buffer.read(cx).snapshot(cx);
let new_range = edit_range.to_point(&new_snapshot);
let old_snapshot = self.snapshot.clone();
let old_range = edit_range.to_point(&old_snapshot);
let new_snapshot = self.buffer.read(cx).snapshot(cx);
let new_range = edit_range.to_point(&new_snapshot);
let mut old_row = old_range.start.row;
let mut new_row = new_range.start.row;
self.diff.task = Some(cx.spawn(|this, mut cx| async move {
let (deleted_row_ranges, inserted_row_ranges) = cx
.background_executor()
.spawn(async move {
let old_text = old_snapshot
.text_for_range(
Point::new(old_range.start.row, 0)
..Point::new(
old_range.end.row,
old_snapshot.line_len(MultiBufferRow(old_range.end.row)),
),
)
.collect::<String>();
let new_text = new_snapshot
.text_for_range(
Point::new(new_range.start.row, 0)
..Point::new(
new_range.end.row,
new_snapshot.line_len(MultiBufferRow(new_range.end.row)),
),
)
.collect::<String>();
self.diff.deleted_row_ranges.clear();
self.diff.inserted_row_ranges.clear();
for operation in line_operations {
match operation {
LineOperation::Keep { lines } => {
old_row += lines;
new_row += lines;
}
LineOperation::Delete { lines } => {
let old_end_row = old_row + lines - 1;
let new_row = new_snapshot.anchor_before(Point::new(new_row, 0));
let mut old_row = old_range.start.row;
let mut new_row = new_range.start.row;
let diff = TextDiff::from_lines(old_text.as_str(), new_text.as_str());
let mut deleted_row_ranges: Vec<(Anchor, RangeInclusive<u32>)> = Vec::new();
let mut inserted_row_ranges = Vec::new();
for change in diff.iter_all_changes() {
let line_count = change.value().lines().count() as u32;
match change.tag() {
similar::ChangeTag::Equal => {
old_row += line_count;
new_row += line_count;
}
similar::ChangeTag::Delete => {
let old_end_row = old_row + line_count - 1;
let new_row =
new_snapshot.anchor_before(Point::new(new_row, 0));
if let Some((_, last_deleted_row_range)) =
deleted_row_ranges.last_mut()
{
if *last_deleted_row_range.end() + 1 == old_row {
*last_deleted_row_range =
*last_deleted_row_range.start()..=old_end_row;
} else {
deleted_row_ranges
.push((new_row, old_row..=old_end_row));
}
} else {
deleted_row_ranges.push((new_row, old_row..=old_end_row));
}
old_row += line_count;
}
similar::ChangeTag::Insert => {
let new_end_row = new_row + line_count - 1;
let start = new_snapshot.anchor_before(Point::new(new_row, 0));
let end = new_snapshot.anchor_before(Point::new(
new_end_row,
new_snapshot.line_len(MultiBufferRow(new_end_row)),
));
inserted_row_ranges.push(start..=end);
new_row += line_count;
}
}
if let Some((_, last_deleted_row_range)) =
self.diff.deleted_row_ranges.last_mut()
{
if *last_deleted_row_range.end() + 1 == old_row {
*last_deleted_row_range = *last_deleted_row_range.start()..=old_end_row;
} else {
self.diff
.deleted_row_ranges
.push((new_row, old_row..=old_end_row));
}
(deleted_row_ranges, inserted_row_ranges)
})
.await;
this.update(&mut cx, |this, cx| {
this.diff.deleted_row_ranges = deleted_row_ranges;
this.diff.inserted_row_ranges = inserted_row_ranges;
this.diff.task = None;
if this.diff.should_update {
this.update_diff(edit_range, cx);
} else {
self.diff
.deleted_row_ranges
.push((new_row, old_row..=old_end_row));
}
cx.notify();
})
.ok();
}));
old_row += lines;
}
LineOperation::Insert { lines } => {
let new_end_row = new_row + lines - 1;
let start = new_snapshot.anchor_before(Point::new(new_row, 0));
let end = new_snapshot.anchor_before(Point::new(
new_end_row,
new_snapshot.line_len(MultiBufferRow(new_end_row)),
));
self.diff.inserted_row_ranges.push(start..=end);
new_row += lines;
}
}
cx.notify();
}
}
}

View File

@ -1,5 +1,6 @@
use collections::HashMap;
use ordered_float::OrderedFloat;
use rope::{Point, Rope, TextSummary};
use std::collections::{BTreeSet, HashMap};
use std::{
cmp,
fmt::{self, Debug},
@ -64,11 +65,11 @@ impl Debug for Matrix {
}
}
#[derive(Debug)]
pub enum Hunk {
#[derive(Debug, Clone)]
pub enum CharOperation {
Insert { text: String },
Remove { len: usize },
Keep { len: usize },
Delete { bytes: usize },
Keep { bytes: usize },
}
pub struct StreamingDiff {
@ -103,7 +104,7 @@ impl StreamingDiff {
}
}
pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
pub fn push_new(&mut self, text: &str) -> Vec<CharOperation> {
self.new.extend(text.chars());
self.scores.resize(self.old.len() + 1, self.new.len() + 1);
@ -145,7 +146,7 @@ impl StreamingDiff {
hunks
}
fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<CharOperation> {
let mut pending_insert: Option<Range<usize>> = None;
let mut hunks = Vec::new();
let mut i = old_text_ix;
@ -185,22 +186,22 @@ impl StreamingDiff {
}
} else {
if let Some(range) = pending_insert.take() {
hunks.push(Hunk::Insert {
hunks.push(CharOperation::Insert {
text: self.new[range].iter().collect(),
});
}
let char_len = self.old[i - 1].len_utf8();
if prev_i == i - 1 && prev_j == j {
if let Some(Hunk::Remove { len }) = hunks.last_mut() {
if let Some(CharOperation::Delete { bytes: len }) = hunks.last_mut() {
*len += char_len;
} else {
hunks.push(Hunk::Remove { len: char_len })
hunks.push(CharOperation::Delete { bytes: char_len })
}
} else if let Some(Hunk::Keep { len }) = hunks.last_mut() {
} else if let Some(CharOperation::Keep { bytes: len }) = hunks.last_mut() {
*len += char_len;
} else {
hunks.push(Hunk::Keep { len: char_len })
hunks.push(CharOperation::Keep { bytes: char_len })
}
}
@ -209,7 +210,7 @@ impl StreamingDiff {
}
if let Some(range) = pending_insert.take() {
hunks.push(Hunk::Insert {
hunks.push(CharOperation::Insert {
text: self.new[range].iter().collect(),
});
}
@ -218,74 +219,853 @@ impl StreamingDiff {
hunks
}
pub fn finish(self) -> Vec<Hunk> {
pub fn finish(self) -> Vec<CharOperation> {
self.backtrack(self.old.len(), self.new.len())
}
}
#[cfg(test)]
mod tests {
use std::env;
/// A line-granularity diff operation; every variant carries the number of
/// consecutive rows it spans.
#[derive(Debug, Clone, PartialEq)]
pub enum LineOperation {
/// `lines` rows that exist only in the new text.
Insert { lines: u32 },
/// `lines` rows that exist only in the old text.
Delete { lines: u32 },
/// `lines` rows shared by the old and the new text.
Keep { lines: u32 },
}
use super::*;
use rand::prelude::*;
/// Incrementally turns a stream of `CharOperation`s into row-level changes,
/// which `line_operations` reports as a list of `LineOperation`s.
#[derive(Debug, Default)]
pub struct LineDiff {
/// Set when text starting with a newline is inserted at the end of an old line;
/// cleared again whenever text is kept or a deletion is flushed.
inserted_newline_at_end: bool,
/// The extent of kept and deleted text.
old_end: Point,
/// The extent of kept and inserted text.
new_end: Point,
/// Deleted rows, expressed in terms of the old text.
deleted_rows: BTreeSet<u32>,
/// Inserted rows, expressed in terms of the new text.
inserted_rows: BTreeSet<u32>,
/// Inserted text that has not been recorded in `inserted_rows` yet.
buffered_insert: String,
/// Number of deleted bytes that have not been recorded in `deleted_rows` yet.
/// After deleting a newline, we buffer deletion until we keep or insert a character.
buffered_delete: usize,
}
#[gpui::test(iterations = 100)]
fn test_random_diffs(mut rng: StdRng) {
let old_text_len = env::var("OLD_TEXT_LEN")
.map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
.unwrap_or(10);
let new_text_len = env::var("NEW_TEXT_LEN")
.map(|i| i.parse().expect("invalid `NEW_TEXT_LEN` variable"))
.unwrap_or(10);
let old = util::RandomCharIter::new(&mut rng)
.take(old_text_len)
.collect::<String>();
log::info!("old text: {:?}", old);
let mut diff = StreamingDiff::new(old.clone());
let mut hunks = Vec::new();
let mut new_len = 0;
let mut new = String::new();
while new_len < new_text_len {
let new_chunk_len = rng.gen_range(1..=new_text_len - new_len);
let new_chunk = util::RandomCharIter::new(&mut rng)
.take(new_len)
.collect::<String>();
log::info!("new chunk: {:?}", new_chunk);
new_len += new_chunk_len;
new.push_str(&new_chunk);
let new_hunks = diff.push_new(&new_chunk);
log::info!("hunks: {:?}", new_hunks);
hunks.extend(new_hunks);
impl LineDiff {
pub fn push_char_operations<'a>(
&mut self,
operations: impl IntoIterator<Item = &'a CharOperation>,
old_text: &Rope,
) {
for operation in operations {
self.push_char_operation(operation, old_text);
}
let final_hunks = diff.finish();
log::info!("final hunks: {:?}", final_hunks);
hunks.extend(final_hunks);
}
log::info!("new text: {:?}", new);
let mut old_ix = 0;
let mut new_ix = 0;
let mut patched = String::new();
for hunk in hunks {
match hunk {
Hunk::Keep { len } => {
assert_eq!(&old[old_ix..old_ix + len], &new[new_ix..new_ix + len]);
patched.push_str(&old[old_ix..old_ix + len]);
old_ix += len;
new_ix += len;
pub fn push_char_operation(&mut self, operation: &CharOperation, old_text: &Rope) {
match operation {
CharOperation::Insert { text } => {
self.flush_delete(old_text);
if is_line_start(self.old_end) {
if let Some(newline_ix) = text.rfind('\n') {
let (prefix, suffix) = text.split_at(newline_ix + 1);
self.buffered_insert.push_str(prefix);
self.flush_insert(old_text);
self.buffered_insert.push_str(suffix);
} else {
self.buffered_insert.push_str(&text);
}
} else {
self.buffered_insert.push_str(&text);
if !text.ends_with('\n') {
self.flush_insert(old_text);
}
}
Hunk::Remove { len } => {
old_ix += len;
}
CharOperation::Delete { bytes } => {
self.buffered_delete += bytes;
let common_suffix_len = self.trim_buffered_end(old_text);
self.flush_insert(old_text);
if common_suffix_len > 0 || !is_line_end(self.old_end, old_text) {
self.flush_delete(old_text);
self.keep(common_suffix_len, old_text);
}
Hunk::Insert { text } => {
assert_eq!(text, &new[new_ix..new_ix + text.len()]);
patched.push_str(&text);
new_ix += text.len();
}
CharOperation::Keep { bytes } => {
self.flush_delete(old_text);
self.flush_insert(old_text);
self.keep(*bytes, old_text);
}
}
}
/// Records the buffered insertion: advances `new_end` past the buffered text,
/// marks the affected new rows as inserted (and, where the insertion touches an
/// existing old row, marks that old row as deleted), then empties the buffer.
/// Does nothing when no insertion is buffered.
fn flush_insert(&mut self, old_text: &Rope) {
if self.buffered_insert.is_empty() {
return;
}
let new_start = self.new_end;
// Extent of the buffered text as reported by `TextSummary`.
let lines = TextSummary::from(self.buffered_insert.as_str()).lines;
self.new_end += lines;
if is_line_start(self.old_end) {
if self.new_end.column == 0 {
// The insertion starts at the beginning of an old row and the new text ends at
// column 0, so the old row is left alone and the end row is excluded.
self.inserted_rows.extend(new_start.row..self.new_end.row);
} else {
// The insertion leaves text on the row that continues with the old line, so
// that old row counts as replaced.
self.deleted_rows.insert(self.old_end.row);
self.inserted_rows.extend(new_start.row..=self.new_end.row);
}
} else if is_line_end(self.old_end, old_text) {
if self.buffered_insert.starts_with('\n') {
// Appending after the end of an old row, starting with a newline: the old row
// itself is unchanged, only the rows after it are new.
self.inserted_rows
.extend(new_start.row + 1..=self.new_end.row);
self.inserted_newline_at_end = true;
} else {
// The old row only counts as replaced if the previous flush did not already
// start a fresh row at this position.
if !self.inserted_newline_at_end {
self.deleted_rows.insert(self.old_end.row);
}
self.inserted_rows.extend(new_start.row..=self.new_end.row);
}
} else {
// Insertion in the middle of an old row: that row is replaced.
self.deleted_rows.insert(self.old_end.row);
self.inserted_rows.extend(new_start.row..=self.new_end.row);
}
self.buffered_insert.clear();
}
/// Records the buffered deletion: advances `old_end` by `buffered_delete` bytes,
/// marks the affected old rows as deleted (and, for partial-row deletions, the
/// current new row as inserted), then resets the buffer.
/// Does nothing when no deletion is buffered.
fn flush_delete(&mut self, old_text: &Rope) {
if self.buffered_delete == 0 {
return;
}
let old_start = self.old_end;
self.old_end =
old_text.offset_to_point(old_text.point_to_offset(self.old_end) + self.buffered_delete);
if is_line_end(old_start, old_text) && is_line_end(self.old_end, old_text) {
// The deletion runs from the end of one row to the end of a later row: the row
// it started on is left alone, every following row up to the end row is deleted.
self.deleted_rows
.extend(old_start.row + 1..=self.old_end.row);
} else if is_line_start(old_start)
&& (is_line_start(self.old_end) && self.old_end < old_text.max_point())
&& self.new_end.column == 0
{
// The deletion covers rows from a row start up to a later row start (before the
// end of the old text) while the new text is also at column 0: the end row is
// excluded.
self.deleted_rows.extend(old_start.row..self.old_end.row);
} else {
// Partial-row deletion: every old row touched is deleted and the current new row
// is reported as inserted.
self.inserted_rows.insert(self.new_end.row);
self.deleted_rows.extend(old_start.row..=self.old_end.row);
}
self.inserted_newline_at_end = false;
self.buffered_delete = 0;
}
/// Advances both `old_end` and `new_end` over `bytes` bytes of unchanged old text.
/// A zero-length keep is a no-op and leaves `inserted_newline_at_end` untouched.
fn keep(&mut self, bytes: usize, old_text: &Rope) {
    if bytes > 0 {
        let kept_start_offset = old_text.point_to_offset(self.old_end);
        let kept_end = old_text.offset_to_point(kept_start_offset + bytes);
        let delta = kept_end - self.old_end;
        self.old_end += delta;
        self.new_end += delta;
        self.inserted_newline_at_end = false;
    }
}
/// Strips the longest common suffix shared by the buffered insertion and the old
/// text covered by the buffered deletion from both buffers.
///
/// Returns the length of that suffix in bytes.
fn trim_buffered_end(&mut self, old_text: &Rope) -> usize {
    let old_start_offset = old_text.point_to_offset(self.old_end);
    let old_end_offset = old_start_offset + self.buffered_delete;

    // The rope hands out chunks front to back. To walk the deleted range backwards
    // from its end, the chunks themselves must be visited in reverse order, not just
    // the characters inside each chunk.
    let old_chunks: Vec<&str> = old_text
        .chunks_in_range(old_start_offset..old_end_offset)
        .collect();
    let old_chars = old_chunks
        .into_iter()
        .rev()
        .flat_map(|chunk| chunk.chars().rev());
    let new_chars = self.buffered_insert.chars().rev();

    // Equal characters have equal UTF-8 lengths, so one count serves both buffers.
    let common_suffix_len: usize = new_chars
        .zip(old_chars)
        .take_while(|(new_ch, old_ch)| new_ch == old_ch)
        .map(|(new_ch, _)| new_ch.len_utf8())
        .sum();

    self.buffered_delete -= common_suffix_len;
    self.buffered_insert
        .truncate(self.buffered_insert.len() - common_suffix_len);

    common_suffix_len
}
/// Flushes any buffered insertion and deletion, then treats the rest of the old
/// text as kept by moving `old_end` to the end of `old_text` and advancing
/// `new_end` by the same amount.
pub fn finish(&mut self, old_text: &Rope) {
    self.flush_insert(old_text);
    self.flush_delete(old_text);

    let old_max = old_text.max_point();
    let trailing = old_max - self.old_end;
    self.old_end = old_max;
    self.new_end += trailing;
}
pub fn line_operations(&self) -> Vec<LineOperation> {
let mut ops = Vec::new();
let mut deleted_rows = self.deleted_rows.iter().copied().peekable();
let mut inserted_rows = self.inserted_rows.iter().copied().peekable();
let mut old_row = 0;
let mut new_row = 0;
while deleted_rows.peek().is_some() || inserted_rows.peek().is_some() {
// Check for a run of deleted lines at current old row.
if Some(old_row) == deleted_rows.peek().copied() {
if let Some(LineOperation::Delete { lines }) = ops.last_mut() {
*lines += 1;
} else {
ops.push(LineOperation::Delete { lines: 1 });
}
old_row += 1;
deleted_rows.next();
} else if Some(new_row) == inserted_rows.peek().copied() {
if let Some(LineOperation::Insert { lines }) = ops.last_mut() {
*lines += 1;
} else {
ops.push(LineOperation::Insert { lines: 1 });
}
new_row += 1;
inserted_rows.next();
} else {
// Keep lines until the next deletion, insertion, or the end of the old text.
let lines_to_next_deletion = inserted_rows
.peek()
.copied()
.unwrap_or(self.new_end.row + 1)
- new_row;
let lines_to_next_insertion =
deleted_rows.peek().copied().unwrap_or(self.old_end.row + 1) - old_row;
let kept_lines =
cmp::max(1, cmp::min(lines_to_next_insertion, lines_to_next_deletion));
if kept_lines > 0 {
ops.push(LineOperation::Keep { lines: kept_lines });
old_row += kept_lines;
new_row += kept_lines;
}
}
}
assert_eq!(patched, new);
if old_row < self.old_end.row + 1 {
ops.push(LineOperation::Keep {
lines: self.old_end.row + 1 - old_row,
});
}
ops
}
}
fn is_line_start(point: Point) -> bool {
point.column == 0
}
/// Returns `true` when `point`'s column equals the length of its row in `text`.
fn is_line_end(point: Point, text: &Rope) -> bool {
    let row_len = text.line_len(point.row);
    point.column == row_len
}
#[cfg(test)]
mod tests {
use super::*;
use rand::prelude::*;
use std::env;
/// Removing the first line together with its newline is one deleted row followed
/// by one kept row.
#[test]
fn test_delete_first_of_two_lines() {
    let before = "aaaa\nbbbb";
    let ops = [
        CharOperation::Delete { bytes: 5 },
        CharOperation::Keep { bytes: 4 },
    ];
    let expected = [
        LineOperation::Delete { lines: 1 },
        LineOperation::Keep { lines: 1 },
    ];

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &expected));

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(actual, expected);
}
/// Deleting the contents of the last line while keeping the newline before it
/// replaces that row with an empty one.
#[test]
fn test_delete_second_of_two_lines() {
    let before = "aaaa\nbbbb";
    let ops = [
        CharOperation::Keep { bytes: 5 },
        CharOperation::Delete { bytes: 4 },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Keep { lines: 1 },
            LineOperation::Delete { lines: 1 },
            LineOperation::Insert { lines: 1 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Appending a newline plus text after the last line keeps both old rows and
/// inserts one new row.
#[test]
fn test_add_new_line() {
    let before = "aaaa\nbbbb";
    let ops = [
        CharOperation::Keep { bytes: 9 },
        CharOperation::Insert {
            text: String::from("\ncccc"),
        },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Keep { lines: 2 },
            LineOperation::Insert { lines: 1 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Removing the middle line (with its newline) deletes exactly that row.
#[test]
fn test_delete_line_in_middle() {
    let before = "aaaa\nbbbb\ncccc";
    let ops = [
        CharOperation::Keep { bytes: 5 },
        CharOperation::Delete { bytes: 5 },
        CharOperation::Keep { bytes: 4 },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Keep { lines: 1 },
            LineOperation::Delete { lines: 1 },
            LineOperation::Keep { lines: 1 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Swapping the contents of the middle line is reported as one deleted and one
/// inserted row between two kept rows.
#[test]
fn test_replace_line() {
    let before = "aaaa\nbbbb\ncccc";
    let ops = [
        CharOperation::Keep { bytes: 5 },
        CharOperation::Delete { bytes: 4 },
        CharOperation::Insert {
            text: String::from("BBBB"),
        },
        CharOperation::Keep { bytes: 5 },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Keep { lines: 1 },
            LineOperation::Delete { lines: 1 },
            LineOperation::Insert { lines: 1 },
            LineOperation::Keep { lines: 1 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Several independent edits: a prefix on the first line, a removed third line
/// and a line appended at the end.
#[test]
fn test_multiple_edits_on_different_lines() {
    let before = "aaaa\nbbbb\ncccc\ndddd";
    let ops = [
        CharOperation::Insert {
            text: String::from("A"),
        },
        CharOperation::Keep { bytes: 9 },
        CharOperation::Delete { bytes: 5 },
        CharOperation::Keep { bytes: 4 },
        CharOperation::Insert {
            text: String::from("\nEEEE"),
        },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Delete { lines: 1 },
            LineOperation::Insert { lines: 1 },
            LineOperation::Keep { lines: 1 },
            LineOperation::Delete { lines: 2 },
            LineOperation::Insert { lines: 2 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Appending a character to the end of the first line replaces only that row.
#[test]
fn test_edit_at_end_of_line() {
    let before = "aaaa\nbbbb\ncccc";
    let ops = [
        CharOperation::Keep { bytes: 4 },
        CharOperation::Insert {
            text: String::from("A"),
        },
        CharOperation::Keep { bytes: 10 },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Delete { lines: 1 },
            LineOperation::Insert { lines: 1 },
            LineOperation::Keep { lines: 2 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Splitting a single line with a newline turns one old row into two new rows.
#[test]
fn test_insert_newline_character() {
    let before = "aaaabbbb";
    let ops = [
        CharOperation::Keep { bytes: 4 },
        CharOperation::Insert {
            text: String::from("\n"),
        },
        CharOperation::Keep { bytes: 4 },
    ];

    let after = apply_char_operations(before, &ops);
    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Delete { lines: 1 },
            LineOperation::Insert { lines: 2 },
        ]
    );
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// A newline inserted before all existing text adds one row and keeps the rest.
#[test]
fn test_insert_newline_at_beginning() {
    let before = "aaaa\nbbbb";
    let ops = [
        CharOperation::Insert {
            text: String::from("\n"),
        },
        CharOperation::Keep { bytes: 9 },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Insert { lines: 1 },
            LineOperation::Keep { lines: 2 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Deleting the newline between two lines merges two old rows into one new row.
#[test]
fn test_delete_newline() {
    let before = "aaaa\nbbbb";
    let ops = [
        CharOperation::Keep { bytes: 4 },
        CharOperation::Delete { bytes: 1 },
        CharOperation::Keep { bytes: 4 },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Delete { lines: 2 },
            LineOperation::Insert { lines: 1 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Two newlines inserted at the start of the second line add two rows in between.
#[test]
fn test_insert_multiple_newlines() {
    let before = "aaaa\nbbbb";
    let ops = [
        CharOperation::Keep { bytes: 5 },
        CharOperation::Insert {
            text: String::from("\n\n"),
        },
        CharOperation::Keep { bytes: 4 },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Keep { lines: 1 },
            LineOperation::Insert { lines: 2 },
            LineOperation::Keep { lines: 1 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Deleting two consecutive empty lines removes exactly those two rows.
#[test]
fn test_delete_multiple_newlines() {
    let before = "aaaa\n\n\nbbbb";
    let ops = [
        CharOperation::Keep { bytes: 5 },
        CharOperation::Delete { bytes: 2 },
        CharOperation::Keep { bytes: 4 },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Keep { lines: 1 },
            LineOperation::Delete { lines: 2 },
            LineOperation::Keep { lines: 1 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, apply_line_operations(before, &after, &actual));
}
/// Replaces the second line and inserts a new line before the last one, checking
/// both the line operations and the exact resulting text.
#[test]
fn test_complex_scenario() {
    let before = "line1\nline2\nline3\nline4";
    let ops = [
        CharOperation::Keep { bytes: 6 },
        CharOperation::Insert {
            text: String::from("inserted\n"),
        },
        CharOperation::Delete { bytes: 6 },
        CharOperation::Keep { bytes: 5 },
        CharOperation::Insert {
            text: String::from("\nnewline"),
        },
        CharOperation::Keep { bytes: 6 },
    ];

    let actual = char_ops_to_line_ops(before, &ops);
    assert_eq!(
        actual,
        [
            LineOperation::Keep { lines: 1 },
            LineOperation::Delete { lines: 1 },
            LineOperation::Insert { lines: 1 },
            LineOperation::Keep { lines: 1 },
            LineOperation::Insert { lines: 1 },
            LineOperation::Keep { lines: 1 },
        ]
    );

    let after = apply_char_operations(before, &ops);
    assert_eq!(after, "line1\ninserted\nline3\nnewline\nline4");
    assert_eq!(apply_line_operations(before, &after, &actual), after);
}
/// Streams a rewrite of a `for` loop into a `while` loop in small insert chunks,
/// followed by a deletion whose text shares a suffix with the buffered insertion.
///
/// The fixture's indentation is significant: the `Keep`/`Delete` byte counts below
/// (8 bytes of indent, 23-byte statements) only line up when the first and last
/// lines are indented by 8 spaces and the two inner lines by 12.
#[test]
fn test_cleaning_up_common_suffix() {
    let old_text = concat!(
        "        for y in 0..size.y() {\n",
        "            let a = 10;\n",
        "            let b = 20;\n",
        "        }",
    );
    let char_ops = [
        // Leading indentation of the `for` line.
        CharOperation::Keep { bytes: 8 },
        CharOperation::Insert { text: "let".into() },
        CharOperation::Insert {
            text: " mut".into(),
        },
        CharOperation::Insert { text: " y".into() },
        CharOperation::Insert { text: " =".into() },
        CharOperation::Insert { text: " 0".into() },
        CharOperation::Insert { text: ";".into() },
        CharOperation::Insert { text: "\n".into() },
        CharOperation::Insert {
            text: "        while".into(),
        },
        CharOperation::Insert { text: " y".into() },
        CharOperation::Insert {
            text: " < size".into(),
        },
        CharOperation::Insert { text: ".".into() },
        CharOperation::Insert { text: "y".into() },
        CharOperation::Insert { text: "()".into() },
        CharOperation::Insert { text: " {".into() },
        CharOperation::Insert { text: "\n".into() },
        // `for y in 0..size.y() {` plus its newline.
        CharOperation::Delete { bytes: 23 },
        // The two `let` lines, each followed by its newline.
        CharOperation::Keep { bytes: 23 },
        CharOperation::Keep { bytes: 1 },
        CharOperation::Keep { bytes: 23 },
        CharOperation::Keep { bytes: 1 },
        // Leading indentation of the closing brace line.
        CharOperation::Keep { bytes: 8 },
        CharOperation::Insert {
            text: "    y".into(),
        },
        CharOperation::Insert { text: " +=".into() },
        CharOperation::Insert { text: " 1".into() },
        CharOperation::Insert { text: ";".into() },
        CharOperation::Insert { text: "\n".into() },
        CharOperation::Insert {
            text: "        ".into(),
        },
        // The closing brace.
        CharOperation::Keep { bytes: 1 },
    ];
    let line_ops = char_ops_to_line_ops(old_text, &char_ops);
    assert_eq!(
        line_ops,
        vec![
            LineOperation::Delete { lines: 1 },
            LineOperation::Insert { lines: 2 },
            LineOperation::Keep { lines: 2 },
            LineOperation::Delete { lines: 1 },
            LineOperation::Insert { lines: 2 },
        ]
    );
    let new_text = apply_char_operations(old_text, &char_ops);
    assert_eq!(
        new_text,
        apply_line_operations(old_text, &new_text, &line_ops)
    );
}
/// Randomized round trip: edits a random text, streams the result through
/// `StreamingDiff`, and checks that both the character operations and the derived
/// line operations reproduce the edited text. `OLD_TEXT_LEN` overrides the length
/// of the generated text (default 10).
#[test]
fn test_random_diffs() {
    random_test(|mut rng| {
        let old_text_len = match env::var("OLD_TEXT_LEN") {
            Ok(value) => value.parse().expect("invalid `OLD_TEXT_LEN` variable"),
            Err(_) => 10,
        };

        let old = random_text(&mut rng, old_text_len);
        println!("old text: {:?}", old);

        let new = randomly_edit(&old, &mut rng);
        println!("new text: {:?}", new);

        let char_operations = random_streaming_diff(&mut rng, &old, &new);
        println!("char operations: {:?}", char_operations);

        // The character operations alone must reproduce the new text.
        let from_char_ops = apply_char_operations(&old, &char_operations);
        assert_eq!(from_char_ops, new);

        // So must the line operations derived from them.
        let line_ops = char_ops_to_line_ops(&old, &char_operations);
        println!("line operations: {:?}", line_ops);
        let from_line_ops = apply_line_operations(&old, &new, &line_ops);
        assert_eq!(from_line_ops, new);
    });
}
/// Feeds `char_ops` through a fresh `LineDiff` over `old_text`, finishes it, and
/// returns the resulting line operations.
fn char_ops_to_line_ops(old_text: &str, char_ops: &[CharOperation]) -> Vec<LineOperation> {
    let rope = Rope::from(old_text);
    let mut line_diff = LineDiff::default();
    line_diff.push_char_operations(char_ops, &rope);
    line_diff.finish(&rope);
    line_diff.line_operations()
}
/// Streams `new` into a `StreamingDiff` over `old` in randomly sized chunks (each
/// ending on a char boundary) and returns all character operations it emitted,
/// including those from `finish`.
fn random_streaming_diff(rng: &mut impl Rng, old: &str, new: &str) -> Vec<CharOperation> {
    let mut diff = StreamingDiff::new(old.to_string());
    let mut operations = Vec::new();
    let mut consumed = 0;

    while consumed < new.len() {
        let mut chunk_end = consumed + rng.gen_range(1..=new.len() - consumed);
        // Grow the chunk until it no longer splits a multi-byte character.
        while !new.is_char_boundary(chunk_end) {
            chunk_end += 1;
        }
        operations.extend(diff.push_new(&new[consumed..chunk_end]));
        consumed = chunk_end;
    }

    operations.extend(diff.finish());
    operations
}
/// Runs `test_fn` repeatedly, each time with an `StdRng` seeded with `SEED + i`.
///
/// The `ITERATIONS` (default 100) and `SEED` (default 0) environment variables
/// control the run; a value that fails to parse panics.
fn random_test<F>(mut test_fn: F)
where
    F: FnMut(StdRng),
{
    let iterations = env::var("ITERATIONS").map_or(100, |value| {
        value.parse().expect("invalid `ITERATIONS` variable")
    });
    let seed: u64 =
        env::var("SEED").map_or(0, |value| value.parse().expect("invalid `SEED` variable"));

    println!(
        "Running test with {} iterations and seed {}",
        iterations, seed
    );

    for iteration in 0..iterations {
        println!("Iteration {}", iteration + 1);
        test_fn(StdRng::seed_from_u64(seed + iteration));
    }
}
/// Rebuilds a text from `line_ops`: kept rows are copied from `old_text`, inserted
/// rows from `new_text`, deleted rows are skipped. Rows are split and joined on
/// `'\n'`.
fn apply_line_operations(old_text: &str, new_text: &str, line_ops: &[LineOperation]) -> String {
    let old_lines: Vec<&str> = old_text.split('\n').collect();
    let new_lines: Vec<&str> = new_text.split('\n').collect();

    let mut output: Vec<&str> = Vec::new();
    let (mut old_ix, mut new_ix) = (0_usize, 0_usize);

    for op in line_ops {
        match *op {
            LineOperation::Insert { lines } => {
                let upto = new_ix + lines as usize;
                output.extend_from_slice(&new_lines[new_ix..upto]);
                new_ix = upto;
            }
            LineOperation::Delete { lines } => old_ix += lines as usize,
            LineOperation::Keep { lines } => {
                let count = lines as usize;
                let upto = old_ix + count;
                output.extend_from_slice(&old_lines[old_ix..upto]);
                old_ix = upto;
                new_ix += count;
            }
        }
    }

    output.join("\n")
}
/// Sanity check for the `apply_char_operations` helper itself.
#[test]
fn test_apply_char_operations() {
    let before = "Hello, world!";
    let ops = [
        CharOperation::Keep { bytes: 7 },
        CharOperation::Delete { bytes: 5 },
        CharOperation::Insert {
            text: String::from("Rust"),
        },
        CharOperation::Keep { bytes: 1 },
    ];

    assert_eq!(apply_char_operations(before, &ops), "Hello, Rust!");
}
/// Builds a string from the first `length` characters produced by
/// `util::RandomCharIter`.
fn random_text(rng: &mut impl Rng, length: usize) -> String {
    let mut text = String::new();
    text.extend(util::RandomCharIter::new(rng).take(length));
    text
}
/// Applies between one and five random edits (insert, delete or replace) to a copy
/// of `text` and returns the result. All edit positions fall on char boundaries.
fn randomly_edit(text: &str, rng: &mut impl Rng) -> String {
    /// Picks a random `(start, end)` byte range in `text`, snapping `start` down and
    /// `end` up to the nearest char boundary.
    fn random_char_range(text: &str, rng: &mut impl Rng) -> (usize, usize) {
        let mut start = rng.gen_range(0..=text.len());
        while !text.is_char_boundary(start) {
            start -= 1;
        }

        let mut end = rng.gen_range(start..=text.len());
        while !text.is_char_boundary(end) {
            end += 1;
        }

        (start, end)
    }

    let mut edited = text.to_owned();
    let edit_count = rng.gen_range(1..=5);

    for _ in 0..edit_count {
        match rng.gen_range(0..3) {
            // Insert up to five random characters at a random position.
            0 => {
                let (position, _) = random_char_range(&edited, rng);
                let insert_len = rng.gen_range(1..=5);
                let inserted: String = random_text(rng, insert_len);
                edited.insert_str(position, &inserted);
            }
            // Delete a random range.
            1 if !edited.is_empty() => {
                let (start, end) = random_char_range(&edited, rng);
                edited.replace_range(start..end, "");
            }
            // Replace a random range with as many random characters as it had bytes.
            2 if !edited.is_empty() => {
                let (start, end) = random_char_range(&edited, rng);
                let replacement: String = random_text(rng, end - start);
                edited.replace_range(start..end, &replacement);
            }
            // Nothing to delete or replace in an empty text.
            1 | 2 => {}
            _ => unreachable!(),
        }
    }

    edited
}
/// Replays `char_ops` against `old_text`: kept bytes are copied, deleted bytes are
/// skipped, inserted text is appended. Returns the patched text.
fn apply_char_operations(old_text: &str, char_ops: &[CharOperation]) -> String {
    let mut patched = String::new();
    let mut cursor = 0;

    for op in char_ops {
        match op {
            CharOperation::Insert { text } => patched.push_str(text),
            CharOperation::Delete { bytes } => cursor += bytes,
            CharOperation::Keep { bytes } => {
                let end = cursor + bytes;
                patched.push_str(&old_text[cursor..end]);
                cursor = end;
            }
        }
    }

    patched
}
}