Correctly handle Unicode diffs

This commit is contained in:
Nikita Galaiko 2023-04-21 12:40:39 +02:00
parent 0153f3bc7e
commit 3690b99537
3 changed files with 23 additions and 12 deletions

View File

@ -36,7 +36,7 @@ md5 = "0.7.0"
urlencoding = "2.1.2" urlencoding = "2.1.2"
thiserror = "1.0.38" thiserror = "1.0.38"
tantivy = "0.19.2" tantivy = "0.19.2"
similar = "2.2.1" similar = { version = "2.2.1", features = ["unicode"] }
fslock = "0.2.1" fslock = "0.2.1"
tokio = { version = "1.26.0", features = ["full", "sync"] } tokio = { version = "1.26.0", features = ["full", "sync"] }
tokio-tungstenite = "0.18.0" tokio-tungstenite = "0.18.0"

View File

@ -6,16 +6,16 @@ use similar::{ChangeTag, TextDiff};
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub enum Operation { pub enum Operation {
// corresponds to YText.insert(index, chunk) // corresponds to YText.insert(index, chunk)
Insert((u32, String)), Insert((usize, String)),
// corresponds to YText.remove_range(index, len) // corresponds to YText.remove_range(index, len)
Delete((u32, u32)), Delete((usize, usize)),
} }
impl Operation { impl Operation {
pub fn apply(&self, text: &mut Vec<char>) -> Result<()> { pub fn apply(&self, text: &mut Vec<char>) -> Result<()> {
match self { match self {
Operation::Insert((index, chunk)) => { Operation::Insert((index, chunk)) => {
if *index as usize > text.len() { if *index > text.len() {
Err(anyhow::anyhow!( Err(anyhow::anyhow!(
"Index out of bounds, {} > {}", "Index out of bounds, {} > {}",
index, index,
@ -30,7 +30,7 @@ impl Operation {
} }
} }
Operation::Delete((index, len)) => { Operation::Delete((index, len)) => {
if *index as usize > text.len() { if *index > text.len() {
Err(anyhow::anyhow!( Err(anyhow::anyhow!(
"Index out of bounds, {} > {}", "Index out of bounds, {} > {}",
index, index,
@ -61,7 +61,7 @@ fn merge_touching(ops: &Vec<Operation>) -> Vec<Operation> {
for op in ops { for op in ops {
match (merged.last_mut(), op) { match (merged.last_mut(), op) {
(Some(Operation::Insert((index, chunk))), Operation::Insert((index2, chunk2))) => { (Some(Operation::Insert((index, chunk))), Operation::Insert((index2, chunk2))) => {
if *index + chunk.len() as u32 == *index2 { if *index + chunk.len() == *index2 {
chunk.push_str(chunk2); chunk.push_str(chunk2);
} else { } else {
merged.push(op.clone()); merged.push(op.clone());
@ -86,25 +86,27 @@ pub fn get_delta_operations(initial_text: &str, final_text: &str) -> Vec<Operati
return vec![]; return vec![];
} }
let changeset = TextDiff::configure().diff_chars(initial_text, final_text); let changeset = TextDiff::configure().diff_graphemes(initial_text, final_text);
let mut offset: u32 = 0;
let mut deltas = vec![]; let mut deltas = vec![];
let mut offset = 0;
for change in changeset.iter_all_changes() { for change in changeset.iter_all_changes() {
println!("{:?}", change);
match change.tag() { match change.tag() {
ChangeTag::Delete => { ChangeTag::Delete => {
deltas.push(Operation::Delete(( deltas.push(Operation::Delete((
offset, offset,
change.as_str().unwrap_or("").len() as u32, change.as_str().unwrap_or("").chars().count(),
))); )));
} }
ChangeTag::Insert => { ChangeTag::Insert => {
let text = change.as_str().unwrap_or(""); let text = change.as_str().unwrap();
deltas.push(Operation::Insert((offset, text.to_string()))); deltas.push(Operation::Insert((offset, text.to_string())));
offset += text.len() as u32; offset = change.new_index().unwrap() + text.chars().count()
} }
ChangeTag::Equal => { ChangeTag::Equal => {
offset += change.as_str().unwrap_or("").len() as u32; let text = change.as_str().unwrap();
offset = change.new_index().unwrap() + text.chars().count()
} }
} }
} }

View File

@ -188,3 +188,12 @@ fn test_multiline_remove() {
Operation::Delete((0, 5)) Operation::Delete((0, 5))
); );
} }
/// Checks that a document built from an empty snapshot with no prior deltas
/// renders as `"_"` after being updated to `"_"`.
///
/// NOTE(review): the test name refers to unicode, but the literals visible here
/// are plain ASCII — non-ASCII characters may have been lost; confirm against
/// the repository.
#[test]
fn test_unicode() {
    // Start from an empty snapshot and an empty delta list.
    let mut doc = TextDocument::new(Some(""), vec![]).unwrap();

    // Bring the document to the new content; any error fails the test.
    doc.update("_").unwrap();

    assert_eq!(doc.to_string(), "_");
}