From fdddbfc1796f5f1a002c97ab140684dbfb117ce5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=8F=E7=99=BD?= <364772080@qq.com> Date: Thu, 11 Apr 2024 03:01:25 +0800 Subject: [PATCH] Fix caret movement issue for some special characters (#10198) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently in Zed, certain characters require pressing the key twice to move the caret through that character. For example: "❤️" and "y̆". The reason for this is as follows: Currently, Zed uses `chars` to distinguish different characters, and calling `chars` on `y̆` will yield two `char` values: `y` and `\u{306}`, and calling `chars` on `❤️` will yield two `char` values: `❤` and `\u{fe0f}`. Therefore, consider the following scenario (where ^ represents the caret): - what we see: ❤️ ^ - the actual buffer: ❤ \u{fe0f} ^ After pressing the left arrow key once: - what we see: ❤️ ^ - the actual buffer: ❤ ^ \u{fe0f} After pressing the left arrow key again: - what we see: ^ ❤️ - the actual buffer: ^ ❤ \u{fe0f} Thus, two left arrow key presses are needed to move the caret, and this PR fixes this bug (or this is actually a feature?). I have tried to keep the scope of code modifications as minimal as possible. In this PR, Zed handles such characters as follows: - what we see: ❤️ ^ - the actual buffer: ❤ \u{fe0f} ^ After pressing the left arrow key once: - what we see: ^ ❤️ - the actual buffer: ^ ❤ \u{fe0f} Or after pressing the delete key: - what we see: ^ - the actual buffer: ^ Please note that currently, different platforms and software handle these special characters differently, and even the same software may handle these characters differently in different situations. For example, in my testing on Chrome on macOS, GitHub treats `y̆` as a single character, just like in this PR; however, in Rust Playground, `y̆` is treated as two characters, and pressing the delete key does not delete the entire `y̆` character, but instead deletes `\u{306}` to yield the character `y`. And they both treat `❤️` as a single character, pressing the delete key will delete the entire `❤️` character. This PR is based on the principle of making changes with the smallest impact on the code, and I think that deleting the entire character with the delete key is more intuitive. Release Notes: - Fix caret movement issue for some special characters --------- Co-authored-by: Conrad Irwin Co-authored-by: Thorsten Co-authored-by: Bennet --- Cargo.lock | 1 + Cargo.toml | 1 + crates/gpui/Cargo.toml | 6 +---- crates/rope/Cargo.toml | 1 + crates/rope/benches/rope_benchmark.rs | 37 +++++++++++++++++++++++++-- crates/rope/src/rope.rs | 23 ++++++++++++++--- 6 files changed, 59 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e0f2e6cb3..a6abd6d316 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7929,6 +7929,7 @@ dependencies = [ "rand 0.8.5", "smallvec", "sum_tree", + "unicode-segmentation", "util", ] diff --git a/Cargo.toml b/Cargo.toml index 34f41d0425..368071f608 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -341,6 +341,7 @@ tree-sitter-vue = { git = "https://github.com/zed-industries/tree-sitter-vue", r tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", rev = "f545a41f57502e1b5ddf2a6668896c1b0620f930" } unindent = "0.1.7" unicase = "2.6" +unicode-segmentation = "1.10" url = "2.2" uuid = { version = "1.1.2", features = ["v4"] } wasmparser = "0.201" diff --git a/crates/gpui/Cargo.toml b/crates/gpui/Cargo.toml index 3ae61d1d29..990a844074 100644 --- a/crates/gpui/Cargo.toml +++ b/crates/gpui/Cargo.toml @@ -12,11 +12,7 @@ workspace = true [features] default = [] -test-support = [ - "backtrace", - "collections/test-support", - "util/test-support", -] +test-support = ["backtrace", "collections/test-support", "util/test-support"] runtime_shaders = [] macos-blade = ["blade-graphics", "blade-macros", "blade-rwh", "bytemuck"] diff --git a/crates/rope/Cargo.toml b/crates/rope/Cargo.toml index 577fbf9c0d..bd76303a62 100644 --- a/crates/rope/Cargo.toml +++ b/crates/rope/Cargo.toml @@ -17,6 +17,7 @@ bromberg_sl2 = { git = "https://github.com/zed-industries/bromberg_sl2", rev = " log.workspace = true smallvec.workspace = true sum_tree.workspace = true +unicode-segmentation.workspace = true util.workspace = true [dev-dependencies] diff --git a/crates/rope/benches/rope_benchmark.rs b/crates/rope/benches/rope_benchmark.rs index a1d04474d3..1f95559d77 100644 --- a/crates/rope/benches/rope_benchmark.rs +++ b/crates/rope/benches/rope_benchmark.rs @@ -1,9 +1,12 @@ use std::ops::Range; -use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput}; +use criterion::{ + black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput, +}; use rand::prelude::*; use rand::rngs::StdRng; -use rope::Rope; +use rope::{Point, Rope}; +use sum_tree::Bias; use util::RandomCharIter; fn generate_random_text(mut rng: StdRng, text_len: usize) -> String { @@ -44,6 +47,16 @@ fn generate_random_rope_ranges(mut rng: StdRng, rope: &Rope) -> Vec ranges } +fn generate_random_rope_points(mut rng: StdRng, rope: &Rope) -> Vec { + let num_points = rope.len() / 10; + + let mut points = Vec::new(); + for _ in 0..num_points { + points.push(rope.offset_to_point(rng.gen_range(0..rope.len()))); + } + points +} + fn rope_benchmarks(c: &mut Criterion) { static SEED: u64 = 9999; static KB: usize = 1024; @@ -138,6 +151,26 @@ fn rope_benchmarks(c: &mut Criterion) { }); } group.finish(); + + let mut group = c.benchmark_group("clip_point"); + for size in sizes.iter() { + group.throughput(Throughput::Bytes(*size as u64)); + group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, &size| { + let rope = generate_random_rope(rng.clone(), *size); + + b.iter_batched( + || generate_random_rope_points(rng.clone(), &rope), + |offsets| { + for offset in offsets.iter() { + black_box(rope.clip_point(*offset, Bias::Left)); + black_box(rope.clip_point(*offset, Bias::Right)); + } + }, + BatchSize::SmallInput, + ); + }); + } + group.finish(); } criterion_group!(benches, rope_benchmarks); diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index 852910cacc..6a78048fc4 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -12,6 +12,7 @@ use std::{ str, }; use sum_tree::{Bias, Dimension, SumTree}; +use unicode_segmentation::GraphemeCursor; use util::debug_panic; pub use offset_utf16::OffsetUtf16; @@ -923,14 +924,30 @@ impl Chunk { fn clip_point(&self, target: Point, bias: Bias) -> Point { for (row, line) in self.0.split('\n').enumerate() { if row == target.row as usize { - let mut column = target.column.min(line.len() as u32); - while !line.is_char_boundary(column as usize) { + let bytes = line.as_bytes(); + let mut column = target.column.min(bytes.len() as u32) as usize; + if column == 0 + || column == bytes.len() + || (bytes[column - 1] < 128 && bytes[column] < 128) + { + return Point::new(row as u32, column as u32); + } + + let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true); + loop { + if line.is_char_boundary(column) { + if grapheme_cursor.is_boundary(line, 0).unwrap_or(false) { + break; + } + } + match bias { Bias::Left => column -= 1, Bias::Right => column += 1, } + grapheme_cursor.set_cursor(column); } - return Point::new(row as u32, column); + return Point::new(row as u32, column as u32); } } unreachable!()