Fix caret movement issue for some special characters (#10198)

Currently in Zed, certain characters require pressing the key twice to move the caret through that character. For example: "❤️" and "y̆". The reason for this is as follows: Currently, Zed uses `chars` to distinguish different characters, and calling `chars` on `y̆` will yield two `char` values: `y` and `\u{306}`, and calling `chars` on `❤️` will yield two `char` values: `❤` and `\u{fe0f}`. Therefore, consider the following scenario (where ^ represents the caret): - what we see: ❤️ ^ - the actual buffer: ❤ \u{fe0f} ^ After pressing the left arrow key once: - what we see: ❤️ ^ - the actual buffer: ❤ ^ \u{fe0f} After pressing the left arrow key again: - what we see: ^ ❤️ - the actual buffer: ^ ❤ \u{fe0f} Thus, two left arrow key presses are needed to move the caret, and this PR fixes this bug (or this is actually a feature?). I have tried to keep the scope of code modifications as minimal as possible. In this PR, Zed handles such characters as follows: - what we see: ❤️ ^ - the actual buffer: ❤ \u{fe0f} ^ After pressing the left arrow key once: - what we see: ^ ❤️ - the actual buffer: ^ ❤ \u{fe0f} Or after pressing the delete key: - what we see: ^ - the actual buffer: ^ Please note that currently, different platforms and software handle these special characters differently, and even the same software may handle these characters differently in different situations. For example, in my testing on Chrome on macOS, GitHub treats `y̆` as a single character, just like in this PR; however, in Rust Playground, `y̆` is treated as two characters, and pressing the delete key does not delete the entire `y̆` character, but instead deletes `\u{306}` to yield the character `y`. And they both treat `❤️` as a single character, pressing the delete key will delete the entire `❤️` character. This PR is based on the principle of making changes with the smallest impact on the code, and I think that deleting the entire character with the delete key is more intuitive. Release Notes: - Fix caret movement issue for some special characters --------- Co-authored-by: Conrad Irwin <conrad.irwin@gmail.com> Co-authored-by: Thorsten <thorsten@zed.dev> Co-authored-by: Bennet <bennetbo@gmx.de>
2024-11-07 20:39:04 +03:00 · 2024-04-11 03:01:25 +08:00 · 2024-04-11 03:01:25 +08:00 · fdddbfc179
commit fdddbfc179
parent 3648d79ddb
6 changed files with 59 additions and 10 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -7929,6 +7929,7 @@ dependencies = [
 "rand 0.8.5",
 "smallvec",
 "sum_tree",
 "unicode-segmentation",
 "util",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@ -341,6 +341,7 @@ tree-sitter-vue = { git = "https://github.com/zed-industries/tree-sitter-vue", r
 tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", rev = "f545a41f57502e1b5ddf2a6668896c1b0620f930" }
 unindent = "0.1.7"
 unicase = "2.6"
 unicode-segmentation = "1.10"
 url = "2.2"
 uuid = { version = "1.1.2", features = ["v4"] }
 wasmparser = "0.201"
--- a/crates/gpui/Cargo.toml
+++ b/crates/gpui/Cargo.toml
@ -12,11 +12,7 @@ workspace = true
 [features]
 default = []
-test-support = [
+test-support = ["backtrace", "collections/test-support", "util/test-support"]
    "backtrace",
    "collections/test-support",
    "util/test-support",
 ]
 runtime_shaders = []
 macos-blade = ["blade-graphics", "blade-macros", "blade-rwh", "bytemuck"]
--- a/crates/rope/Cargo.toml
+++ b/crates/rope/Cargo.toml
@ -17,6 +17,7 @@ bromberg_sl2 = { git = "https://github.com/zed-industries/bromberg_sl2", rev = "
 log.workspace = true
 smallvec.workspace = true
 sum_tree.workspace = true
 unicode-segmentation.workspace = true
 util.workspace = true
 [dev-dependencies]
--- a/crates/rope/benches/rope_benchmark.rs
+++ b/crates/rope/benches/rope_benchmark.rs
@ -1,9 +1,12 @@
 use std::ops::Range;
-use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
+use criterion::{
    black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput,
 };
 use rand::prelude::*;
 use rand::rngs::StdRng;
-use rope::Rope;
+use rope::{Point, Rope};
 use sum_tree::Bias;
 use util::RandomCharIter;
 fn generate_random_text(mut rng: StdRng, text_len: usize) -> String {
@ -44,6 +47,16 @@ fn generate_random_rope_ranges(mut rng: StdRng, rope: &Rope) -> Vec<Range<usize>
    ranges
 }
 fn generate_random_rope_points(mut rng: StdRng, rope: &Rope) -> Vec<Point> {
    let num_points = rope.len() / 10;
    let mut points = Vec::new();
    for _ in 0..num_points {
        points.push(rope.offset_to_point(rng.gen_range(0..rope.len())));
    }
    points
 }
 fn rope_benchmarks(c: &mut Criterion) {
    static SEED: u64 = 9999;
    static KB: usize = 1024;
@ -138,6 +151,26 @@ fn rope_benchmarks(c: &mut Criterion) {
        });
    }
    group.finish();
    let mut group = c.benchmark_group("clip_point");
    for size in sizes.iter() {
        group.throughput(Throughput::Bytes(*size as u64));
        group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, &size| {
            let rope = generate_random_rope(rng.clone(), *size);
            b.iter_batched(
                || generate_random_rope_points(rng.clone(), &rope),
                |offsets| {
                    for offset in offsets.iter() {
                        black_box(rope.clip_point(*offset, Bias::Left));
                        black_box(rope.clip_point(*offset, Bias::Right));
                    }
                },
                BatchSize::SmallInput,
            );
        });
    }
    group.finish();
 }
 criterion_group!(benches, rope_benchmarks);
--- a/crates/rope/src/rope.rs
+++ b/crates/rope/src/rope.rs
@ -12,6 +12,7 @@ use std::{
    str,
 };
 use sum_tree::{Bias, Dimension, SumTree};
 use unicode_segmentation::GraphemeCursor;
 use util::debug_panic;
 pub use offset_utf16::OffsetUtf16;
@ -923,14 +924,30 @@ impl Chunk {
    fn clip_point(&self, target: Point, bias: Bias) -> Point {
        for (row, line) in self.0.split('\n').enumerate() {
            if row == target.row as usize {
-                let mut column = target.column.min(line.len() as u32);
+                let bytes = line.as_bytes();
-                while !line.is_char_boundary(column as usize) {
+                let mut column = target.column.min(bytes.len() as u32) as usize;
                if column == 0
                    || column == bytes.len()
                    || (bytes[column - 1] < 128 && bytes[column] < 128)
                {
                    return Point::new(row as u32, column as u32);
                }
                let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
                loop {
                    if line.is_char_boundary(column) {
                        if grapheme_cursor.is_boundary(line, 0).unwrap_or(false) {
                            break;
                        }
                    }
                    match bias {
                        Bias::Left => column -= 1,
                        Bias::Right => column += 1,
                    }
                    grapheme_cursor.set_cursor(column);
                }
-                return Point::new(row as u32, column);
+                return Point::new(row as u32, column as u32);
            }
        }
        unreachable!()