Fix caret movement issue for some special characters (#10198)

Currently in Zed, certain characters require pressing an arrow key twice
to move the caret past them. For example: "❤️" and "y̆".

The reason for this is as follows:

Currently, Zed uses `chars` to distinguish different characters, and
calling `chars` on `y̆` will yield two `char` values: `y` and `\u{306}`,
and calling `chars` on `❤️` will yield two `char` values: `❤` and
`\u{fe0f}`.

Therefore, consider the following scenario (where ^ represents the
caret):

- what we see: ❤️ ^
- the actual buffer: ❤ \u{fe0f} ^

After pressing the left arrow key once:

- what we see: ❤️ ^
- the actual buffer: ❤ ^ \u{fe0f}

After pressing the left arrow key again:
- what we see: ^ ❤️
- the actual buffer: ^ ❤ \u{fe0f}

Thus, two left arrow key presses are needed to move the caret past one
visible character. This PR fixes this bug (or is this actually a feature?).

I have tried to keep the scope of code modifications as minimal as
possible. In this PR, Zed handles such characters as follows:

- what we see: ❤️ ^
- the actual buffer: ❤ \u{fe0f} ^

After pressing the left arrow key once:

- what we see: ^ ❤️
- the actual buffer: ^ ❤ \u{fe0f}

Or after pressing the delete key:

- what we see: ^
- the actual buffer: ^

Please note that currently, different platforms and software handle
these special characters differently, and even the same software may
handle these characters differently in different situations. For
example, in my testing on Chrome on macOS, GitHub treats `y̆` as a
single character, just like in this PR; however, in Rust Playground,
`y̆` is treated as two characters, and pressing the delete key does not
delete the entire `y̆` character, but instead deletes `\u{306}` to yield
the character `y`. Both, however, treat `❤️` as a single character:
pressing the delete key deletes the entire `❤️` character.

This PR is based on the principle of making changes with the smallest
impact on the code, and I think that deleting the entire character with
the delete key is more intuitive.

Release Notes:

- Fix caret movement issue for some special characters

---------

Co-authored-by: Conrad Irwin <conrad.irwin@gmail.com>
Co-authored-by: Thorsten <thorsten@zed.dev>
Co-authored-by: Bennet <bennetbo@gmx.de>
This commit is contained in:
张小白 2024-04-11 03:01:25 +08:00 committed by GitHub
parent 3648d79ddb
commit fdddbfc179
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 59 additions and 10 deletions

1
Cargo.lock generated
View File

@ -7929,6 +7929,7 @@ dependencies = [
"rand 0.8.5", "rand 0.8.5",
"smallvec", "smallvec",
"sum_tree", "sum_tree",
"unicode-segmentation",
"util", "util",
] ]

View File

@ -341,6 +341,7 @@ tree-sitter-vue = { git = "https://github.com/zed-industries/tree-sitter-vue", r
tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", rev = "f545a41f57502e1b5ddf2a6668896c1b0620f930" } tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", rev = "f545a41f57502e1b5ddf2a6668896c1b0620f930" }
unindent = "0.1.7" unindent = "0.1.7"
unicase = "2.6" unicase = "2.6"
unicode-segmentation = "1.10"
url = "2.2" url = "2.2"
uuid = { version = "1.1.2", features = ["v4"] } uuid = { version = "1.1.2", features = ["v4"] }
wasmparser = "0.201" wasmparser = "0.201"

View File

@ -12,11 +12,7 @@ workspace = true
[features] [features]
default = [] default = []
test-support = [ test-support = ["backtrace", "collections/test-support", "util/test-support"]
"backtrace",
"collections/test-support",
"util/test-support",
]
runtime_shaders = [] runtime_shaders = []
macos-blade = ["blade-graphics", "blade-macros", "blade-rwh", "bytemuck"] macos-blade = ["blade-graphics", "blade-macros", "blade-rwh", "bytemuck"]

View File

@ -17,6 +17,7 @@ bromberg_sl2 = { git = "https://github.com/zed-industries/bromberg_sl2", rev = "
log.workspace = true log.workspace = true
smallvec.workspace = true smallvec.workspace = true
sum_tree.workspace = true sum_tree.workspace = true
unicode-segmentation.workspace = true
util.workspace = true util.workspace = true
[dev-dependencies] [dev-dependencies]

View File

@ -1,9 +1,12 @@
use std::ops::Range; use std::ops::Range;
use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput}; use criterion::{
black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput,
};
use rand::prelude::*; use rand::prelude::*;
use rand::rngs::StdRng; use rand::rngs::StdRng;
use rope::Rope; use rope::{Point, Rope};
use sum_tree::Bias;
use util::RandomCharIter; use util::RandomCharIter;
fn generate_random_text(mut rng: StdRng, text_len: usize) -> String { fn generate_random_text(mut rng: StdRng, text_len: usize) -> String {
@ -44,6 +47,16 @@ fn generate_random_rope_ranges(mut rng: StdRng, rope: &Rope) -> Vec<Range<usize>
ranges ranges
} }
/// Produces `rope.len() / 10` points for benchmarking, each obtained by
/// drawing a random byte offset in `0..rope.len()` and converting it to a
/// `Point` with `Rope::offset_to_point`.
fn generate_random_rope_points(mut rng: StdRng, rope: &Rope) -> Vec<Point> {
    let point_count = rope.len() / 10;
    // The range is only sampled when `point_count > 0`, i.e. when the rope
    // holds at least ten bytes, so `0..rope.len()` is never empty here.
    (0..point_count)
        .map(|_| rope.offset_to_point(rng.gen_range(0..rope.len())))
        .collect()
}
fn rope_benchmarks(c: &mut Criterion) { fn rope_benchmarks(c: &mut Criterion) {
static SEED: u64 = 9999; static SEED: u64 = 9999;
static KB: usize = 1024; static KB: usize = 1024;
@ -138,6 +151,26 @@ fn rope_benchmarks(c: &mut Criterion) {
}); });
} }
group.finish(); group.finish();
let mut group = c.benchmark_group("clip_point");
for size in sizes.iter() {
group.throughput(Throughput::Bytes(*size as u64));
group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, &size| {
let rope = generate_random_rope(rng.clone(), *size);
b.iter_batched(
|| generate_random_rope_points(rng.clone(), &rope),
|offsets| {
for offset in offsets.iter() {
black_box(rope.clip_point(*offset, Bias::Left));
black_box(rope.clip_point(*offset, Bias::Right));
}
},
BatchSize::SmallInput,
);
});
}
group.finish();
} }
criterion_group!(benches, rope_benchmarks); criterion_group!(benches, rope_benchmarks);

View File

@ -12,6 +12,7 @@ use std::{
str, str,
}; };
use sum_tree::{Bias, Dimension, SumTree}; use sum_tree::{Bias, Dimension, SumTree};
use unicode_segmentation::GraphemeCursor;
use util::debug_panic; use util::debug_panic;
pub use offset_utf16::OffsetUtf16; pub use offset_utf16::OffsetUtf16;
@ -923,14 +924,30 @@ impl Chunk {
fn clip_point(&self, target: Point, bias: Bias) -> Point { fn clip_point(&self, target: Point, bias: Bias) -> Point {
for (row, line) in self.0.split('\n').enumerate() { for (row, line) in self.0.split('\n').enumerate() {
if row == target.row as usize { if row == target.row as usize {
let mut column = target.column.min(line.len() as u32); let bytes = line.as_bytes();
while !line.is_char_boundary(column as usize) { let mut column = target.column.min(bytes.len() as u32) as usize;
if column == 0
|| column == bytes.len()
|| (bytes[column - 1] < 128 && bytes[column] < 128)
{
return Point::new(row as u32, column as u32);
}
let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
loop {
if line.is_char_boundary(column) {
if grapheme_cursor.is_boundary(line, 0).unwrap_or(false) {
break;
}
}
match bias { match bias {
Bias::Left => column -= 1, Bias::Left => column -= 1,
Bias::Right => column += 1, Bias::Right => column += 1,
} }
grapheme_cursor.set_cursor(column);
} }
return Point::new(row as u32, column); return Point::new(row as u32, column as u32);
} }
} }
unreachable!() unreachable!()