Fix moving to next word boundary with multi-byte characters

Previously, for a given point, we would create a char iterator at
the start of the row and the skip `column` characters. This is
however incorrect because display points are expressed in bytes,
and so we could park the anchor midway through a multi-byte character.

This commit fixes the issue by switching `DisplayMap::chars_at` to
take a point instead and skipping characters correctly when a point with
a non-zero column is provided.
This commit is contained in:
Antonio Scandurra 2021-08-02 14:11:31 +02:00
parent 6a03f9368b
commit d5ec0c8feb
2 changed files with 74 additions and 7 deletions

View File

@ -159,14 +159,23 @@ impl DisplayMapSnapshot {
.highlighted_chunks_for_rows(display_rows)
}
pub fn chars_at<'a>(&'a self, display_row: u32) -> impl Iterator<Item = char> + 'a {
self.chunks_at(display_row).flat_map(str::chars)
pub fn chars_at<'a>(&'a self, point: DisplayPoint) -> impl Iterator<Item = char> + 'a {
let mut column = 0;
let mut chars = self.chunks_at(point.row()).flat_map(str::chars);
while column < point.column() {
if let Some(c) = chars.next() {
column += c.len_utf8() as u32;
} else {
break;
}
}
chars
}
pub fn column_to_chars(&self, display_row: u32, target: u32) -> u32 {
let mut count = 0;
let mut column = 0;
for c in self.chars_at(display_row) {
for c in self.chars_at(DisplayPoint::new(display_row, 0)) {
if column >= target {
break;
}
@ -179,7 +188,7 @@ impl DisplayMapSnapshot {
pub fn column_from_chars(&self, display_row: u32, char_count: u32) -> u32 {
let mut count = 0;
let mut column = 0;
for c in self.chars_at(display_row) {
for c in self.chars_at(DisplayPoint::new(display_row, 0)) {
if c == '\n' || count >= char_count {
break;
}
@ -237,7 +246,7 @@ impl DisplayMapSnapshot {
pub fn line_indent(&self, display_row: u32) -> (u32, bool) {
let mut indent = 0;
let mut is_blank = true;
for c in self.chars_at(display_row) {
for c in self.chars_at(DisplayPoint::new(display_row, 0)) {
if c == ' ' {
indent += 1;
} else {

View File

@ -120,7 +120,7 @@ pub fn prev_word_boundary(map: &DisplayMapSnapshot, point: DisplayPoint) -> Resu
let mut boundary = DisplayPoint::new(point.row(), 0);
let mut column = 0;
let mut prev_c = None;
for c in map.chars_at(point.row()) {
for c in map.chars_at(DisplayPoint::new(point.row(), 0)) {
if column >= point.column() {
break;
}
@ -141,7 +141,7 @@ pub fn next_word_boundary(
mut point: DisplayPoint,
) -> Result<DisplayPoint> {
let mut prev_c = None;
for c in map.chars_at(point.row()).skip(point.column() as usize) {
for c in map.chars_at(point) {
if prev_c.is_some() && (c == '\n' || char_kind(prev_c.unwrap()) != char_kind(c)) {
break;
}
@ -176,3 +176,61 @@ fn char_kind(c: char) -> CharKind {
CharKind::Punctuation
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
editor::{display_map::DisplayMap, Buffer},
test::build_app_state,
};
#[gpui::test]
fn test_prev_next_word_boundary_multibyte(cx: &mut gpui::MutableAppContext) {
let settings = build_app_state(cx).settings.borrow().clone();
let buffer = cx.add_model(|cx| Buffer::new(0, "a bcΔ defγ", cx));
let display_map = cx.add_model(|cx| DisplayMap::new(buffer, settings, None, cx));
let snapshot = display_map.update(cx, |map, cx| map.snapshot(cx));
assert_eq!(
prev_word_boundary(&snapshot, DisplayPoint::new(0, 12)).unwrap(),
DisplayPoint::new(0, 7)
);
assert_eq!(
prev_word_boundary(&snapshot, DisplayPoint::new(0, 7)).unwrap(),
DisplayPoint::new(0, 6)
);
assert_eq!(
prev_word_boundary(&snapshot, DisplayPoint::new(0, 6)).unwrap(),
DisplayPoint::new(0, 2)
);
assert_eq!(
prev_word_boundary(&snapshot, DisplayPoint::new(0, 2)).unwrap(),
DisplayPoint::new(0, 1)
);
assert_eq!(
prev_word_boundary(&snapshot, DisplayPoint::new(0, 1)).unwrap(),
DisplayPoint::new(0, 0)
);
assert_eq!(
next_word_boundary(&snapshot, DisplayPoint::new(0, 0)).unwrap(),
DisplayPoint::new(0, 1)
);
assert_eq!(
next_word_boundary(&snapshot, DisplayPoint::new(0, 1)).unwrap(),
DisplayPoint::new(0, 2)
);
assert_eq!(
next_word_boundary(&snapshot, DisplayPoint::new(0, 2)).unwrap(),
DisplayPoint::new(0, 6)
);
assert_eq!(
next_word_boundary(&snapshot, DisplayPoint::new(0, 6)).unwrap(),
DisplayPoint::new(0, 7)
);
assert_eq!(
next_word_boundary(&snapshot, DisplayPoint::new(0, 7)).unwrap(),
DisplayPoint::new(0, 12)
);
}
}