Avoid buffering line content to compute indent guides (#15167)

Release Notes:

- Improved performance when computing indent guides for buffers with
extremely long lines.

---------

Co-authored-by: Nathan <nathan@zed.dev>
Co-authored-by: Bennet <bennet@zed.dev>
Co-authored-by: Thorsten <thorsten@zed.dev>
This commit is contained in:
Antonio Scandurra 2024-07-25 15:21:50 +02:00 committed by GitHub
parent cd9a42e8da
commit 8631180e43
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 411 additions and 51 deletions

2
Cargo.lock generated
View File

@ -8863,6 +8863,8 @@ version = "0.1.0"
dependencies = [
"arrayvec",
"criterion",
"ctor",
"env_logger",
"gpui",
"log",
"rand 0.8.5",

View File

@ -20,6 +20,8 @@ unicode-segmentation.workspace = true
util.workspace = true
[dev-dependencies]
ctor.workspace = true
env_logger.workspace = true
gpui = { workspace = true, features = ["test-support"] }
rand.workspace = true
util = { workspace = true, features = ["test-support"] }

View File

@ -557,33 +557,49 @@ impl<'a> Cursor<'a> {
pub struct Chunks<'a> {
chunks: sum_tree::Cursor<'a, Chunk, usize>,
range: Range<usize>,
offset: usize,
reversed: bool,
}
impl<'a> Chunks<'a> {
pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
let mut chunks = rope.chunks.cursor();
if reversed {
let offset = if reversed {
chunks.seek(&range.end, Bias::Left, &());
range.end
} else {
chunks.seek(&range.start, Bias::Right, &());
}
range.start
};
Self {
chunks,
range,
offset,
reversed,
}
}
pub fn offset(&self) -> usize {
fn offset_is_valid(&self) -> bool {
if self.reversed {
self.range.end.min(self.chunks.end(&()))
if self.offset <= self.range.start || self.offset > self.range.end {
return false;
}
} else {
self.range.start.max(*self.chunks.start())
if self.offset < self.range.start || self.offset >= self.range.end {
return false;
}
}
true
}
pub fn seek(&mut self, offset: usize) {
pub fn offset(&self) -> usize {
self.offset
}
pub fn seek(&mut self, mut offset: usize) {
offset = offset.clamp(self.range.start, self.range.end);
let bias = if self.reversed {
Bias::Left
} else {
@ -596,26 +612,123 @@ impl<'a> Chunks<'a> {
self.chunks.seek(&offset, bias, &());
}
if self.reversed {
self.range.end = offset;
} else {
self.range.start = offset;
self.offset = offset;
}
/// Moves this cursor to the start of the next line in the rope.
///
/// This method advances the cursor to the beginning of the next line.
/// If the cursor is already at the end of the rope, this method does nothing.
/// Reversed chunks iterators are not currently supported and will panic.
///
/// Returns `true` if the cursor was successfully moved to the next line start,
/// or `false` if the cursor was already at the end of the rope.
pub fn next_line(&mut self) -> bool {
assert!(!self.reversed);
let mut found = false;
if let Some(chunk) = self.peek() {
if let Some(newline_ix) = chunk.find('\n') {
self.offset += newline_ix + 1;
found = self.offset <= self.range.end;
} else {
self.chunks
.search_forward(|summary| summary.text.lines.row > 0, &());
self.offset = *self.chunks.start();
if let Some(newline_ix) = self.peek().and_then(|chunk| chunk.find('\n')) {
self.offset += newline_ix + 1;
found = self.offset <= self.range.end;
} else {
self.offset = self.chunks.end(&());
}
}
if self.offset == self.chunks.end(&()) {
self.next();
}
}
if self.offset > self.range.end {
self.offset = cmp::min(self.offset, self.range.end);
self.chunks.seek(&self.offset, Bias::Right, &());
}
found
}
/// Move this cursor to the preceding position in the rope that starts a new line.
/// Reversed chunks iterators are not currently supported and will panic.
///
/// If this cursor is not on the start of a line, it will be moved to the start of
/// its current line. Otherwise it will be moved to the start of the previous line.
/// It updates the cursor's position and returns true if a previous line was found,
/// or false if the cursor was already at the start of the rope.
pub fn prev_line(&mut self) -> bool {
assert!(!self.reversed);
let initial_offset = self.offset;
if self.offset == *self.chunks.start() {
self.chunks.prev(&());
}
if let Some(chunk) = self.chunks.item() {
let mut end_ix = self.offset - *self.chunks.start();
if chunk.0.as_bytes()[end_ix - 1] == b'\n' {
end_ix -= 1;
}
if let Some(newline_ix) = chunk.0[..end_ix].rfind('\n') {
self.offset = *self.chunks.start() + newline_ix + 1;
if self.offset_is_valid() {
return true;
}
}
}
self.chunks
.search_backward(|summary| summary.text.lines.row > 0, &());
self.offset = *self.chunks.start();
if let Some(chunk) = self.chunks.item() {
if let Some(newline_ix) = chunk.0.rfind('\n') {
self.offset += newline_ix + 1;
if self.offset_is_valid() {
if self.offset == self.chunks.end(&()) {
self.chunks.next(&());
}
return true;
}
}
}
if !self.offset_is_valid() || self.chunks.item().is_none() {
self.offset = self.range.start;
self.chunks.seek(&self.offset, Bias::Right, &());
}
self.offset < initial_offset && self.offset == 0
}
pub fn peek(&self) -> Option<&'a str> {
let chunk = self.chunks.item()?;
if self.reversed && self.range.start >= self.chunks.end(&()) {
return None;
}
let chunk_start = *self.chunks.start();
if self.range.end <= chunk_start {
if !self.offset_is_valid() {
return None;
}
let start = self.range.start.saturating_sub(chunk_start);
let end = self.range.end - chunk_start;
Some(&chunk.0[start..chunk.0.len().min(end)])
let chunk = self.chunks.item()?;
let chunk_start = *self.chunks.start();
let slice_range = if self.reversed {
let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
let slice_end = self.offset - chunk_start;
slice_start..slice_end
} else {
let slice_start = self.offset - chunk_start;
let slice_end = cmp::min(self.chunks.end(&()), self.range.end) - chunk_start;
slice_start..slice_end
};
Some(&chunk.0[slice_range])
}
pub fn lines(self) -> Lines<'a> {
@ -633,15 +746,16 @@ impl<'a> Iterator for Chunks<'a> {
type Item = &'a str;
fn next(&mut self) -> Option<Self::Item> {
let result = self.peek();
if result.is_some() {
if self.reversed {
self.chunks.prev(&());
} else {
self.chunks.next(&());
}
let chunk = self.peek()?;
if self.reversed {
self.chunks.prev(&());
self.offset -= chunk.len();
} else {
self.chunks.next(&());
self.offset += chunk.len();
}
result
Some(chunk)
}
}
@ -1299,6 +1413,13 @@ mod tests {
use util::RandomCharIter;
use Bias::{Left, Right};
#[ctor::ctor]
fn init_logger() {
if std::env::var("RUST_LOG").is_ok() {
env_logger::init();
}
}
#[test]
fn test_all_4_byte_chars() {
let mut rope = Rope::new();
@ -1355,6 +1476,50 @@ mod tests {
);
}
#[test]
fn test_prev_next_line() {
let rope = Rope::from("abc\ndef\nghi\njkl");
let mut chunks = rope.chunks();
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
assert!(chunks.next_line());
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
assert!(chunks.next_line());
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
assert!(chunks.next_line());
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
assert!(!chunks.next_line());
assert_eq!(chunks.peek(), None);
assert!(chunks.prev_line());
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
assert!(chunks.prev_line());
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
assert!(chunks.prev_line());
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
assert!(chunks.prev_line());
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
assert!(!chunks.prev_line());
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
// Only return true when the cursor has moved to the start of a line
let mut chunks = rope.chunks_in_range(5..7);
chunks.seek(6);
assert!(!chunks.prev_line());
assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'e');
assert!(!chunks.next_line());
assert_eq!(chunks.peek(), None);
}
#[test]
fn test_lines() {
let rope = Rope::from("abc\ndefg\nhi");
@ -1438,6 +1603,143 @@ mod tests {
.collect::<String>(),
&expected[start_ix..end_ix]
);
let mut expected_line_starts: Vec<_> = expected[start_ix..end_ix]
.match_indices('\n')
.map(|(index, _)| start_ix + index + 1)
.collect();
let mut chunks = actual.chunks_in_range(start_ix..end_ix);
let mut actual_line_starts = Vec::new();
while chunks.next_line() {
actual_line_starts.push(chunks.offset());
}
assert_eq!(
actual_line_starts,
expected_line_starts,
"actual line starts != expected line starts when using next_line() for {:?} ({:?})",
&expected[start_ix..end_ix],
start_ix..end_ix
);
if start_ix < end_ix
&& (start_ix == 0 || expected.as_bytes()[start_ix - 1] == b'\n')
{
expected_line_starts.insert(0, start_ix);
}
// Remove the last index if it starts at the end of the range.
if expected_line_starts.last() == Some(&end_ix) {
expected_line_starts.pop();
}
let mut actual_line_starts = Vec::new();
while chunks.prev_line() {
actual_line_starts.push(chunks.offset());
}
actual_line_starts.reverse();
assert_eq!(
actual_line_starts,
expected_line_starts,
"actual line starts != expected line starts when using prev_line() for {:?} ({:?})",
&expected[start_ix..end_ix],
start_ix..end_ix
);
// Check that next_line/prev_line work correctly from random positions
let mut random_offset = rng.gen_range(start_ix..=end_ix);
while !expected.is_char_boundary(random_offset) {
random_offset -= 1;
}
chunks.seek(random_offset);
if rng.gen() {
let expected_next_line_start = expected[random_offset..end_ix]
.find('\n')
.map(|newline_ix| random_offset + newline_ix + 1);
let moved = chunks.next_line();
assert_eq!(
moved,
expected_next_line_start.is_some(),
"unexpected result from next_line after seeking to {} in range {:?} ({:?})",
random_offset,
start_ix..end_ix,
&expected[start_ix..end_ix]
);
if let Some(expected_next_line_start) = expected_next_line_start {
assert_eq!(
chunks.offset(),
expected_next_line_start,
"invalid position after seeking to {} in range {:?} ({:?})",
random_offset,
start_ix..end_ix,
&expected[start_ix..end_ix]
);
} else {
assert_eq!(
chunks.offset(),
end_ix,
"invalid position after seeking to {} in range {:?} ({:?})",
random_offset,
start_ix..end_ix,
&expected[start_ix..end_ix]
);
}
} else {
let search_end =
if random_offset > 0 && expected.as_bytes()[random_offset - 1] == b'\n' {
random_offset - 1
} else {
random_offset
};
let expected_prev_line_start = expected[..search_end]
.rfind('\n')
.and_then(|newline_ix| {
let line_start_ix = newline_ix + 1;
if line_start_ix >= start_ix {
Some(line_start_ix)
} else {
None
}
})
.or_else(|| {
if random_offset > 0 && start_ix == 0 {
Some(0)
} else {
None
}
});
let moved = chunks.prev_line();
assert_eq!(
moved,
expected_prev_line_start.is_some(),
"unexpected result from prev_line after seeking to {} in range {:?} ({:?})",
random_offset,
start_ix..end_ix,
&expected[start_ix..end_ix]
);
if let Some(expected_prev_line_start) = expected_prev_line_start {
assert_eq!(
chunks.offset(),
expected_prev_line_start,
"invalid position after seeking to {} in range {:?} ({:?})",
random_offset,
start_ix..end_ix,
&expected[start_ix..end_ix]
);
} else {
assert_eq!(
chunks.offset(),
start_ix,
"invalid position after seeking to {} in range {:?} ({:?})",
random_offset,
start_ix..end_ix,
&expected[start_ix..end_ix]
);
}
}
}
let mut offset_utf16 = OffsetUtf16(0);

View File

@ -178,11 +178,11 @@ where
#[track_caller]
pub fn prev(&mut self, cx: &<T::Summary as Summary>::Context) {
self.prev_internal(|_| true, cx)
self.search_backward(|_| true, cx)
}
#[track_caller]
fn prev_internal<F>(&mut self, mut filter_node: F, cx: &<T::Summary as Summary>::Context)
pub fn search_backward<F>(&mut self, mut filter_node: F, cx: &<T::Summary as Summary>::Context)
where
F: FnMut(&T::Summary) -> bool,
{
@ -249,11 +249,11 @@ where
#[track_caller]
pub fn next(&mut self, cx: &<T::Summary as Summary>::Context) {
self.next_internal(|_| true, cx)
self.search_forward(|_| true, cx)
}
#[track_caller]
fn next_internal<F>(&mut self, mut filter_node: F, cx: &<T::Summary as Summary>::Context)
pub fn search_forward<F>(&mut self, mut filter_node: F, cx: &<T::Summary as Summary>::Context)
where
F: FnMut(&T::Summary) -> bool,
{
@ -658,11 +658,11 @@ where
}
pub fn next(&mut self, cx: &<T::Summary as Summary>::Context) {
self.cursor.next_internal(&mut self.filter_node, cx);
self.cursor.search_forward(&mut self.filter_node, cx);
}
pub fn prev(&mut self, cx: &<T::Summary as Summary>::Context) {
self.cursor.prev_internal(&mut self.filter_node, cx);
self.cursor.search_backward(&mut self.filter_node, cx);
}
}
@ -681,7 +681,7 @@ where
}
if let Some(item) = self.item() {
self.cursor.next_internal(&mut self.filter_node, &());
self.cursor.search_forward(&mut self.filter_node, &());
Some(item)
} else {
None

View File

@ -542,6 +542,35 @@ pub struct LineIndent {
}
impl LineIndent {
pub fn from_chunks(chunks: &mut Chunks) -> Self {
let mut tabs = 0;
let mut spaces = 0;
let mut line_blank = true;
'outer: while let Some(chunk) = chunks.peek() {
for ch in chunk.chars() {
if ch == '\t' {
tabs += 1;
} else if ch == ' ' {
spaces += 1;
} else {
if ch != '\n' {
line_blank = false;
}
break 'outer;
}
}
chunks.next();
}
Self {
tabs,
spaces,
line_blank,
}
}
/// Constructs a new `LineIndent` which only contains spaces.
pub fn spaces(spaces: u32) -> Self {
Self {
@ -1983,39 +2012,64 @@ impl BufferSnapshot {
row_range: Range<u32>,
) -> impl Iterator<Item = (u32, LineIndent)> + '_ {
let start = Point::new(row_range.start, 0).to_offset(self);
let end = Point::new(row_range.end, 0).to_offset(self);
let end = Point::new(row_range.end - 1, self.line_len(row_range.end - 1)).to_offset(self);
let mut lines = self.as_rope().chunks_in_range(start..end).lines();
let mut chunks = self.as_rope().chunks_in_range(start..end);
let mut row = row_range.start;
let mut done = start == end;
std::iter::from_fn(move || {
if let Some(line) = lines.next() {
let indent = LineIndent::from(line);
row += 1;
Some((row - 1, indent))
} else {
if done {
None
} else {
let indent = (row, LineIndent::from_chunks(&mut chunks));
done = !chunks.next_line();
row += 1;
Some(indent)
}
})
}
/// Returns the line indents in the given row range, exclusive of end row, in reversed order.
pub fn reversed_line_indents_in_row_range(
&self,
row_range: Range<u32>,
) -> impl Iterator<Item = (u32, LineIndent)> + '_ {
let start = Point::new(row_range.start, 0).to_offset(self);
let end = Point::new(row_range.end, 0)
.to_offset(self)
.saturating_sub(1);
let mut lines = self.as_rope().reversed_chunks_in_range(start..end).lines();
let mut row = row_range.end;
let end_point;
let end;
if row_range.end > row_range.start {
end_point = Point::new(row_range.end - 1, self.line_len(row_range.end - 1));
end = end_point.to_offset(self);
} else {
end_point = Point::new(row_range.start, 0);
end = start;
};
let mut chunks = self.as_rope().chunks_in_range(start..end);
// Move the cursor to the start of the last line if it's not empty.
chunks.seek(end);
if end_point.column > 0 {
chunks.prev_line();
}
let mut row = end_point.row;
let mut done = start == end;
std::iter::from_fn(move || {
if let Some(line) = lines.next() {
let indent = LineIndent::from(line);
row = row.saturating_sub(1);
Some((row, indent))
} else {
if done {
None
} else {
let initial_offset = chunks.offset();
let indent = (row, LineIndent::from_chunks(&mut chunks));
if chunks.offset() > initial_offset {
chunks.prev_line();
}
done = !chunks.prev_line();
if !done {
row -= 1;
}
Some(indent)
}
})
}