From 8631180e43566918742ff85113b47cefda41545d Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Thu, 25 Jul 2024 15:21:50 +0200 Subject: [PATCH] Avoid buffering line content to compute indent guides (#15167) Release Notes: - Improved performance when computing indent guides for buffers with extremely long lines. --------- Co-authored-by: Nathan Co-authored-by: Bennet Co-authored-by: Thorsten --- Cargo.lock | 2 + crates/rope/Cargo.toml | 2 + crates/rope/src/rope.rs | 356 +++++++++++++++++++++++++++++++--- crates/sum_tree/src/cursor.rs | 14 +- crates/text/src/text.rs | 88 +++++++-- 5 files changed, 411 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 064bddaca9..ed7e7e13e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8863,6 +8863,8 @@ version = "0.1.0" dependencies = [ "arrayvec", "criterion", + "ctor", + "env_logger", "gpui", "log", "rand 0.8.5", diff --git a/crates/rope/Cargo.toml b/crates/rope/Cargo.toml index d29e2c7408..309ceaf0bf 100644 --- a/crates/rope/Cargo.toml +++ b/crates/rope/Cargo.toml @@ -20,6 +20,8 @@ unicode-segmentation.workspace = true util.workspace = true [dev-dependencies] +ctor.workspace = true +env_logger.workspace = true gpui = { workspace = true, features = ["test-support"] } rand.workspace = true util = { workspace = true, features = ["test-support"] } diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index 2c41f9fc9f..6cc219d3ae 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -557,33 +557,49 @@ impl<'a> Cursor<'a> { pub struct Chunks<'a> { chunks: sum_tree::Cursor<'a, Chunk, usize>, range: Range, + offset: usize, reversed: bool, } impl<'a> Chunks<'a> { pub fn new(rope: &'a Rope, range: Range, reversed: bool) -> Self { let mut chunks = rope.chunks.cursor(); - if reversed { + let offset = if reversed { chunks.seek(&range.end, Bias::Left, &()); + range.end } else { chunks.seek(&range.start, Bias::Right, &()); - } + range.start + }; Self { chunks, range, + offset, reversed, } } - pub fn offset(&self) -> usize { + fn offset_is_valid(&self) -> bool { if self.reversed { - self.range.end.min(self.chunks.end(&())) + if self.offset <= self.range.start || self.offset > self.range.end { + return false; + } } else { - self.range.start.max(*self.chunks.start()) + if self.offset < self.range.start || self.offset >= self.range.end { + return false; + } } + + true } - pub fn seek(&mut self, offset: usize) { + pub fn offset(&self) -> usize { + self.offset + } + + pub fn seek(&mut self, mut offset: usize) { + offset = offset.clamp(self.range.start, self.range.end); + let bias = if self.reversed { Bias::Left } else { @@ -596,26 +612,123 @@ impl<'a> Chunks<'a> { self.chunks.seek(&offset, bias, &()); } - if self.reversed { - self.range.end = offset; - } else { - self.range.start = offset; + self.offset = offset; + } + + /// Moves this cursor to the start of the next line in the rope. + /// + /// This method advances the cursor to the beginning of the next line. + /// If the cursor is already at the end of the rope, this method does nothing. + /// Reversed chunks iterators are not currently supported and will panic. + /// + /// Returns `true` if the cursor was successfully moved to the next line start, + /// or `false` if the cursor was already at the end of the rope. + pub fn next_line(&mut self) -> bool { + assert!(!self.reversed); + + let mut found = false; + if let Some(chunk) = self.peek() { + if let Some(newline_ix) = chunk.find('\n') { + self.offset += newline_ix + 1; + found = self.offset <= self.range.end; + } else { + self.chunks + .search_forward(|summary| summary.text.lines.row > 0, &()); + self.offset = *self.chunks.start(); + + if let Some(newline_ix) = self.peek().and_then(|chunk| chunk.find('\n')) { + self.offset += newline_ix + 1; + found = self.offset <= self.range.end; + } else { + self.offset = self.chunks.end(&()); + } + } + + if self.offset == self.chunks.end(&()) { + self.next(); + } } + + if self.offset > self.range.end { + self.offset = cmp::min(self.offset, self.range.end); + self.chunks.seek(&self.offset, Bias::Right, &()); + } + + found + } + + /// Move this cursor to the preceding position in the rope that starts a new line. + /// Reversed chunks iterators are not currently supported and will panic. + /// + /// If this cursor is not on the start of a line, it will be moved to the start of + /// its current line. Otherwise it will be moved to the start of the previous line. + /// It updates the cursor's position and returns true if a previous line was found, + /// or false if the cursor was already at the start of the rope. + pub fn prev_line(&mut self) -> bool { + assert!(!self.reversed); + + let initial_offset = self.offset; + + if self.offset == *self.chunks.start() { + self.chunks.prev(&()); + } + + if let Some(chunk) = self.chunks.item() { + let mut end_ix = self.offset - *self.chunks.start(); + if chunk.0.as_bytes()[end_ix - 1] == b'\n' { + end_ix -= 1; + } + + if let Some(newline_ix) = chunk.0[..end_ix].rfind('\n') { + self.offset = *self.chunks.start() + newline_ix + 1; + if self.offset_is_valid() { + return true; + } + } + } + + self.chunks + .search_backward(|summary| summary.text.lines.row > 0, &()); + self.offset = *self.chunks.start(); + if let Some(chunk) = self.chunks.item() { + if let Some(newline_ix) = chunk.0.rfind('\n') { + self.offset += newline_ix + 1; + if self.offset_is_valid() { + if self.offset == self.chunks.end(&()) { + self.chunks.next(&()); + } + + return true; + } + } + } + + if !self.offset_is_valid() || self.chunks.item().is_none() { + self.offset = self.range.start; + self.chunks.seek(&self.offset, Bias::Right, &()); + } + + self.offset < initial_offset && self.offset == 0 } pub fn peek(&self) -> Option<&'a str> { - let chunk = self.chunks.item()?; - if self.reversed && self.range.start >= self.chunks.end(&()) { - return None; - } - let chunk_start = *self.chunks.start(); - if self.range.end <= chunk_start { + if !self.offset_is_valid() { return None; } - let start = self.range.start.saturating_sub(chunk_start); - let end = self.range.end - chunk_start; - Some(&chunk.0[start..chunk.0.len().min(end)]) + let chunk = self.chunks.item()?; + let chunk_start = *self.chunks.start(); + let slice_range = if self.reversed { + let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start; + let slice_end = self.offset - chunk_start; + slice_start..slice_end + } else { + let slice_start = self.offset - chunk_start; + let slice_end = cmp::min(self.chunks.end(&()), self.range.end) - chunk_start; + slice_start..slice_end + }; + + Some(&chunk.0[slice_range]) } pub fn lines(self) -> Lines<'a> { @@ -633,15 +746,16 @@ impl<'a> Iterator for Chunks<'a> { type Item = &'a str; fn next(&mut self) -> Option { - let result = self.peek(); - if result.is_some() { - if self.reversed { - self.chunks.prev(&()); - } else { - self.chunks.next(&()); - } + let chunk = self.peek()?; + if self.reversed { + self.chunks.prev(&()); + self.offset -= chunk.len(); + } else { + self.chunks.next(&()); + self.offset += chunk.len(); } - result + + Some(chunk) } } @@ -1299,6 +1413,13 @@ mod tests { use util::RandomCharIter; use Bias::{Left, Right}; + #[ctor::ctor] + fn init_logger() { + if std::env::var("RUST_LOG").is_ok() { + env_logger::init(); + } + } + #[test] fn test_all_4_byte_chars() { let mut rope = Rope::new(); @@ -1355,6 +1476,50 @@ mod tests { ); } + #[test] + fn test_prev_next_line() { + let rope = Rope::from("abc\ndef\nghi\njkl"); + + let mut chunks = rope.chunks(); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a'); + + assert!(chunks.next_line()); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd'); + + assert!(chunks.next_line()); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g'); + + assert!(chunks.next_line()); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j'); + + assert!(!chunks.next_line()); + assert_eq!(chunks.peek(), None); + + assert!(chunks.prev_line()); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j'); + + assert!(chunks.prev_line()); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g'); + + assert!(chunks.prev_line()); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd'); + + assert!(chunks.prev_line()); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a'); + + assert!(!chunks.prev_line()); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a'); + + // Only return true when the cursor has moved to the start of a line + let mut chunks = rope.chunks_in_range(5..7); + chunks.seek(6); + assert!(!chunks.prev_line()); + assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'e'); + + assert!(!chunks.next_line()); + assert_eq!(chunks.peek(), None); + } + #[test] fn test_lines() { let rope = Rope::from("abc\ndefg\nhi"); @@ -1438,6 +1603,143 @@ mod tests { .collect::(), &expected[start_ix..end_ix] ); + + let mut expected_line_starts: Vec<_> = expected[start_ix..end_ix] + .match_indices('\n') + .map(|(index, _)| start_ix + index + 1) + .collect(); + + let mut chunks = actual.chunks_in_range(start_ix..end_ix); + + let mut actual_line_starts = Vec::new(); + while chunks.next_line() { + actual_line_starts.push(chunks.offset()); + } + assert_eq!( + actual_line_starts, + expected_line_starts, + "actual line starts != expected line starts when using next_line() for {:?} ({:?})", + &expected[start_ix..end_ix], + start_ix..end_ix + ); + + if start_ix < end_ix + && (start_ix == 0 || expected.as_bytes()[start_ix - 1] == b'\n') + { + expected_line_starts.insert(0, start_ix); + } + // Remove the last index if it starts at the end of the range. + if expected_line_starts.last() == Some(&end_ix) { + expected_line_starts.pop(); + } + + let mut actual_line_starts = Vec::new(); + while chunks.prev_line() { + actual_line_starts.push(chunks.offset()); + } + actual_line_starts.reverse(); + assert_eq!( + actual_line_starts, + expected_line_starts, + "actual line starts != expected line starts when using prev_line() for {:?} ({:?})", + &expected[start_ix..end_ix], + start_ix..end_ix + ); + + // Check that next_line/prev_line work correctly from random positions + let mut random_offset = rng.gen_range(start_ix..=end_ix); + while !expected.is_char_boundary(random_offset) { + random_offset -= 1; + } + chunks.seek(random_offset); + if rng.gen() { + let expected_next_line_start = expected[random_offset..end_ix] + .find('\n') + .map(|newline_ix| random_offset + newline_ix + 1); + + let moved = chunks.next_line(); + assert_eq!( + moved, + expected_next_line_start.is_some(), + "unexpected result from next_line after seeking to {} in range {:?} ({:?})", + random_offset, + start_ix..end_ix, + &expected[start_ix..end_ix] + ); + if let Some(expected_next_line_start) = expected_next_line_start { + assert_eq!( + chunks.offset(), + expected_next_line_start, + "invalid position after seeking to {} in range {:?} ({:?})", + random_offset, + start_ix..end_ix, + &expected[start_ix..end_ix] + ); + } else { + assert_eq!( + chunks.offset(), + end_ix, + "invalid position after seeking to {} in range {:?} ({:?})", + random_offset, + start_ix..end_ix, + &expected[start_ix..end_ix] + ); + } + } else { + let search_end = + if random_offset > 0 && expected.as_bytes()[random_offset - 1] == b'\n' { + random_offset - 1 + } else { + random_offset + }; + + let expected_prev_line_start = expected[..search_end] + .rfind('\n') + .and_then(|newline_ix| { + let line_start_ix = newline_ix + 1; + if line_start_ix >= start_ix { + Some(line_start_ix) + } else { + None + } + }) + .or_else(|| { + if random_offset > 0 && start_ix == 0 { + Some(0) + } else { + None + } + }); + + let moved = chunks.prev_line(); + assert_eq!( + moved, + expected_prev_line_start.is_some(), + "unexpected result from prev_line after seeking to {} in range {:?} ({:?})", + random_offset, + start_ix..end_ix, + &expected[start_ix..end_ix] + ); + if let Some(expected_prev_line_start) = expected_prev_line_start { + assert_eq!( + chunks.offset(), + expected_prev_line_start, + "invalid position after seeking to {} in range {:?} ({:?})", + random_offset, + start_ix..end_ix, + &expected[start_ix..end_ix] + ); + } else { + assert_eq!( + chunks.offset(), + start_ix, + "invalid position after seeking to {} in range {:?} ({:?})", + random_offset, + start_ix..end_ix, + &expected[start_ix..end_ix] + ); + } + } } let mut offset_utf16 = OffsetUtf16(0); diff --git a/crates/sum_tree/src/cursor.rs b/crates/sum_tree/src/cursor.rs index 1e8519114d..452930f942 100644 --- a/crates/sum_tree/src/cursor.rs +++ b/crates/sum_tree/src/cursor.rs @@ -178,11 +178,11 @@ where #[track_caller] pub fn prev(&mut self, cx: &::Context) { - self.prev_internal(|_| true, cx) + self.search_backward(|_| true, cx) } #[track_caller] - fn prev_internal(&mut self, mut filter_node: F, cx: &::Context) + pub fn search_backward(&mut self, mut filter_node: F, cx: &::Context) where F: FnMut(&T::Summary) -> bool, { @@ -249,11 +249,11 @@ where #[track_caller] pub fn next(&mut self, cx: &::Context) { - self.next_internal(|_| true, cx) + self.search_forward(|_| true, cx) } #[track_caller] - fn next_internal(&mut self, mut filter_node: F, cx: &::Context) + pub fn search_forward(&mut self, mut filter_node: F, cx: &::Context) where F: FnMut(&T::Summary) -> bool, { @@ -658,11 +658,11 @@ where } pub fn next(&mut self, cx: &::Context) { - self.cursor.next_internal(&mut self.filter_node, cx); + self.cursor.search_forward(&mut self.filter_node, cx); } pub fn prev(&mut self, cx: &::Context) { - self.cursor.prev_internal(&mut self.filter_node, cx); + self.cursor.search_backward(&mut self.filter_node, cx); } } @@ -681,7 +681,7 @@ where } if let Some(item) = self.item() { - self.cursor.next_internal(&mut self.filter_node, &()); + self.cursor.search_forward(&mut self.filter_node, &()); Some(item) } else { None diff --git a/crates/text/src/text.rs b/crates/text/src/text.rs index 945483a848..a70d1b7692 100644 --- a/crates/text/src/text.rs +++ b/crates/text/src/text.rs @@ -542,6 +542,35 @@ pub struct LineIndent { } impl LineIndent { + pub fn from_chunks(chunks: &mut Chunks) -> Self { + let mut tabs = 0; + let mut spaces = 0; + let mut line_blank = true; + + 'outer: while let Some(chunk) = chunks.peek() { + for ch in chunk.chars() { + if ch == '\t' { + tabs += 1; + } else if ch == ' ' { + spaces += 1; + } else { + if ch != '\n' { + line_blank = false; + } + break 'outer; + } + } + + chunks.next(); + } + + Self { + tabs, + spaces, + line_blank, + } + } + /// Constructs a new `LineIndent` which only contains spaces. pub fn spaces(spaces: u32) -> Self { Self { @@ -1983,39 +2012,64 @@ impl BufferSnapshot { row_range: Range, ) -> impl Iterator + '_ { let start = Point::new(row_range.start, 0).to_offset(self); - let end = Point::new(row_range.end, 0).to_offset(self); + let end = Point::new(row_range.end - 1, self.line_len(row_range.end - 1)).to_offset(self); - let mut lines = self.as_rope().chunks_in_range(start..end).lines(); + let mut chunks = self.as_rope().chunks_in_range(start..end); let mut row = row_range.start; + let mut done = start == end; std::iter::from_fn(move || { - if let Some(line) = lines.next() { - let indent = LineIndent::from(line); - row += 1; - Some((row - 1, indent)) - } else { + if done { None + } else { + let indent = (row, LineIndent::from_chunks(&mut chunks)); + done = !chunks.next_line(); + row += 1; + Some(indent) } }) } + /// Returns the line indents in the given row range, exclusive of end row, in reversed order. pub fn reversed_line_indents_in_row_range( &self, row_range: Range, ) -> impl Iterator + '_ { let start = Point::new(row_range.start, 0).to_offset(self); - let end = Point::new(row_range.end, 0) - .to_offset(self) - .saturating_sub(1); - let mut lines = self.as_rope().reversed_chunks_in_range(start..end).lines(); - let mut row = row_range.end; + let end_point; + let end; + if row_range.end > row_range.start { + end_point = Point::new(row_range.end - 1, self.line_len(row_range.end - 1)); + end = end_point.to_offset(self); + } else { + end_point = Point::new(row_range.start, 0); + end = start; + }; + + let mut chunks = self.as_rope().chunks_in_range(start..end); + // Move the cursor to the start of the last line if it's not empty. + chunks.seek(end); + if end_point.column > 0 { + chunks.prev_line(); + } + + let mut row = end_point.row; + let mut done = start == end; std::iter::from_fn(move || { - if let Some(line) = lines.next() { - let indent = LineIndent::from(line); - row = row.saturating_sub(1); - Some((row, indent)) - } else { + if done { None + } else { + let initial_offset = chunks.offset(); + let indent = (row, LineIndent::from_chunks(&mut chunks)); + if chunks.offset() > initial_offset { + chunks.prev_line(); + } + done = !chunks.prev_line(); + if !done { + row -= 1; + } + + Some(indent) } }) }