Merge pull request #3326 from gitbutlerapp/exclusive-linespan-refactor

make LineSpan end-exclusive
This commit is contained in:
Josh Junon 2024-03-26 00:18:57 +01:00 committed by GitHub
commit c3c478b85e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,5 +1,3 @@
#![allow(clippy::module_name_repetitions)]
/// A line-based span of text.
///
/// A span may be empty (start == end); the end line is exclusive.
@ -7,20 +5,23 @@
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineSpan {
start: usize,
end: usize,
end: usize, // Exclusive
}
impl LineSpan {
/// Creates a new line span from the given start and end lines.
/// Note that line numbers are zero-based, and the ending
/// line number is inclusive.
/// line number is exclusive.
///
/// # Panics
///
/// Panics if the start line is greater than the end line.
/// Panics if the start line is greater than the end line. Equal start and
/// end lines are allowed and describe an empty span.
#[must_use]
pub fn new(start: usize, end: usize) -> Self {
assert!(start <= end, "start line cannot be greater than end line");
assert!(
start <= end,
"start line must be less than or equal to the end line"
);
Self { start, end }
}
@ -31,7 +32,7 @@ impl LineSpan {
self.start
}
/// The ending line of the span. Zero-based, inclusive.
/// The ending line of the span. Zero-based, exclusive.
#[inline]
#[must_use]
pub fn end(&self) -> usize {
@ -39,28 +40,36 @@ impl LineSpan {
}
/// Gets the line count from the span
#[inline]
#[must_use]
pub fn line_count(&self) -> usize {
debug_assert!(self.end >= self.start);
self.end - self.start + 1
self.end - self.start
}
/// Reports whether this span covers zero lines.
///
/// The span is considered empty exactly when its start and end line
/// numbers are equal.
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
    self.end == self.start
}
/// Returns true if the given span intersects with this span.
#[inline]
#[must_use]
pub fn intersects(&self, other: &Self) -> bool {
debug_assert!(self.end >= self.start);
debug_assert!(other.end >= other.start);
// If the other span starts after this span ends, they don't intersect.
// If the other span ends before this span starts, they don't intersect.
// Otherwise, they intersect.
other.start <= self.end && other.end >= self.start
other.start < self.end && other.end > self.start
}
/// Extracts the lines from the span from the given text.
/// The final line ending (if any) is not included.
/// The final line ending (if any) is included.
///
/// Also returns the character offsets (inclusive).
/// Also returns the byte offsets of the extracted text within the input
/// (start inclusive, end exclusive).
///
/// If the span is empty (i.e. start == end), or if the start
/// line starts after the last line, this will return `None`.
///
/// If the end line is after the last line, it will be clamped
/// to the last line of the input text.
///
/// # Panics
/// Panics if the span's start > end.
@ -68,46 +77,32 @@ impl LineSpan {
pub fn extract<'a>(&self, text: &'a str) -> Option<(&'a str, usize, usize)> {
debug_assert!(self.end >= self.start);
let mut start_offset = None;
if text.is_empty() || self.start == self.end {
return None;
}
let mut start_offset = if self.start == 0 { Some(0) } else { None };
let mut current_line = 0;
let mut end_offset = None;
for (i, c) in text.char_indices() {
if start_offset.is_none() && current_line == self.start {
start_offset = Some(i);
}
for (i, _) in text.char_indices().filter(|(_, c)| *c == '\n') {
current_line += 1;
if current_line == self.end {
if current_line == self.start {
start_offset = Some(i + 1);
} else if current_line == self.end {
debug_assert!(start_offset.is_some());
let start_offset = start_offset.unwrap();
// Fast-forward to the end of the line and return
// The strange song and dance is so that the final
// line ending is not included, but we still gracefully
// handle EOFs.
let mut last_i = i;
for (i, c) in text[i..].char_indices() {
if c == '\n' {
break;
}
last_i = i;
}
let end_offset = i + last_i;
return Some((&text[start_offset..=end_offset], start_offset, end_offset));
}
if c == '\n' {
current_line += 1;
end_offset = Some(i + 1);
break;
}
}
// Assert the invariant that we didn't mess up above.
debug_assert!(current_line < self.end);
None
start_offset.map(|start_offset| {
let end_offset = end_offset
.map(|i| if i > text.len() { i - 1 } else { i })
.unwrap_or_else(|| text.len());
(&text[start_offset..end_offset], start_offset, end_offset)
})
}
}
@ -118,21 +113,17 @@ mod tests {
#[test]
fn span_new() {
for s in 0..20 {
for e in 0..20 {
if s > e {
assert!(std::panic::catch_unwind(|| LineSpan::new(s, e)).is_err());
} else {
let span = LineSpan::new(s, e);
assert_eq!(span.start(), s);
assert_eq!(span.end(), e);
}
for e in s + 1..=20 {
let span = LineSpan::new(s, e);
assert_eq!(span.start(), s);
assert_eq!(span.end(), e);
}
}
}
#[test]
fn span_extract() {
let lines = vec![
let lines = [
"Hello, world!",
"This is a test.",
"This is another test.\r",
@ -143,70 +134,72 @@ mod tests {
"This is a seventh test.\r",
"This is an eighth test.",
"This is a ninth test.\r",
"This is a tenth test.",
"This is a tenth test.", // note no newline at end
];
let full_text = lines.join("\n");
// calculate the known character offsets of each line
let lines = lines
.into_iter()
.scan(0, |state, line| {
let start = *state;
let end = start + line.len();
*state = end + 1;
Some((line, start, end - 1))
})
.collect::<Vec<_>>();
// Test single-line extraction
for (i, expected) in lines.iter().enumerate() {
let span = LineSpan::new(i, i);
let extracted = span.extract(&full_text).unwrap();
assert_eq!(extracted, *expected);
let mut offsets = vec![];
let mut start = 0;
for (i, line) in lines.iter().enumerate() {
// If it's not the last line, add 1 for the newline character.
let end = start + line.len() + (i != (lines.len() - 1)) as usize;
offsets.push((start, end));
start = end;
}
// Test line span cartesian
for (i, start_expected) in lines.iter().enumerate() {
for (j, end_expected) in lines.iter().enumerate() {
if i > j {
// Test single-line extraction
for i in 0..lines.len() - 1 {
let span = LineSpan::new(i, i + 1);
let expected = &full_text[offsets[i].0..offsets[i].1];
let (extracted, start_offset, end_offset) = span.extract(&full_text).unwrap();
assert_eq!(extracted, expected);
assert_eq!((start_offset, end_offset), (offsets[i].0, offsets[i].1));
}
// Test multi-line extraction
for i in 0..lines.len() {
for j in i..=lines.len() {
let span = LineSpan::new(i, j);
assert!(span.line_count() == (j - i));
if i == j {
assert!(span.is_empty());
continue;
}
let expected = (
&full_text[start_expected.1..=end_expected.2],
start_expected.1,
end_expected.2,
);
let expected_start = offsets[i].0;
let expected_end = offsets[j - 1].1;
let expected_text = &full_text[expected_start..expected_end];
let span = LineSpan::new(i, j);
let extracted = span.extract(&full_text).unwrap();
assert_eq!(extracted, expected);
let (extracted, start_offset, end_offset) = span.extract(&full_text).unwrap();
assert_eq!(extracted, expected_text);
assert_eq!((start_offset, end_offset), (expected_start, expected_end));
}
}
}
#[test]
fn span_intersects() {
let span = LineSpan::new(5, 10);
let span = LineSpan::new(5, 11); // Exclusive end
assert!(span.intersects(&LineSpan::new(10, 10)));
assert!(span.intersects(&LineSpan::new(0, 10)));
assert!(span.intersects(&LineSpan::new(10, 15)));
assert!(span.intersects(&LineSpan::new(4, 5)));
assert!(span.intersects(&LineSpan::new(5, 5)));
assert!(span.intersects(&LineSpan::new(0, 5)));
assert!(span.intersects(&LineSpan::new(0, 7)));
assert!(span.intersects(&LineSpan::new(0, 9)));
assert!(span.intersects(&LineSpan::new(9, 9)));
assert!(span.intersects(&LineSpan::new(7, 8)));
assert!(span.intersects(&LineSpan::new(5, 10)));
assert!(span.intersects(&LineSpan::new(10, 11))); // Intersect at start
assert!(span.intersects(&LineSpan::new(0, 11))); // Fully contained
assert!(span.intersects(&LineSpan::new(10, 15))); // Partial overlap
assert!(span.intersects(&LineSpan::new(4, 6))); // Intersect at end
assert!(span.intersects(&LineSpan::new(5, 6))); // Exact match start
assert!(span.intersects(&LineSpan::new(0, 6))); // Overlap at end
assert!(span.intersects(&LineSpan::new(0, 8))); // Overlap middle
assert!(span.intersects(&LineSpan::new(0, 10))); // Overlap up to end
assert!(span.intersects(&LineSpan::new(9, 10))); // Overlap at single point
assert!(span.intersects(&LineSpan::new(7, 9))); // Overlap inside
assert!(!span.intersects(&LineSpan::new(0, 0)));
assert!(!span.intersects(&LineSpan::new(0, 4)));
assert!(!span.intersects(&LineSpan::new(4, 4)));
assert!(!span.intersects(&LineSpan::new(11, 20)));
assert!(!span.intersects(&LineSpan::new(11, 11)));
// Test cases where there should be no intersection due to exclusive end
assert!(!span.intersects(&LineSpan::new(0, 5))); // Before start
assert!(!span.intersects(&LineSpan::new(11, 20))); // After end
assert!(!span.intersects(&LineSpan::new(11, 12))); // Just after end
}
}