1
1
mirror of https://github.com/wez/wezterm.git synced 2024-12-22 21:01:36 +03:00

rangeset: fix accidentally quadratic complexity

When adding sparse ranges the cartesian product of range combinations
was explored to find intersections, which is pretty awful if there
are 1 million entries to be inserted.

This commit employs binary search to reduce the complexity, at
the expense of requiring that the internal range array is sorted.
This commit is contained in:
Wez Furlong 2022-07-24 12:48:02 -07:00
parent 565b03b1c5
commit 14f0162688
4 changed files with 105 additions and 13 deletions

1
Cargo.lock generated
View File

@ -3602,6 +3602,7 @@ dependencies = [
name = "rangeset"
version = "0.1.0"
dependencies = [
"criterion",
"num",
]

View File

@ -6,3 +6,11 @@ edition = "2018"
[dependencies]
num = "0.3"
[dev-dependencies]
criterion = "0.3"
[[bench]]
name = "rangeset"
harness = false

View File

@ -0,0 +1,43 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rangeset::RangeSet;
/// Builds a `RangeSet` by adding every value in `0..size`, one value per
/// `add` call, in ascending order.
///
/// Used by the benchmarks to measure insertion of consecutive values.
fn build_contig_rangeset(size: usize) -> RangeSet<usize> {
    let mut set = RangeSet::new();
    (0..size).for_each(|value| {
        set.add(value);
    });
    set
}
/// Builds a `RangeSet` by adding every second value in `0..size`
/// (0, 2, 4, ...), one value per `add` call, in ascending order.
///
/// Consecutive inserted values are separated by a gap of one, which is the
/// "sparse" insertion pattern the benchmarks measure.
fn build_sparse_rangeset(size: usize) -> RangeSet<usize> {
    let mut set = RangeSet::new();
    (0..size).step_by(2).for_each(|value| {
        set.add(value);
    });
    set
}
/// Registers the set-construction benchmarks with criterion.
///
/// Three sizes (100, 10000 and 1000000) are measured for the contiguous
/// builder first, followed by the same three sizes for the sparse builder.
pub fn criterion_benchmark(c: &mut Criterion) {
    // Benchmark id paired with the number of values fed to the builder.
    let contiguous_cases: [(&str, usize); 3] = [
        ("Contig 100", 100),
        ("Contig 10000", 10000),
        ("Contig 1000000", 1000000),
    ];
    let sparse_cases: [(&str, usize); 3] = [
        ("Sparse 100", 100),
        ("Sparse 10000", 10000),
        ("Sparse 1000000", 1000000),
    ];

    for &(name, size) in contiguous_cases.iter() {
        c.bench_function(name, move |b| {
            b.iter(|| black_box(build_contig_rangeset(size)))
        });
    }

    for &(name, size) in sparse_cases.iter() {
        c.bench_function(name, move |b| {
            b.iter(|| black_box(build_sparse_rangeset(size)))
        });
    }
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@ -1,5 +1,5 @@
use num::{Integer, ToPrimitive};
use std::cmp::{max, min};
use std::cmp::{max, min, Ordering};
use std::fmt::Debug;
use std::ops::Range;
@ -9,6 +9,7 @@ use std::ops::Range;
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RangeSet<T: Integer + Copy> {
ranges: Vec<Range<T>>,
needs_sort: bool,
}
pub fn range_is_empty<T: Integer>(range: &Range<T>) -> bool {
@ -92,7 +93,10 @@ impl<T: Integer + Copy + Debug + ToPrimitive> From<RangeSet<T>> for Vec<Range<T>
impl<T: Integer + Copy + Debug + ToPrimitive> RangeSet<T> {
/// Create a new set
pub fn new() -> Self {
Self { ranges: vec![] }
Self {
ranges: vec![],
needs_sort: false,
}
}
/// Returns true if this set is empty
@ -220,6 +224,8 @@ impl<T: Integer + Copy + Debug + ToPrimitive> RangeSet<T> {
return;
}
self.sort_if_needed();
match self.intersection_helper(&range) {
(Some(a), Some(b)) if b == a + 1 => {
// This range intersects with two or more adjacent ranges and will
@ -243,6 +249,7 @@ impl<T: Integer + Copy + Debug + ToPrimitive> RangeSet<T> {
pub fn add_range_unchecked(&mut self, range: Range<T>) {
self.ranges.push(range);
self.needs_sort = true;
}
/// Add a set of ranges to this set
@ -258,28 +265,61 @@ impl<T: Integer + Copy + Debug + ToPrimitive> RangeSet<T> {
}
fn intersection_helper(&self, range: &Range<T>) -> (Option<usize>, Option<usize>) {
let mut first = None;
if self.needs_sort {
panic!("rangeset needs sorting");
}
for (idx, r) in self.ranges.iter().enumerate() {
let idx = match self.binary_search_ranges(range) {
Ok(idx) => idx,
Err(idx) => idx.saturating_sub(1),
};
let mut first = None;
if let Some(r) = self.ranges.get(idx) {
if intersects_range(r, range) || r.end == range.start {
if first.is_some() {
return (first, Some(idx));
}
first = Some(idx);
}
}
if let Some(r) = self.ranges.get(idx + 1) {
if intersects_range(r, range) || r.end == range.start {
if first.is_some() {
return (first, Some(idx + 1));
}
}
}
(first, None)
}
fn insertion_point(&self, range: &Range<T>) -> usize {
for (idx, r) in self.ranges.iter().enumerate() {
if range.end < r.start {
return idx;
pub fn sort_if_needed(&mut self) {
if self.needs_sort {
self.ranges.sort_by_key(|r| r.start);
self.needs_sort = false;
}
}
/// Binary-searches `self.ranges` for an entry that fully contains `range`.
///
/// Returns `Ok(idx)` when the entry at `idx` satisfies
/// `entry.start <= range.start && range.end <= entry.end`. Otherwise returns
/// `Err(idx)` with the position reported by `binary_search_by`.
///
/// The result is only meaningful when `self.ranges` is ordered by start;
/// the callers visible in this file panic if `needs_sort` is still set.
fn binary_search_ranges(&self, range: &Range<T>) -> Result<usize, usize> {
    self.ranges.binary_search_by(|candidate| {
        // The comparator reports how `candidate` orders relative to `range`.
        let starts_before_candidate = range.start < candidate.start;
        let ends_after_candidate = range.end > candidate.end;

        if starts_before_candidate {
            // `range` begins to the left of this entry: look further left.
            Ordering::Greater
        } else if ends_after_candidate {
            // `range` begins inside or after this entry but runs past its
            // end: look further right.
            Ordering::Less
        } else {
            // `range` lies entirely within this entry.
            Ordering::Equal
        }
    })
}
fn insertion_point(&self, range: &Range<T>) -> usize {
if self.needs_sort {
panic!("rangeset needs sorting");
}
self.ranges.len()
match self.binary_search_ranges(range) {
Ok(idx) => idx,
Err(idx) => idx,
}
}
/// Returns an iterator over the ranges that comprise the set