mirror of
https://github.com/wez/wezterm.git
synced 2024-12-22 21:01:36 +03:00
rangeset: fix accidentally quadratic complexity
When adding sparse ranges, the Cartesian product of range combinations was explored to find intersections, which is pretty awful if there are 1 million entries to be inserted. This commit employs binary search to reduce the complexity, at the expense of requiring that the internal range array be kept sorted.
This commit is contained in:
parent
565b03b1c5
commit
14f0162688
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3602,6 +3602,7 @@ dependencies = [
|
||||
name = "rangeset"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"num",
|
||||
]
|
||||
|
||||
|
@ -6,3 +6,11 @@ edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
num = "0.3"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
|
||||
[[bench]]
|
||||
name = "rangeset"
|
||||
harness = false
|
||||
|
||||
|
43
rangeset/benches/rangeset.rs
Normal file
43
rangeset/benches/rangeset.rs
Normal file
@ -0,0 +1,43 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use rangeset::RangeSet;
|
||||
|
||||
fn build_contig_rangeset(size: usize) -> RangeSet<usize> {
|
||||
let mut set = RangeSet::new();
|
||||
for i in 0..size {
|
||||
set.add(i);
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn build_sparse_rangeset(size: usize) -> RangeSet<usize> {
|
||||
let mut set = RangeSet::new();
|
||||
for i in (0..size).step_by(2) {
|
||||
set.add(i);
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
pub fn criterion_benchmark(c: &mut Criterion) {
|
||||
c.bench_function("Contig 100", |b| {
|
||||
b.iter(|| black_box(build_contig_rangeset(100)))
|
||||
});
|
||||
c.bench_function("Contig 10000", |b| {
|
||||
b.iter(|| black_box(build_contig_rangeset(10000)))
|
||||
});
|
||||
c.bench_function("Contig 1000000", |b| {
|
||||
b.iter(|| black_box(build_contig_rangeset(1000000)))
|
||||
});
|
||||
|
||||
c.bench_function("Sparse 100", |b| {
|
||||
b.iter(|| black_box(build_sparse_rangeset(100)))
|
||||
});
|
||||
c.bench_function("Sparse 10000", |b| {
|
||||
b.iter(|| black_box(build_sparse_rangeset(10000)))
|
||||
});
|
||||
c.bench_function("Sparse 1000000", |b| {
|
||||
b.iter(|| black_box(build_sparse_rangeset(1000000)))
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(benches, criterion_benchmark);
|
||||
criterion_main!(benches);
|
@ -1,5 +1,5 @@
|
||||
use num::{Integer, ToPrimitive};
|
||||
use std::cmp::{max, min};
|
||||
use std::cmp::{max, min, Ordering};
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Range;
|
||||
|
||||
@ -9,6 +9,7 @@ use std::ops::Range;
|
||||
// NOTE(review): the derived `PartialEq`/`Eq` compare every field, i.e. both
// the current element order of `ranges` and the `needs_sort` flag.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
|
||||
pub struct RangeSet<T: Integer + Copy> {
|
||||
/// The ranges that make up this set.
ranges: Vec<Range<T>>,
|
||||
/// Set by `add_range_unchecked` after it appends a range without placing
/// it in order; cleared by `sort_if_needed` once `ranges` has been sorted
/// by `start`. The binary-search based helpers panic while this is true.
needs_sort: bool,
|
||||
}
|
||||
|
||||
pub fn range_is_empty<T: Integer>(range: &Range<T>) -> bool {
|
||||
@ -92,7 +93,10 @@ impl<T: Integer + Copy + Debug + ToPrimitive> From<RangeSet<T>> for Vec<Range<T>
|
||||
impl<T: Integer + Copy + Debug + ToPrimitive> RangeSet<T> {
|
||||
/// Create a new set
|
||||
pub fn new() -> Self {
|
||||
Self { ranges: vec![] }
|
||||
Self {
|
||||
ranges: vec![],
|
||||
needs_sort: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this set is empty
|
||||
@ -220,6 +224,8 @@ impl<T: Integer + Copy + Debug + ToPrimitive> RangeSet<T> {
|
||||
return;
|
||||
}
|
||||
|
||||
self.sort_if_needed();
|
||||
|
||||
match self.intersection_helper(&range) {
|
||||
(Some(a), Some(b)) if b == a + 1 => {
|
||||
// This range intersects with two or more adjacent ranges and will
|
||||
@ -243,6 +249,7 @@ impl<T: Integer + Copy + Debug + ToPrimitive> RangeSet<T> {
|
||||
|
||||
/// Appends `range` to the end of the internal list exactly as given,
/// without comparing it against the existing entries, and flags the set
/// as needing a sort.
pub fn add_range_unchecked(&mut self, range: Range<T>) {
    // The list may now be out of order; record that alongside the append.
    self.needs_sort = true;
    self.ranges.push(range);
}
|
||||
|
||||
/// Add a set of ranges to this set
|
||||
@ -258,28 +265,61 @@ impl<T: Integer + Copy + Debug + ToPrimitive> RangeSet<T> {
|
||||
}
|
||||
|
||||
fn intersection_helper(&self, range: &Range<T>) -> (Option<usize>, Option<usize>) {
|
||||
let mut first = None;
|
||||
if self.needs_sort {
|
||||
panic!("rangeset needs sorting");
|
||||
}
|
||||
|
||||
for (idx, r) in self.ranges.iter().enumerate() {
|
||||
let idx = match self.binary_search_ranges(range) {
|
||||
Ok(idx) => idx,
|
||||
Err(idx) => idx.saturating_sub(1),
|
||||
};
|
||||
|
||||
let mut first = None;
|
||||
if let Some(r) = self.ranges.get(idx) {
|
||||
if intersects_range(r, range) || r.end == range.start {
|
||||
if first.is_some() {
|
||||
return (first, Some(idx));
|
||||
}
|
||||
first = Some(idx);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(r) = self.ranges.get(idx + 1) {
|
||||
if intersects_range(r, range) || r.end == range.start {
|
||||
if first.is_some() {
|
||||
return (first, Some(idx + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
(first, None)
|
||||
}
|
||||
|
||||
fn insertion_point(&self, range: &Range<T>) -> usize {
|
||||
for (idx, r) in self.ranges.iter().enumerate() {
|
||||
if range.end < r.start {
|
||||
return idx;
|
||||
pub fn sort_if_needed(&mut self) {
|
||||
if self.needs_sort {
|
||||
self.ranges.sort_by_key(|r| r.start);
|
||||
self.needs_sort = false;
|
||||
}
|
||||
}
|
||||
|
||||
fn binary_search_ranges(&self, range: &Range<T>) -> Result<usize, usize> {
|
||||
self.ranges.binary_search_by(|r| {
|
||||
if range.start >= r.start && range.end <= r.end {
|
||||
Ordering::Equal
|
||||
} else if range.start < r.start {
|
||||
Ordering::Greater
|
||||
} else if range.end > r.end {
|
||||
Ordering::Less
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn insertion_point(&self, range: &Range<T>) -> usize {
|
||||
if self.needs_sort {
|
||||
panic!("rangeset needs sorting");
|
||||
}
|
||||
|
||||
self.ranges.len()
|
||||
match self.binary_search_ranges(range) {
|
||||
Ok(idx) => idx,
|
||||
Err(idx) => idx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over the ranges that comprise the set
|
||||
|
Loading…
Reference in New Issue
Block a user