Widgets: improve scalability in some cases (#3920)

Fix issues noted here: https://github.com/enso-org/enso/pull/3678#issuecomment-1273623924
- Time complexity of an operation during line-redrawing scaled quadratically with the number of lines in a change; now linear.
- Time complexity of adding `n` selections to a group was `O(n^2)`. Now it is `O(n log n)`, even if the selections are added one by one.

Also fix a subtle bug I found in `Group::newest_mut`: It returned a mutable reference that allowed breaking the *sorted* invariant of the selection group. The new implementation moves the element to invalidated space before returning a reference (internally to `LazyInvariantVec`), so that if it is mutated it will be moved to its correct location.

### Important Notes

New APIs:
- `NonEmptyVec::extend_at` supports inserting a sequence of elements at a location, with asymptotically-better performance than a series of `insert`s. (This is a subset of the functionality of `Vec::splice`, a function which we can't safely offer for `NonEmptyVec`).
- `LazyInvariantVec` supports lazily-restoring an invariant on a vector. For an invariant such as *sorted* (or in this case, *sorted and merged*), this allows asymptotically-better performance than maintaining the invariant with each mutation.
2025-01-09 03:57:54 +03:00 · 2022-11-30 04:36:28 -08:00 · 2022-11-30 04:36:28 -08:00 · b58470145c
commit b58470145c
parent 1285dbd809
8 changed files with 337 additions and 50 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2956,6 +2956,8 @@ dependencies = [
 "ensogl-text-msdf",
 "ordered-float",
 "owned_ttf_parser",
+ "rand 0.8.5",
+ "rand_chacha 0.3.1",
 "rustybuzz",
 "serde",
 "wasm-bindgen-test",
--- a/lib/rust/ensogl/component/text/Cargo.toml
+++ b/lib/rust/ensogl/component/text/Cargo.toml
@ -26,4 +26,6 @@ ensogl-text-font-family = { path = "src/font/family" }
 rustybuzz = "0.5.1"

 [dev-dependencies]
+rand = { version = "0.8.5", default-features = false }
+rand_chacha = "0.3.1"
 wasm-bindgen-test = { version = "0.3.8" }
--- a/lib/rust/ensogl/component/text/src/buffer/selection.rs
+++ b/lib/rust/ensogl/component/text/src/buffer/selection.rs
@ -264,23 +264,24 @@ impl<T: Boundary> Selection<T> {

 /// A set of zero or more selections.
 ///
-/// The selections are kept in sorted order to maintain a good performance in algorithms. It is used
-/// in many places, including selection merging process.
+/// Some algorithms, such as selection merging, require the selections to be in sorted order. This
+/// invariant is restored as needed, to support asymptotically-efficient addition of selections to
+/// the collection.
 #[derive(Clone, Debug, Default)]
 pub struct Group {
-    sorted_selections: Vec<Selection>,
+    sorted_selections: LazyInvariantVec<Selection, SortAndMerge>,
 }

 impl Deref for Group {
    type Target = [Selection];
    fn deref(&self) -> &[Selection] {
-        &self.sorted_selections
+        self.sorted_selections.as_slice()
    }
 }

 impl DerefMut for Group {
    fn deref_mut(&mut self) -> &mut [Selection] {
-        &mut self.sorted_selections
+        self.sorted_selections.as_mut_slice()
    }
 }

@ -304,22 +305,24 @@ impl Group {

    /// Reference to the newest created selection if any.
    pub fn newest(&self) -> Option<&Selection> {
-        self.sorted_selections.iter().max_by(|x, y| x.id.cmp(&y.id))
+        self.sorted_selections.iter().max_by_key(|x| x.id)
    }

    /// Reference to the oldest created selection if any.
    pub fn oldest(&self) -> Option<&Selection> {
-        self.sorted_selections.iter().min_by(|x, y| x.id.cmp(&y.id))
+        self.sorted_selections.iter().min_by_key(|x| x.id)
    }

    /// Mutable reference to the newest created selection if any.
    pub fn newest_mut(&mut self) -> Option<&mut Selection> {
-        self.sorted_selections.iter_mut().max_by(|x, y| x.id.cmp(&y.id))
+        let i = self.sorted_selections.iter().enumerate().max_by_key(|(_, x)| x.id).map(|(i, _)| i);
+        i.and_then(|i| self.sorted_selections.get_mut(i))
    }

    /// Mutable reference to the oldest created selection if any.
    pub fn oldest_mut(&mut self) -> Option<&mut Selection> {
-        self.sorted_selections.iter_mut().min_by(|x, y| x.id.cmp(&y.id))
+        let i = self.sorted_selections.iter().enumerate().min_by_key(|(_, x)| x.id).map(|(i, _)| i);
+        i.and_then(|i| self.sorted_selections.get_mut(i))
    }

    /// Merge new selection with the group. This method implements merging logic.
@ -328,39 +331,12 @@ impl Group {
    /// not cause a merge. A cursor merges with a non-cursor if it is in the interior or on either
    /// edge. Two cursors merge if they are the same offset.
    ///
-    /// Performance note: should be O(1) if the new region strictly comes after all the others in
-    /// the selection, otherwise O(n).
+    /// Performance:
+    /// This operation is O(1), but reading from the vector when `d` new selections have been added
+    /// since the last read requires re-establishing the invariant, which involves a `O(d log d)`
+    /// operation.
    pub fn merge(&mut self, region: Selection) {
-        let mut ix = self.selection_index_on_the_left_to(region.min());
-        if ix == self.sorted_selections.len() {
-            self.sorted_selections.push(region);
-        } else {
-            let mut region = region;
-            let mut end_ix = ix;
-            if self.sorted_selections[ix].min() <= region.min() {
-                if self.sorted_selections[ix].should_merge_sorted(region) {
-                    region = region.merge_with(self.sorted_selections[ix]);
-                } else {
-                    ix += 1;
-                }
-                end_ix += 1;
-            }
-
-            let max_ix = self.sorted_selections.len();
-            while end_ix < max_ix && region.should_merge_sorted(self.sorted_selections[end_ix]) {
-                region = region.merge_with(self.sorted_selections[end_ix]);
-                end_ix += 1;
-            }
-
-            if ix == end_ix {
-                self.sorted_selections.insert(ix, region);
-            } else {
-                let start = ix + 1;
-                let len = end_ix - ix - 1;
-                self.sorted_selections[ix] = region;
-                self.sorted_selections.drain(start..start + len);
-            }
-        }
+        self.sorted_selections.push(region);
    }

    /// The smallest index so that offset > region.max() for all preceding regions. Note that the
@ -380,7 +356,7 @@ impl Group {

 impl From<Selection> for Group {
    fn from(t: Selection) -> Self {
-        let sorted_selections = vec![t];
+        let sorted_selections = vec![t].into();
        Self { sorted_selections }
    }
 }
@ -419,3 +395,116 @@ impl FromIterator<Selection> for Group {
        group
    }
 }
+
+
+// === Merging ===
+
+#[derive(Copy, Clone, Debug, Default)]
+struct SortAndMerge;
+
+impl<T: Boundary> lazy_invariant_vec::RestoreInvariant<Selection<T>> for SortAndMerge {
+    fn restore_invariant(&mut self, clean: usize, elements: &'_ mut Vec<Selection<T>>) {
+        sort_and_merge(clean, elements)
+    }
+}
+
+/// Given a collection `elements`, the first `clean` of which are sorted, update it so that it is
+/// fully-sorted, and overlapping elements have been merged.
+///
+/// Time complexity: `O(n + m log m)`, where `n` is the number of old elements, and `m` is the
+/// number of newly-added elements.
+fn sort_and_merge<T: Boundary>(clean: usize, elements: &mut Vec<Selection<T>>) {
+    let new = Vec::with_capacity(elements.len());
+    let mut old = mem::replace(elements, new);
+    // Sort the newly-added elements using a standard, fast sorting implementation. Some may
+    // overlap; we'll merge them below.
+    old[clean..].sort_unstable_by_key(|x| x.min());
+    let mut ys = old.split_off(clean).into_iter().peekable();
+    let mut xs = old.into_iter().peekable();
+    // Buffer the next element to be emitted; this is so we can merge selections, while merging the
+    // sorted lists of selections.
+    let mut a: Option<Selection<_>> = None;
+    loop {
+        // Advance `xs` or `ys` (whichever has a lesser next element), putting the result in `b`.
+        let b = match (xs.peek(), ys.peek()) {
+            (Some(x), Some(y)) =>
+                if x.min() <= y.min() {
+                    xs.next()
+                } else {
+                    ys.next()
+                },
+            _ => xs.next().or_else(|| ys.next()),
+        };
+        // Move data along this path: `b --> a --> elements`.
+        // While doing so, merge `(a,b) --> a` if appropriate.
+        match (a, b) {
+            (Some(a_), Some(next)) if a_.should_merge_sorted(next) => a = Some(a_.merge_with(next)),
+            (Some(a_), _) => {
+                elements.push(a_);
+                a = b;
+            }
+            (None, Some(b)) => a = Some(b),
+            (None, None) => break,
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    // Check that some specific cases are completely correct, including the results of merging
+    // different selections.
+    #[test]
+    fn test_sort_and_merge_cases() {
+        let mut selections: LazyInvariantVec<Selection<_>, SortAndMerge> = default();
+        selections.push(Selection::new(1, 3, Id { value: 1 }));
+        selections.push(Selection::new(0, 2, Id { value: 0 }));
+        assert_eq!(selections.as_slice(), &[Selection::new(0, 3, Id { value: 0 })]);
+        selections.push(Selection::new(4, 4, Id { value: 2 }));
+        selections.push(Selection::new(0, 5, Id { value: 3 }));
+        assert_eq!(selections.as_slice(), &[Selection::new(0, 5, Id { value: 0 })]);
+        selections.push(Selection::new(7, 9, Id { value: 4 }));
+        selections.push(Selection::new(8, 10, Id { value: 5 }));
+        assert_eq!(selections.as_slice(), &[
+            Selection::new(0, 5, Id { value: 0 }),
+            Selection::new(7, 10, Id { value: 4 }),
+        ]);
+        selections.push(Selection::new(20, 20, Id { value: 20 }));
+        selections.push(Selection::new(19, 21, Id { value: 19 }));
+        assert_eq!(selections.as_slice(), &[
+            Selection::new(0, 5, Id { value: 0 }),
+            Selection::new(7, 10, Id { value: 4 }),
+            Selection::new(19, 21, Id { value: 19 }),
+        ]);
+        selections.push(Selection::new(-1, 100, Id { value: 100 }));
+        let _ = selections.as_slice();
+        assert_eq!(selections.as_slice(), &[Selection::new(-1, 100, Id { value: 100 })]);
+    }
+
+    // Check that the outputs obey the sorted-and-merged invariant for random inputs. This property
+    // doesn't guarantee that the results are correct, but it's a property we can easily verify for
+    // arbitrarily many inputs.
+    #[test]
+    fn test_sort_and_merge_property() {
+        use rand::Rng;
+        use rand::SeedableRng;
+        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0);
+        let mut selections: LazyInvariantVec<Selection<_>, SortAndMerge> = default();
+        for i in 1..=100 {
+            // Generate a batch of changes. Expand the range with each batch, so that each new
+            // batch will have a chance of containing values greater or lower than all previous
+            // values.
+            for _ in 0..10 {
+                let start = rng.gen_range(-(i * 2)..i * 10);
+                let end = rng.gen_range(-(i * 2)..i * 10);
+                selections.push(Selection::new(start, end, Id { value: 0 }));
+            }
+            let selections_are_sorted = selections.iter().is_sorted_by_key(|x| x.min());
+            assert!(selections_are_sorted);
+            let no_unmerged_pairs =
+                !selections.array_windows::<2>().any(|&[a, b]| a.should_merge_sorted(b));
+            assert!(no_unmerged_pairs);
+        }
+    }
+}
--- a/lib/rust/ensogl/component/text/src/component/text.rs
+++ b/lib/rust/ensogl/component/text/src/component/text.rs
@ -1154,17 +1154,17 @@ impl TextModel {
                        if line_diff > LineDiff(0) {
                            // Add missing lines. They will be redrawn later. This is needed for
                            // proper partial redraw (redrawing only the lines that changed).
-                            let line_diff = line_diff.value as usize;
-                            for i in 0..line_diff {
-                                let index_to_insert = second_line_index + ViewLine(i);
+                            let new_lines = iter::from_fn(|| {
                                let new_line = self.new_line();
                                new_line.set_baseline(first_line_baseline);
                                new_line.skip_baseline_animation();
-                                if index_to_insert < ViewLine(lines.len()) {
-                                    lines.insert(index_to_insert, new_line);
-                                } else {
-                                    lines.push(new_line);
-                                }
+                                Some(new_line)
+                            });
+                            let new_lines = new_lines.take(line_diff.value as usize);
+                            if second_line_index < ViewLine(lines.len()) {
+                                lines.extend_at(second_line_index, new_lines);
+                            } else {
+                                lines.extend(new_lines);
                            }
                        } else if line_diff < LineDiff(0) {
                            // Remove lines that are no longer needed. This is needed for proper
--- a/lib/rust/ensogl/component/text/src/lib.rs
+++ b/lib/rust/ensogl/component/text/src/lib.rs
@ -17,6 +17,8 @@
 #![feature(step_trait)]
 #![feature(specialization)]
 #![feature(once_cell)]
+#![feature(is_sorted)]
+#![feature(array_windows)]
 // === Standard Linter Configuration ===
 #![deny(non_ascii_idents)]
 #![warn(unsafe_code)]
--- a/lib/rust/prelude/src/data.rs
+++ b/lib/rust/prelude/src/data.rs
@ -6,12 +6,14 @@
 // ==============

 pub mod at_least_one_of_two;
+pub mod lazy_invariant_vec;
 pub mod monoid;
 pub mod non_empty_vec;
 pub mod semigroup;
 pub mod vec_indexed_by;

 pub use at_least_one_of_two::*;
+pub use lazy_invariant_vec::LazyInvariantVec;
 pub use monoid::*;
 pub use non_empty_vec::NonEmptyVec;
 pub use semigroup::*;
--- a/lib/rust/prelude/src/data/lazy_invariant_vec.rs
+++ b/lib/rust/prelude/src/data/lazy_invariant_vec.rs
@ -0,0 +1,166 @@
+// ========================
+// === LazyInvariantVec ===
+// ========================
+
+/// Contiguous, ordered collection of elements with an invariant, determined by a parameter. The
+/// invariant is always restored before the elements are observed, but new elements can be added
+/// without immediately restoring the invariant.
+///
+/// # Safety:
+///
+/// `elements`:
+/// The only `unsafe` mutation performed is to restore the invariant, when `elements` is not clean.
+/// At that time, we can be sure no borrows of `elements` exist, because:
+/// - We never give out a borrow of `elements` unless it is clean.
+/// - `elements` never goes from clean to dirty without `&mut` access to `self`.
+///
+/// `restore_invariant`: No borrow of this escapes its scope.
+#[derive(Default)]
+pub struct LazyInvariantVec<T, F> {
+    elements:          core::cell::UnsafeCell<Vec<T>>,
+    clean_up_to:       core::cell::Cell<usize>,
+    restore_invariant: core::cell::UnsafeCell<F>,
+}
+
+impl<T, F> LazyInvariantVec<T, F> {
+    pub fn push(&mut self, t: T) {
+        self.elements.get_mut().push(t);
+    }
+}
+
+impl<T, F> LazyInvariantVec<T, F>
+where F: RestoreInvariant<T>
+{
+    pub fn get_mut(&mut self, i: usize) -> Option<&mut T> {
+        self.clean();
+        let elements = self.elements.get_mut();
+        if i < elements.len() {
+            let t = elements.remove(i);
+            let i = elements.len();
+            self.clean_up_to = i.into();
+            elements.push(t);
+            elements.get_mut(i)
+        } else {
+            None
+        }
+    }
+
+    pub fn as_slice(&self) -> &[T] {
+        self.clean()
+    }
+
+    pub fn as_mut_slice(&mut self) -> &mut [T] {
+        self.clean();
+        self.elements.get_mut().as_mut_slice()
+    }
+
+    #[allow(unsafe_code)] // See docs for [`Self`].
+    fn clean(&self) -> &Vec<T> {
+        unsafe {
+            let elements = &mut *self.elements.get();
+            let clean_up_to = self.clean_up_to.get();
+            let restore_invariant = &mut *self.restore_invariant.get();
+            // Note: Although `restore_invariant` *should* be a no-op when the whole is already
+            // clean, we check that condition here so that memory safety doesn't depend on that
+            // property.
+            if clean_up_to != elements.len() {
+                restore_invariant.restore_invariant(clean_up_to, elements);
+                self.clean_up_to.set(elements.len());
+            }
+            elements
+        }
+    }
+}
+
+
+// === Trait implementations ===
+
+impl<T, F> core::fmt::Debug for LazyInvariantVec<T, F>
+where
+    T: core::fmt::Debug,
+    F: core::fmt::Debug,
+{
+    #[allow(unsafe_code)] // Short-lived borrows.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        unsafe {
+            let elements = &*self.elements.get();
+            let restore_invariant = &*self.restore_invariant.get();
+            f.debug_struct("LazyInvariantVec")
+                .field("elements", elements)
+                .field("clean_up_to", &self.clean_up_to.get())
+                .field("restore_invariant", restore_invariant)
+                .finish()
+        }
+    }
+}
+
+impl<T, F> Clone for LazyInvariantVec<T, F>
+where
+    T: Clone,
+    F: Clone + RestoreInvariant<T>,
+{
+    #[allow(unsafe_code)] // See docs for [`Self`].
+    fn clone(&self) -> Self {
+        let elements: Vec<_> = self.clean().clone();
+        let elements = elements.into();
+        let clean_up_to = self.clean_up_to.get().into();
+        let restore_invariant = unsafe { (*self.restore_invariant.get()).clone().into() };
+        Self { elements, clean_up_to, restore_invariant }
+    }
+}
+
+impl<T, F> From<Vec<T>> for LazyInvariantVec<T, F>
+where F: Default
+{
+    fn from(elements: Vec<T>) -> Self {
+        let elements = elements.into();
+        let clean_up_to = Default::default();
+        let restore_invariant = Default::default();
+        Self { elements, clean_up_to, restore_invariant }
+    }
+}
+
+impl<T, F> FromIterator<T> for LazyInvariantVec<T, F>
+where F: Default
+{
+    fn from_iter<I>(elements: I) -> Self
+    where I: IntoIterator<Item = T> {
+        let elements: Vec<_> = elements.into_iter().collect();
+        elements.into()
+    }
+}
+
+impl<T, F> From<LazyInvariantVec<T, F>> for Vec<T>
+where F: RestoreInvariant<T>
+{
+    fn from(mut vec: LazyInvariantVec<T, F>) -> Self {
+        vec.clean();
+        core::mem::take(vec.elements.get_mut())
+    }
+}
+
+impl<T, F> IntoIterator for LazyInvariantVec<T, F>
+where F: RestoreInvariant<T>
+{
+    type Item = T;
+    type IntoIter = <Vec<T> as IntoIterator>::IntoIter;
+    fn into_iter(self) -> Self::IntoIter {
+        Vec::from(self).into_iter()
+    }
+}
+
+impl<T, F> core::ops::Deref for LazyInvariantVec<T, F>
+where F: RestoreInvariant<T>
+{
+    type Target = Vec<T>;
+    fn deref(&self) -> &Self::Target {
+        self.clean()
+    }
+}
+
+
+// === RestoreInvariant ===
+
+pub trait RestoreInvariant<T> {
+    fn restore_invariant(&mut self, clean: usize, elements: &'_ mut Vec<T>);
+}
--- a/lib/rust/prelude/src/data/non_empty_vec.rs
+++ b/lib/rust/prelude/src/data/non_empty_vec.rs
@ -327,6 +327,30 @@ where I: vec_indexed_by::Index
            self.elems.drain(range)
        }
    }
+
+    /// Insert the contents of an iterator at a specified index in the collection.
+    ///
+    /// This is optimal if:
+    /// - The specified index is equal to the length of the vector,
+    /// - or the lower bound of the iterator's `size_hint()` is exact.
+    ///
+    /// Otherwise, a temporary vector is allocated and the tail is moved twice.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the given index is greater than the length of the vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use enso_prelude::NonEmptyVec;
+    /// let mut vec = NonEmptyVec::new(0, vec![1, 4, 5]);
+    /// vec.extend_at(2, vec![2, 3]);
+    /// assert_eq!(&vec[..], &[0, 1, 2, 3, 4, 5])
+    /// ```
+    pub fn extend_at(&mut self, index: I, elems: impl IntoIterator<Item = T>) {
+        self.splice(index..index, elems);
+    }
 }