From de8b085e6ef970c455ab7b02be3630fa9ffd9c13 Mon Sep 17 00:00:00 2001 From: Jun Wu Date: Wed, 3 Jun 2020 13:20:11 -0700 Subject: [PATCH] revlogindex: port gca and range algorithms from bindag Summary: Mostly copy-paste from code added in D19503373 and D19511574. Adjusted to match the revlog index interface. Reviewed By: sfilipco Differential Revision: D21626201 fbshipit-source-id: 05d160e4c03d7e2482b6a4f2d68c3688ad78f568 --- .../bindings/modules/pyrevlogindex/src/lib.rs | 2 +- eden/scm/lib/revlogindex/src/revlogindex.rs | 188 +++++++++++++++++- 2 files changed, 186 insertions(+), 4 deletions(-) diff --git a/eden/scm/edenscmnative/bindings/modules/pyrevlogindex/src/lib.rs b/eden/scm/edenscmnative/bindings/modules/pyrevlogindex/src/lib.rs index 070f7ba720..558d315da1 100644 --- a/eden/scm/edenscmnative/bindings/modules/pyrevlogindex/src/lib.rs +++ b/eden/scm/edenscmnative/bindings/modules/pyrevlogindex/src/lib.rs @@ -56,7 +56,7 @@ py_class!(class revlogindex |py| { /// Get parent revisions. def parentrevs(&self, rev: u32) -> PyResult> { let revlog = self.index(py).borrow(); - Ok(revlog.parents(rev).as_revs().to_vec()) + Ok(revlog.parent_revs(rev).as_revs().to_vec()) } /// Insert a new revision that hasn't been written to disk. diff --git a/eden/scm/lib/revlogindex/src/revlogindex.rs b/eden/scm/lib/revlogindex/src/revlogindex.rs index a92453da41..5355adc681 100644 --- a/eden/scm/lib/revlogindex/src/revlogindex.rs +++ b/eden/scm/lib/revlogindex/src/revlogindex.rs @@ -19,6 +19,7 @@ use indexedlog::utils::{atomic_write, mmap_bytes}; use minibytes::Bytes; use parking_lot::RwLock; use std::collections::BTreeMap; +use std::collections::VecDeque; use std::fs; use std::io; use std::marker::PhantomData; @@ -60,7 +61,7 @@ impl RevlogIndex { if state == State::PotentialHead { result.push(rev as u32); } - for &parent_rev in self.parents(rev as u32).as_revs() { + for &parent_rev in self.parent_revs(rev as u32).as_revs() { if parent_rev >= min_rev { states[(parent_rev - min_rev) as usize] = State::NotHead; } @@ -115,13 +116,194 @@ impl RevlogIndex { // with the new "dag" abstraction. Phase::Unspecified => (), } - for &parent_rev in self.parents(rev as u32).as_revs() { + for &parent_rev in self.parent_revs(rev as u32).as_revs() { // Propagate phases from this rev to its parents. phases[parent_rev as usize] = phases[parent_rev as usize].max(phase); } } (public_set, draft_set) } + + /// GCA based on linear scan. + /// + /// Ported from Mercurial's C code `find_gca_candidates()`. + /// + /// The algorithm was written by Bryan O'Sullivan on 2013-04-16. + /// He provided both a Python [1] and a C version [2]. Since then, the only real + /// logic change is removing an unnecessary "if" branch by Mads Kiilerich on + /// 2014-02-24 [4]. + /// + /// The C implementation is quite competitive among linear algorithms on + /// performance. It is cache-efficient, has fast paths to exit early, and + /// takes up to 62 (if bitmask is u64) revs at once. Other implementations + /// might just take 2 revs at most. For example, the older implemenation + /// by Matt Mackall in 2006 [3] takes 2 revs explicitly. + /// + /// Changes in this Rust implementation: + /// - Change `bitmask` from `u64` to `u8` for smaller memory footage, at the + /// cost of losing support for more than 6 revs. + /// - Adopted to RevlogIndex. + /// - Run `gca` recursively on large input. + /// + /// [1]: https://www.mercurial-scm.org/repo/hg/rev/2f7186400a07 + /// [2]: https://www.mercurial-scm.org/repo/hg/rev/5bae936764bb + /// [3]: https://www.mercurial-scm.org/repo/hg/rev/b1db258e875c + /// [4]: https://www.mercurial-scm.org/repo/hg/rev/4add43865a9b + pub fn gca_revs(&self, revs: &[u32], limit: usize) -> Vec { + type BitMask = u8; + let revcount = revs.len(); + assert!(revcount < 7); + if revcount == 0 { + return Vec::new(); + } + + let allseen: BitMask = (1 << revcount) - 1; + let poison: BitMask = 1 << revcount; + let maxrev = revs.iter().max().cloned().unwrap(); + let mut interesting = revcount; + let mut gca = Vec::new(); + let mut seen: Vec = vec![0; maxrev as usize + 1]; + + for (i, &rev) in revs.iter().enumerate() { + seen[rev as usize] = 1 << i; + } + + for v in (0..=maxrev).rev() { + if interesting == 0 { + break; + } + let mut sv = seen[v as usize]; + if sv == 0 { + continue; + } + if sv < poison { + interesting -= 1; + if sv == allseen { + gca.push(v); + if gca.len() >= limit { + return gca; + } + sv |= poison; + if revs.iter().any(|&r| r == v) { + break; + } + } + } + for &p in self.parent_revs(v).as_revs() { + let sp = seen[p as usize]; + if sv < poison { + if sp == 0 { + seen[p as usize] = sv; + interesting += 1 + } else if sp != sv { + seen[p as usize] |= sv + } + } else { + if sp != 0 && sp < poison { + interesting -= 1 + } + seen[p as usize] = sv + } + } + } + + gca + } + + /// Range based on linear scan. + /// + /// Ported from Mercurial's C code `reachableroots2()`. + /// + /// The C implementation was added by Laurent Charignon on 2015-08-06 [1]. + /// It was based on the a Python implementation added by Bryan O'Sullivan on + /// 2012-06-01 [2], which is similar to an older implementation by Eric Hopper + /// on 2005-10-07 [3], but faster and shorter. + /// + /// The C code was then revised by others. The most significant change was + /// switching the "contains" check of "roots" and "reachable" from Python sets + /// to bits in the pure C "revstates" array for easier error handling and + /// better performance, by Yuya Nishihara on 2015-08-13 [4] [5]. + /// + /// Improvements in this Rust implementation: + /// - Use `VecDeque` for `tovisit` (roughly O(len(result)) -> O(len(heads))). + /// - Truncate `revstates` (O(len(changelog)) -> O(max_head - min_root)). + /// - Add `reachable.is_empty()` fast path that existed in the Python code. + /// - Support octopus merge. + /// + /// [1]: https://www.mercurial-scm.org/repo/hg/rev/ff89383a97db + /// [2]: https://www.mercurial-scm.org/repo/hg/rev/b6efeb27e733 + /// [3]: https://www.mercurial-scm.org/repo/hg/rev/518da3c3b6ce + /// [4]: https://www.mercurial-scm.org/repo/hg/rev/b68c9d232db6 + /// [5]: https://www.mercurial-scm.org/repo/hg/rev/b3ad349d0e50 + pub fn range_revs(&self, roots: &[u32], heads: &[u32]) -> Vec { + if roots.is_empty() || heads.is_empty() { + return Vec::new(); + } + let min_root = *roots.iter().min().unwrap(); + let max_head = *heads.iter().max().unwrap(); + let len = (max_head.max(min_root) - min_root + 1) as usize; + let mut reachable = Vec::with_capacity(len); + let mut tovisit = VecDeque::new(); + let mut revstates = vec![0u8; len]; + + const RS_SEEN: u8 = 1; + const RS_ROOT: u8 = 2; + const RS_REACHABLE: u8 = 4; + + for &rev in roots { + if rev <= max_head { + revstates[(rev - min_root) as usize] |= RS_ROOT; + } + } + + for &rev in heads { + if rev >= min_root && revstates[(rev - min_root) as usize] & RS_SEEN == 0 { + tovisit.push_back(rev); + revstates[(rev - min_root) as usize] |= RS_SEEN; + } + } + + // Visit the tovisit list and find the reachable roots + while let Some(rev) = tovisit.pop_front() { + // Add the node to reachable if it is a root + if revstates[(rev - min_root) as usize] & RS_ROOT != 0 { + revstates[(rev - min_root) as usize] |= RS_REACHABLE; + reachable.push(rev); + } + + // Add its parents to the list of nodes to visit + for &p in self.parent_revs(rev).as_revs() { + if p >= min_root && revstates[(p - min_root) as usize] & RS_SEEN == 0 { + tovisit.push_back(p); + revstates[(p - min_root) as usize] |= RS_SEEN; + } + } + } + + if reachable.is_empty() { + return Vec::new(); + } + + // Find all the nodes in between the roots we found and the heads + // and add them to the reachable set + for rev in min_root..=max_head { + if revstates[(rev - min_root) as usize] & RS_SEEN == 0 { + continue; + } + if self + .parent_revs(rev) + .as_revs() + .iter() + .any(|&p| p >= min_root && revstates[(p - min_root) as usize] & RS_REACHABLE != 0) + && revstates[(rev - min_root) as usize] & RS_REACHABLE == 0 + { + revstates[(rev - min_root) as usize] |= RS_REACHABLE; + reachable.push(rev); + } + } + + reachable + } } /// Minimal code to read the DAG (i.e. parents) stored in non-inlined revlog. @@ -250,7 +432,7 @@ impl RevlogIndex { } /// Get parent revisions. - pub fn parents(&self, rev: u32) -> ParentRevs { + pub fn parent_revs(&self, rev: u32) -> ParentRevs { let data_len = self.data_len(); if rev >= data_len as u32 { return self.pending_parents[rev as usize - data_len].clone();