dag: impl MemNameDag using AbstractNameDag

Summary:
This verifies the abstraction and simplifies the code.

The new code uses non-master segments for add_heads, so the expected output of
test_mem_namedag changes accordingly.

Reviewed By: sfilipco

Differential Revision: D24399496

fbshipit-source-id: 39067ad88ade79b4f7758bcdaafc03e5f34ced91
This commit is contained in:
Jun Wu 2020-10-20 15:15:45 -07:00 committed by Facebook GitHub Bot
parent 72c4a10e7e
commit 24edf32eac
4 changed files with 88 additions and 338 deletions

View File

@ -88,7 +88,7 @@ impl IdMapWrite for MemIdMap {
}
fn next_free_id(&self, group: Group) -> Result<Id> {
let cached = self.cached_next_free_ids[group.0].load(atomic::Ordering::SeqCst);
let id = Id(cached);
let id = Id(cached).max(group.min_id());
Ok(id)
}
fn remove_non_master(&mut self) -> Result<()> {

View File

@ -50,9 +50,12 @@ use std::sync::Arc;
use crate::idmap::IdMapBuildParents;
mod indexedlog_namedag;
mod mem_namedag;
pub use indexedlog_namedag::IndexedLogNameDagPath;
pub use indexedlog_namedag::NameDag;
pub use mem_namedag::MemNameDag;
pub use mem_namedag::MemNameDagPath;
pub struct AbstractNameDag<I, M, P, S> {
pub(crate) dag: I,
@ -72,16 +75,6 @@ pub struct AbstractNameDag<I, M, P, S> {
state: S,
}
/// In-memory version of [`NameDag`].
///
/// Does not support loading from or saving to the filesystem.
/// The graph has to be built from scratch by `add_heads`.
pub struct MemNameDag {
/// Id-based graph backed by an in-process store; `add_heads` builds its
/// segments.
dag: IdDag<InProcessStore>,
/// Id map used together with `dag`; `add_heads` assigns ids through it.
map: MemIdMap,
/// Cached copy of this graph. `None` until `snapshot()` fills it, and reset
/// to `None` by `invalidate_snapshot()`.
snapshot: RwLock<Option<Arc<MemNameDag>>>,
}
impl<IS, M, P, S> DagPersistent for AbstractNameDag<IdDag<IS>, M, P, S>
where
IS: IdDagStore + Persist,
@ -285,304 +278,9 @@ where
}
}
impl MemNameDag {
    /// Build a [`MemNameDag`] from a fresh in-process `IdDag`, a fresh
    /// `MemIdMap`, and no cached snapshot.
    pub fn new() -> Self {
        let dag = IdDag::new_in_process();
        let map = MemIdMap::new();
        let snapshot = Default::default();
        Self { dag, map, snapshot }
    }

    /// Drop the cached snapshot so the next `snapshot()` call clones the
    /// graph again. Call this after changing the graph.
    fn invalidate_snapshot(&mut self) {
        let mut cached = self.snapshot.write();
        *cached = None;
    }

    /// Return a shared copy of this graph, creating and caching it on first
    /// use.
    fn snapshot(&self) -> Arc<Self> {
        // Fast path: a snapshot is already cached, only a read lock is needed.
        if let Some(existing) = &*self.snapshot.read() {
            return Arc::clone(existing);
        }

        // Slow path: take the write lock and look again, because the cache may
        // have been filled between releasing the read lock and getting here.
        let mut slot = self.snapshot.write();
        if let Some(existing) = &*slot {
            return Arc::clone(existing);
        }

        let fresh = Arc::new(Self {
            dag: self.dag.clone(),
            map: self.map.clone(),
            snapshot: Default::default(),
        });
        *slot = Some(Arc::clone(&fresh));
        fresh
    }
}
impl DagAddHeads for MemNameDag {
    /// Insert `heads` and their ancestors into the in-memory DAG.
    ///
    /// `parents` resolves a vertex to its parent vertexes. Heads that the id
    /// map already contains are skipped. After the segments are built the
    /// cached snapshot is invalidated.
    fn add_heads<F>(&mut self, parents: F, heads: &[VertexName]) -> Result<()>
    where
        F: Fn(VertexName) -> Result<Vec<VertexName>>,
    {
        // Everything goes into the master group to keep this simple for now.
        let target_group = Group::MASTER;
        let mut assigned = AssignHeadOutcome::default();
        for head in heads {
            let already_known = self.map.contains_vertex_name(head)?;
            if !already_known {
                let newly_assigned =
                    self.map
                        .assign_head(head.clone(), &parents, target_group)?;
                assigned.merge(newly_assigned);
            }
        }

        #[cfg(test)]
        {
            let parent_ids_func = self.map.build_get_parents_by_id(&parents);
            assigned.verify(&parent_ids_func);
        }

        self.dag
            .build_segments_volatile_from_assign_head_outcome(&assigned)?;
        self.invalidate_snapshot();
        Ok(())
    }
}
// Dag operations. Those are just simple wrappers around [`IdDag`].
// See [`IdDag`] for the actual implementations of these algorithms.
//
// `impl_dag_algorithms!(T)` expands to `impl DagAlgorithm for T`. The
// generated code calls `self.dag()`, `self.map()`, `self.to_id_set(..)` and
// `NameDagStorage::storage_dag_snapshot(self)`, so `T` must provide all of
// them. Under `cfg(test)` many results are cross-checked against the
// implementations in `crate::default_impl`.
macro_rules! impl_dag_algorithms {
($t:ty) => {
/// DAG related read-only algorithms.
impl DagAlgorithm for $t {
/// Sort a `NameSet` topologically.
fn sort(&self, set: &NameSet) -> Result<NameSet> {
// A set already flagged TOPO_DESC and bound to a compatible DAG is
// returned unchanged.
if set.hints().contains(Flags::TOPO_DESC)
&& set.hints().is_dag_compatible(self.dag_snapshot()?)
{
Ok(set.clone())
} else {
let flags =
extract_ancestor_flag_if_compatible(set.hints(), self.dag_snapshot()?);
// Rebuild the set from ids so it is backed by spans of this DAG.
let mut spans = SpanSet::empty();
for name in set.iter()? {
let id = self.map().vertex_id(name?)?;
spans.push(id);
}
let result = NameSet::from_spans_dag(spans, self)?;
result.hints().add_flags(flags);
Ok(result)
}
}
/// Get ordered parent vertexes.
fn parent_names(&self, name: VertexName) -> Result<Vec<VertexName>> {
let id = self.map().vertex_id(name)?;
self.dag()
.parent_ids(id)?
.into_iter()
.map(|id| self.map().vertex_name(id))
.collect()
}
/// Returns a [`NameSet`] that covers all vertexes tracked by this DAG.
/// The result is flagged `Flags::FULL`.
fn all(&self) -> Result<NameSet> {
let spans = self.dag().all()?;
let result = NameSet::from_spans_dag(spans, self)?;
result.hints().add_flags(Flags::FULL);
Ok(result)
}
/// Calculates all ancestors reachable from any name from the given set.
fn ancestors(&self, set: NameSet) -> Result<NameSet> {
// A set already flagged ANCESTORS for a compatible DAG is returned
// as-is.
if set.hints().contains(Flags::ANCESTORS)
&& set.hints().is_dag_compatible(self.dag_snapshot()?)
{
return Ok(set);
}
let spans = self.to_id_set(&set)?;
let spans = self.dag().ancestors(spans)?;
let result = NameSet::from_spans_dag(spans, self)?;
result.hints().add_flags(Flags::ANCESTORS);
Ok(result)
}
/// Calculates parents of the given set.
///
/// Note: Parent order is not preserved. Use [`NameDag::parent_names`]
/// to preserve order.
fn parents(&self, set: NameSet) -> Result<NameSet> {
// Preserve ANCESTORS flag. If ancestors(x) == x, then ancestors(parents(x)) == parents(x).
let flags = extract_ancestor_flag_if_compatible(set.hints(), self.dag_snapshot()?);
let spans = self.dag().parents(self.to_id_set(&set)?)?;
let result = NameSet::from_spans_dag(spans, self)?;
result.hints().add_flags(flags);
#[cfg(test)]
{
result.assert_eq(crate::default_impl::parents(self, set)?);
}
Ok(result)
}
/// Calculates the n-th first ancestor.
fn first_ancestor_nth(&self, name: VertexName, n: u64) -> Result<VertexName> {
// `name` is moved into `vertex_id` below; keep a copy for the test
// cross-check.
#[cfg(test)]
let name2 = name.clone();
let id = self.map().vertex_id(name)?;
let id = self.dag().first_ancestor_nth(id, n)?;
let result = self.map().vertex_name(id)?;
#[cfg(test)]
{
let result2 = crate::default_impl::first_ancestor_nth(self, name2, n)?;
assert_eq!(result, result2);
}
Ok(result)
}
/// Calculates heads of the given set.
fn heads(&self, set: NameSet) -> Result<NameSet> {
if set.hints().contains(Flags::ANCESTORS)
&& set.hints().is_dag_compatible(self.dag_snapshot()?)
{
// heads_ancestors is faster.
return self.heads_ancestors(set);
}
let spans = self.dag().heads(self.to_id_set(&set)?)?;
let result = NameSet::from_spans_dag(spans, self)?;
#[cfg(test)]
{
result.assert_eq(crate::default_impl::heads(self, set)?);
}
Ok(result)
}
/// Calculates children of the given set.
fn children(&self, set: NameSet) -> Result<NameSet> {
let spans = self.dag().children(self.to_id_set(&set)?)?;
let result = NameSet::from_spans_dag(spans, self)?;
Ok(result)
}
/// Calculates roots of the given set.
fn roots(&self, set: NameSet) -> Result<NameSet> {
let flags = extract_ancestor_flag_if_compatible(set.hints(), self.dag_snapshot()?);
let spans = self.dag().roots(self.to_id_set(&set)?)?;
let result = NameSet::from_spans_dag(spans, self)?;
result.hints().add_flags(flags);
#[cfg(test)]
{
result.assert_eq(crate::default_impl::roots(self, set)?);
}
Ok(result)
}
/// Calculates one "greatest common ancestor" of the given set.
///
/// If there are no common ancestors, return None.
/// If there are multiple greatest common ancestors, pick one arbitrarily.
/// Use `gca_all` to get all of them.
fn gca_one(&self, set: NameSet) -> Result<Option<VertexName>> {
let result: Option<VertexName> = match self.dag().gca_one(self.to_id_set(&set)?)? {
None => None,
Some(id) => Some(self.map().vertex_name(id)?),
};
#[cfg(test)]
{
assert_eq!(&result, &crate::default_impl::gca_one(self, set)?);
}
Ok(result)
}
/// Calculates all "greatest common ancestor"s of the given set.
/// `gca_one` is faster if an arbitrary answer is ok.
fn gca_all(&self, set: NameSet) -> Result<NameSet> {
let spans = self.dag().gca_all(self.to_id_set(&set)?)?;
let result = NameSet::from_spans_dag(spans, self)?;
#[cfg(test)]
{
result.assert_eq(crate::default_impl::gca_all(self, set)?);
}
Ok(result)
}
/// Calculates all common ancestors of the given set.
fn common_ancestors(&self, set: NameSet) -> Result<NameSet> {
let spans = self.dag().common_ancestors(self.to_id_set(&set)?)?;
let result = NameSet::from_spans_dag(spans, self)?;
result.hints().add_flags(Flags::ANCESTORS);
#[cfg(test)]
{
result.assert_eq(crate::default_impl::common_ancestors(self, set)?);
}
Ok(result)
}
/// Tests if `ancestor` is an ancestor of `descendant`.
fn is_ancestor(&self, ancestor: VertexName, descendant: VertexName) -> Result<bool> {
// The reference answer is computed first because both names are
// moved into `vertex_id` below.
#[cfg(test)]
let result2 =
crate::default_impl::is_ancestor(self, ancestor.clone(), descendant.clone())?;
let ancestor_id = self.map().vertex_id(ancestor)?;
let descendant_id = self.map().vertex_id(descendant)?;
let result = self.dag().is_ancestor(ancestor_id, descendant_id)?;
#[cfg(test)]
{
assert_eq!(&result, &result2);
}
Ok(result)
}
/// Calculates "heads" of the ancestors of the given set. That is,
/// Find Y, which is the smallest subset of set X, where `ancestors(Y)` is
/// `ancestors(X)`.
///
/// This is faster than calculating `heads(ancestors(set))`.
///
/// This is different from `heads`. In case set contains X and Y, and Y is
/// an ancestor of X, but not the immediate ancestor, `heads` will include
/// Y while this function won't.
fn heads_ancestors(&self, set: NameSet) -> Result<NameSet> {
let spans = self.dag().heads_ancestors(self.to_id_set(&set)?)?;
let result = NameSet::from_spans_dag(spans, self)?;
#[cfg(test)]
{
// default_impl::heads_ancestors calls `heads` if `Flags::ANCESTORS`
// is set. Prevent infinite loop.
if !set.hints().contains(Flags::ANCESTORS) {
result.assert_eq(crate::default_impl::heads_ancestors(self, set)?);
}
}
Ok(result)
}
/// Calculates the "dag range" - vertexes reachable from both sides.
fn range(&self, roots: NameSet, heads: NameSet) -> Result<NameSet> {
let roots = self.to_id_set(&roots)?;
let heads = self.to_id_set(&heads)?;
let spans = self.dag().range(roots, heads)?;
let result = NameSet::from_spans_dag(spans, self)?;
Ok(result)
}
/// Calculates the descendants of the given set.
fn descendants(&self, set: NameSet) -> Result<NameSet> {
let spans = self.dag().descendants(self.to_id_set(&set)?)?;
let result = NameSet::from_spans_dag(spans, self)?;
Ok(result)
}
/// Get a snapshot of the current graph.
fn dag_snapshot(&self) -> Result<Arc<dyn DagAlgorithm + Send + Sync>> {
NameDagStorage::storage_dag_snapshot(self)
}
}
};
}
/// DAG related read-only algorithms.
impl<IS, M, P, S> DagAlgorithm for AbstractNameDag<IdDag<IS>, M, P, S>
where
@ -812,8 +510,6 @@ where
}
}
impl_dag_algorithms!(MemNameDag);
/// Extract the ANCESTORS flag if the set with the `hints` is bound to a
/// compatible DAG.
fn extract_ancestor_flag_if_compatible(
@ -837,7 +533,6 @@ delegate! {
impl<I, M: IdConvert, P, S> IdConvert for AbstractNameDag<I, M, P, S>
} => self.map
}
delegate!(PrefixLookup | IdConvert, MemNameDag => self.map());
/// Export non-master DAG as parent_names_func on HashMap.
///
@ -999,7 +694,7 @@ impl NameDagStorage for MemNameDag {
&self.map
}
fn storage_dag_snapshot(&self) -> Result<Arc<dyn DagAlgorithm + Send + Sync>> {
Ok(self.snapshot() as Arc<dyn DagAlgorithm + Send + Sync>)
Ok(self.try_snapshot()? as Arc<dyn DagAlgorithm + Send + Sync>)
}
}
@ -1016,12 +711,6 @@ where
}
}
impl IdMapSnapshot for MemNameDag {
    /// Expose the graph snapshot as a shareable [`IdConvert`] trait object.
    fn id_map_snapshot(&self) -> Result<Arc<dyn IdConvert + Send + Sync>> {
        let shared: Arc<dyn IdConvert + Send + Sync> = self.snapshot();
        Ok(shared)
    }
}
impl<IS, M, P, S> fmt::Debug for AbstractNameDag<IdDag<IS>, M, P, S>
where
IS: IdDagStore,
@ -1032,12 +721,6 @@ where
}
}
impl fmt::Debug for MemNameDag {
    /// Format the graph by handing its `dag` and `map` to the shared `debug`
    /// helper.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self { dag, map, .. } = self;
        debug(dag, map, f)
    }
}
fn debug<S: IdDagStore>(
iddag: &IdDag<S>,
idmap: &dyn IdConvert,

View File

@ -0,0 +1,67 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use super::AbstractNameDag;
use crate::iddag::IdDag;
use crate::iddagstore::InProcessStore;
use crate::idmap::MemIdMap;
use crate::ops::Open;
use crate::ops::Persist;
use crate::Result;
/// In-memory version of [`NameDag`].
///
/// Does not support loading from or saving to the filesystem.
/// The graph has to be built from scratch by `add_heads`.
///
/// This is [`AbstractNameDag`] instantiated with an in-process [`IdDag`]
/// store, a [`MemIdMap`], and the `MemNameDagPath` / `MemNameDagState`
/// marker types defined in this module.
pub type MemNameDag =
AbstractNameDag<IdDag<InProcessStore>, MemIdMap, MemNameDagPath, MemNameDagState>;
/// Address to open in-memory Dag.
///
/// Carries no data: each [`Open::open`] call on it builds a new
/// [`MemNameDag`] from a fresh in-process `IdDag` and a fresh `MemIdMap`.
#[derive(Debug, Clone)]
pub struct MemNameDagPath;
/// State marker stored in a [`MemNameDag`]. Carries no data; its [`Persist`]
/// implementation succeeds without doing any work.
#[derive(Debug, Clone)]
pub struct MemNameDagState;
impl Open for MemNameDagPath {
    type OpenTarget = MemNameDag;

    /// Build a new [`MemNameDag`] backed by a fresh in-process `IdDag` and a
    /// fresh `MemIdMap`. The remaining bookkeeping fields start at their
    /// defaults. This implementation always returns `Ok`.
    fn open(&self) -> Result<Self::OpenTarget> {
        let opened = AbstractNameDag {
            dag: IdDag::new_in_process(),
            map: MemIdMap::new(),
            path: self.clone(),
            snapshot: Default::default(),
            pending_heads: Default::default(),
            state: MemNameDagState,
        };
        Ok(opened)
    }
}
impl MemNameDag {
    /// Create a new [`MemNameDag`].
    ///
    /// Shorthand for `MemNameDagPath.open()`, which builds the dag from a
    /// fresh in-process `IdDag` and a fresh `MemIdMap`.
    ///
    /// # Panics
    ///
    /// Panics only if `MemNameDagPath::open` returns an error. The current
    /// implementation always returns `Ok`, so this is not expected to happen.
    pub fn new() -> Self {
        MemNameDagPath
            .open()
            .expect("opening an in-memory dag only builds fresh structures and cannot fail")
    }
}
/// Persistence hooks for the in-memory dag. Every hook succeeds without doing
/// any work.
impl Persist for MemNameDagState {
// No real lock is taken, so the lock token is the unit type.
type Lock = ();
/// Returns the unit lock token immediately.
fn lock(&mut self) -> Result<Self::Lock> {
Ok(())
}
/// Does nothing; `_lock` is ignored.
fn reload(&mut self, _lock: &Self::Lock) -> Result<()> {
Ok(())
}
/// Does nothing; `_lock` is ignored.
fn persist(&mut self, _lock: &Self::Lock) -> Result<()> {
Ok(())
}
}

View File

@ -299,23 +299,23 @@ fn test_mem_namedag() {
r#"Max Level: 0
Level 0
Group Master:
Next Free Id: 12
Segments: 12
L+11 : L+11 [K+10] OnlyHead
K+10 : K+10 [H+7, J+9] OnlyHead
J+9 : J+9 [I+8]
I+8 : I+8 [G+6]
H+7 : H+7 [G+6] OnlyHead
G+6 : G+6 [F+5] OnlyHead
F+5 : F+5 [E+4] OnlyHead
E+4 : E+4 [B+1, D+3] OnlyHead
D+3 : D+3 [C+2]
C+2 : C+2 [] Root
B+1 : B+1 [A+0] OnlyHead
A+0 : A+0 [] Root OnlyHead
Group Non-Master:
Next Free Id: N0
Next Free Id: 0
Segments: 0
Group Non-Master:
Next Free Id: N12
Segments: 12
L+N11 : L+N11 [K+N10]
K+N10 : K+N10 [H+N7, J+N9]
J+N9 : J+N9 [I+N8]
I+N8 : I+N8 [G+N6]
H+N7 : H+N7 [G+N6]
G+N6 : G+N6 [F+N5]
F+N5 : F+N5 [E+N4]
E+N4 : E+N4 [B+N1, D+N3]
D+N3 : D+N3 [C+N2]
C+N2 : C+N2 [] Root
B+N1 : B+N1 [A+N0]
A+N0 : A+N0 [] Root
"#
);
}