2019-11-02 03:38:10 +03:00
|
|
|
/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This software may be used and distributed according to the terms of the
 * GNU General Public License version 2.
 */
|
2019-09-14 05:26:34 +03:00
|
|
|
|
2020-05-11 19:45:32 +03:00
|
|
|
use dag::idmap::IdMapAssignHead;
|
|
|
|
use dag::idmap::IdMapBuildParents;
|
2020-02-28 20:19:41 +03:00
|
|
|
use dag::{idmap::IdMap, spanset::SpanSet, Group, Id, IdDag, VertexName};
|
2019-09-14 05:26:34 +03:00
|
|
|
use minibench::{bench, elapsed};
|
|
|
|
use tempfile::tempdir;
|
|
|
|
|
|
|
|
fn main() {
|
|
|
|
let parents = bindag::parse_bindag(bindag::MOZILLA);
|
|
|
|
|
2020-01-09 08:33:50 +03:00
|
|
|
let head_name = VertexName::copy_from(format!("{}", parents.len() - 1).as_bytes());
|
2020-08-06 22:29:53 +03:00
|
|
|
let parents_by_name = |name: VertexName| -> dag::Result<Vec<VertexName>> {
|
2020-01-09 08:33:50 +03:00
|
|
|
let i = String::from_utf8(name.as_ref().to_vec())
|
2019-09-14 05:26:34 +03:00
|
|
|
.unwrap()
|
|
|
|
.parse::<usize>()
|
|
|
|
.unwrap();
|
|
|
|
Ok(parents[i]
|
|
|
|
.iter()
|
|
|
|
.map(|p| format!("{}", p).as_bytes().to_vec().into())
|
|
|
|
.collect())
|
|
|
|
};
|
|
|
|
|
|
|
|
let id_map_dir = tempdir().unwrap();
|
|
|
|
let mut id_map = IdMap::open(id_map_dir.path()).unwrap();
|
2019-12-05 09:41:13 +03:00
|
|
|
id_map
|
2020-01-09 08:33:50 +03:00
|
|
|
.assign_head(head_name.clone(), &parents_by_name, Group::MASTER)
|
2019-12-05 09:41:13 +03:00
|
|
|
.unwrap();
|
2019-09-14 05:26:34 +03:00
|
|
|
|
2020-01-09 08:33:50 +03:00
|
|
|
let head_id = id_map.find_id_by_name(head_name.as_ref()).unwrap().unwrap();
|
2019-09-14 05:26:34 +03:00
|
|
|
let parents_by_id = id_map.build_get_parents_by_id(&parents_by_name);
|
|
|
|
|
|
|
|
let dag_dir = tempdir().unwrap();
|
|
|
|
|
|
|
|
bench("building segments", || {
|
2020-01-16 00:58:22 +03:00
|
|
|
let mut dag = IdDag::open(&dag_dir.path()).unwrap();
|
2019-09-14 05:26:34 +03:00
|
|
|
elapsed(|| {
|
dag: refactor segment building APIs
Summary:
Previously, the `Dag` has 2 low-level `build_segemnts` APIs:
- Dag::build_flat_segments(..., last_threshold)
- Dag::build_high_level_segments(..., drop_last)
They allow customization about whether the segments are lagging or not.
However, certain algorithms (ex. children and range) now require the high level
segments to cover everything covered by the flat segments. The above APIs
wouldn't ensure that.
This diff refactors the segment building APIs so that:
- Make `build_flat_segments`, and `build_high_level_segments` private to
prevent misuse.
- Ensure high level segments cover flat segments at `Dag::open` and
`Dag::build_segments_volatile`, the only ways to change `Dag`.
- Provide different APIs suitable for different (one-time in-memory vs
on-disk) use-cases. The on-disk `build_segments_persistent` API makes high
level segments lagging to avoid fragmentation, while the in-memory
`build_segments_volatile` does not.
To satisfy the existing test need, a `set_segment_size` API was added to
override the default segment size.
Most callsites become simpler because they no longer need to figure out
details about segment size, level, and lagging.
Reviewed By: sfilipco
Differential Revision: D17000965
fbshipit-source-id: 78bb0c7674c99e91be6011bb7e623cd4f63b1521
2019-09-14 05:26:34 +03:00
|
|
|
dag.build_segments_volatile(head_id, &parents_by_id)
|
|
|
|
.unwrap();
|
2019-09-14 05:26:34 +03:00
|
|
|
})
|
|
|
|
});
|
|
|
|
|
|
|
|
// Write segments to filesystem.
|
2020-01-16 00:58:22 +03:00
|
|
|
let mut dag = IdDag::open(&dag_dir.path()).unwrap();
|
2019-09-17 22:34:26 +03:00
|
|
|
let mut syncable = dag.prepare_filesystem_sync().unwrap();
|
|
|
|
syncable
|
|
|
|
.build_segments_persistent(head_id, &parents_by_id)
|
|
|
|
.unwrap();
|
|
|
|
syncable.sync(std::iter::once(&mut dag)).unwrap();
|
2019-09-14 05:26:34 +03:00
|
|
|
|
|
|
|
let sample_two_ids: Vec<SpanSet> = (0..parents.len() as u64)
|
|
|
|
.step_by(10079)
|
|
|
|
.flat_map(|i| {
|
|
|
|
(1..parents.len() as u64)
|
|
|
|
.step_by(7919)
|
2019-12-05 09:41:13 +03:00
|
|
|
.map(move |j| (Id(i), Id(j)).into())
|
2019-09-14 05:26:34 +03:00
|
|
|
})
|
|
|
|
.collect(); // 2679 samples
|
|
|
|
let sample_sets: Vec<SpanSet> = (0..parents.len() as u64)
|
|
|
|
.step_by(10079)
|
|
|
|
.flat_map(|i| {
|
|
|
|
((i + 7919)..parents.len() as u64)
|
|
|
|
.step_by(7919)
|
2019-12-05 09:41:13 +03:00
|
|
|
.map(move |j| (Id(i)..=Id(j)).into())
|
2019-09-14 05:26:34 +03:00
|
|
|
})
|
|
|
|
.collect(); // 1471 samples
|
|
|
|
|
|
|
|
bench("ancestors", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_sets {
|
|
|
|
dag.ancestors(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
2019-09-14 05:26:34 +03:00
|
|
|
bench("children", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_sets {
|
|
|
|
dag.children(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
2019-09-14 05:26:34 +03:00
|
|
|
bench("common_ancestors (spans)", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_sets {
|
|
|
|
dag.common_ancestors(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
2019-09-17 22:34:26 +03:00
|
|
|
bench("descendants (small subset)", || {
|
|
|
|
elapsed(|| {
|
|
|
|
// "descendants" is extremely slow. Therefore only test a very
|
|
|
|
// small subset.
|
|
|
|
for set in sample_sets.iter().skip(500).take(2) {
|
|
|
|
dag.descendants(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
2019-09-14 05:26:34 +03:00
|
|
|
bench("gca_one (2 ids)", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_two_ids {
|
|
|
|
dag.gca_one(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
|
|
|
bench("gca_one (spans)", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_sets {
|
|
|
|
dag.gca_one(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
|
|
|
bench("gca_all (2 ids)", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_two_ids {
|
|
|
|
dag.gca_all(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
|
|
|
bench("gca_all (spans)", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_sets {
|
|
|
|
dag.gca_all(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
|
|
|
bench("heads", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_sets {
|
|
|
|
dag.heads(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
|
|
|
bench("heads_ancestors", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_sets {
|
|
|
|
dag.heads_ancestors(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
|
|
|
bench("is_ancestor", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_two_ids {
|
|
|
|
let ids: Vec<_> = set.iter().collect();
|
|
|
|
dag.is_ancestor(ids[0], ids[1]).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
|
|
|
bench("parents", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_sets {
|
|
|
|
dag.parents(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
2019-09-14 05:26:34 +03:00
|
|
|
|
2019-09-14 05:26:34 +03:00
|
|
|
bench("parent_ids", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_two_ids {
|
|
|
|
for id in set.iter() {
|
|
|
|
dag.parent_ids(id).unwrap();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
2019-09-14 05:26:34 +03:00
|
|
|
bench("range (2 ids)", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_two_ids {
|
|
|
|
let ids: Vec<_> = set.iter().collect();
|
|
|
|
dag.range(ids[0], ids[1]).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
|
|
|
bench("range (spans)", || {
|
|
|
|
elapsed(|| {
|
|
|
|
let mut iter = sample_sets.iter();
|
|
|
|
if let (Some(set1), Some(set2)) = (iter.next(), iter.next_back()) {
|
|
|
|
dag.range(set1.clone(), set2.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
|
|
|
|
2019-09-14 05:26:34 +03:00
|
|
|
bench("roots", || {
|
|
|
|
elapsed(|| {
|
|
|
|
for set in &sample_sets {
|
|
|
|
dag.roots(set.clone()).unwrap();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
});
|
2019-09-14 05:26:34 +03:00
|
|
|
}
|