dag: rebuild non-master id and segments if needed

Summary:
Add utilities to rebuild non-master ids and segments if necessary.

The `NamedDag` structure ensures the indexes have a 1:1 mapping.

Reviewed By: sfilipco

Differential Revision: D18838995

fbshipit-source-id: 4a48b183c182bd5e6336a2ca4973c36091fbbfd8
This commit is contained in:
Jun Wu 2020-01-08 18:44:01 -08:00 committed by Facebook Github Bot
parent 28380e0272
commit bcaff9062c
3 changed files with 88 additions and 8 deletions

View File

@ -96,6 +96,65 @@ impl NamedDag {
// Before those APIs, LowLevelAccess might have to be used by callsites.
}
/// Snapshot the non-master part of the graph as a name-keyed parent table.
///
/// The id-keyed table is obtained from `dag.non_master_parent_ids()`. Every
/// id in it (keys and parents alike) is then translated with `map.slice`,
/// producing a `HashMap` from a vertex name to the names of its parents.
///
/// # Errors
///
/// Returns the first error reported by `dag.non_master_parent_ids()` or by
/// any `map.slice` lookup.
///
/// # Cost
///
/// This can be expensive. It is meant to be called rarely, or only when the
/// amount of data is small (for example, a bounded number of non-master
/// commits).
fn non_master_parent_names(
    map: &SyncableIdMap,
    dag: &SyncableDag,
) -> Result<HashMap<Box<[u8]>, Vec<Box<[u8]>>>> {
    let id_table = dag.non_master_parent_ids()?;
    let mut name_table = HashMap::with_capacity(id_table.len());
    for (id, parent_ids) in id_table.iter() {
        // Translate the vertex itself first, then each of its parents.
        let name = map.slice(*id)?;
        let mut parent_names = Vec::with_capacity(parent_ids.len());
        for parent_id in parent_ids {
            parent_names.push(map.slice(*parent_id)?);
        }
        name_table.insert(name, parent_names);
    }
    Ok(name_table)
}
/// Re-assign ids and segments for the non-master group.
///
/// The function works in three steps:
///
/// 1. Copy the non-master graph into memory as a name -> parent-names table
///    (via `non_master_parent_names`) and work out its heads, i.e. the names
///    that are not listed as a parent of any other entry.
/// 2. Remove the existing non-master data from `dag` and then from `map`.
/// 3. Call `build` with no master heads and the sorted non-master heads,
///    answering parent queries from the in-memory table.
///
/// # Errors
///
/// Propagates errors from the export, the two removals and `build`. The
/// parent lookup handed to `build` fails with a "bug: parents of ... is
/// missing" error when asked about a name absent from the in-memory table.
pub fn rebuild_non_master(map: &mut SyncableIdMap, dag: &mut SyncableDag) -> Result<()> {
    // Keep a name-based copy of the non-master graph in memory; the id-based
    // data is removed below.
    let parents = non_master_parent_names(map, dag)?;

    // A head is a name that nobody references as a parent.
    let referenced: HashSet<_> = parents.values().flatten().collect();
    let mut heads: Vec<_> = parents
        .keys()
        .filter(|name| !referenced.contains(name))
        .cloned()
        .collect();
    // HashMap iteration order is unspecified; sort the heads for a stable order.
    heads.sort_unstable();

    // Drop the existing non-master data: segments first, then the id map.
    dag.remove_non_master()?;
    map.remove_non_master()?;

    // Rebuild both from the in-memory copy.
    let lookup_parents = |name: &[u8]| match parents.get(name) {
        Some(found) => Ok(found.iter().cloned().collect()),
        None => bail!(
            "bug: parents of {:?} is missing (in rebuild_non_master)",
            name
        ),
    };
    build(map, dag, lookup_parents, &[], heads.as_slice())?;
    Ok(())
}
/// Build IdMap and Segments for the given heads.
pub fn build<F>(
map: &mut SyncableIdMap,
@ -130,8 +189,10 @@ where
}
}
// XXX: Remove the hack and rebuild non-master data.
map.need_rebuild_non_master = false;
// Rebuild non-master ids and segments.
if map.need_rebuild_non_master {
rebuild_non_master(map, dag)?;
}
Ok(())
}

View File

@ -1320,6 +1320,24 @@ impl SyncableDag {
Ok(())
}
/// Export the non-master part of the DAG as an id -> parent-ids table.
///
/// Iterates over the segments returned by `next_segments`, starting at the
/// smallest id of the non-master group (the second argument `0` is
/// presumably the segment level -- confirm against `next_segments`). For
/// each segment:
///
/// - the lowest id of its span is mapped to the parents stored on the
///   segment;
/// - every following id up to `span.high` is mapped to a single parent, the
///   id right before it.
///
/// This can be expensive if there are many non-master ids. It is currently
/// only used to rebuild the non-master group after id re-assignment.
///
/// # Errors
///
/// Propagates errors from `next_segments`, `span` and `parents`.
pub fn non_master_parent_ids(&self) -> Result<HashMap<Id, Vec<Id>>> {
    let first_id = Group::NON_MASTER.min_id();
    let mut table = HashMap::new();
    for segment in self.dag.next_segments(first_id, 0)? {
        let span = segment.span()?;
        // Only the first id of a segment uses the parents recorded on it.
        let head_parents = segment.parents()?;
        table.insert(span.low, head_parents);
        // Every later id in the span gets the preceding id as its only parent.
        for id in (span.low + 1).to(span.high) {
            let previous = id - 1;
            table.insert(id, vec![previous]);
        }
    }
    Ok(table)
}
/// Mark non-master segments as "removed".
pub fn remove_non_master(&mut self) -> Result<()> {
self.dag.remove_non_master()

View File

@ -334,17 +334,18 @@ Lv1: R0-6[] N0-N3[1] N4-N8[5, N3]
\ \ /
N0-N1-N2-N3 7--8--9--10
\ \
-------------N7--N8
Lv0: RH0-1[] H2-3[1] H4-6[3] 7-10[5] H11-12[6, 10] N0-N1[1] N2-N3[N1] N4-N6[5] N7-N8[N3, N6]
Lv1: R0-6[] 7-12[5, 6] N0-N3[1] N4-N8[5, N3]
Lv2: R0-12[] N0-N8[1, 5]"#
-------------N4--N5
Lv0: RH0-1[] H2-3[1] H4-6[3] 7-10[5] H11-12[6, 10] N0-N3[1] N4-N5[N3, 9]
Lv1: R0-6[] 7-12[5, 6] N0-N5[1, 9]
Lv2: R0-12[] N0-N5[1, 9]"#
);
// 'm' has 2 ids: 8 (master) and 5 (non-master).
// Notice that N4 to N6 were re-written in the last step.
// 'm' only has 1 id: 8 (master). The old id (N5) is now taken by 'q'.
assert_eq!(built.id_map.find_id_by_slice(b"m").unwrap().unwrap(), Id(8));
assert_eq!(built.id_map.find_slice_by_id(Id(8)).unwrap().unwrap(), b"m");
let id = Group::NON_MASTER.min_id() + 5;
assert_eq!(built.id_map.find_slice_by_id(id).unwrap().unwrap(), b"m");
assert_eq!(built.id_map.find_slice_by_id(id).unwrap().unwrap(), b"q");
}
#[test]