mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 00:45:18 +03:00
mononoke: add segmented_changelog crate
Summary: Segmented changelog is a new way to represent the commit graph that can compress and quickly iterate over section of commits that have only one parent. [1] There is a good amount of code already for this structure in the client. The `dag` crate in that project has a lot of the functionality already implemented. [2] Long term we ideally have one library shared between the client and the server but right now the runtime environments are different enough that it is easier to reimplement the required parts for the server. The implementation requirements also seem different enough as we look at what the server requires. With segmented changelogs we will want new protocols for talking between server and client. This change is add a seed crate with a simple implementation for idmap, the first component of the segmented changelog. It is just relabling commits using continuous increasing integers. The short term goal will be to add some simple implementations that can be used to define a valid interface. From there we can look in parallel at protocols on one side and proper server implementation on the other. [1] https://github.com/facebookexperimental/eden/blob/master/eden/scm/slides/201904-segmented-changelog/segmented-changelog.pdf [2] https://github.com/facebookexperimental/eden/tree/master/eden/scm/lib/dag Reviewed By: krallin Differential Revision: D19525583 fbshipit-source-id: 7e27fb4a30c901b30a3bbaccddeb91060721f407
This commit is contained in:
parent
3675ca79d3
commit
e828f24edf
192
mononoke/segmented_changelog/src/lib.rs
Normal file
192
mononoke/segmented_changelog/src/lib.rs
Normal file
@ -0,0 +1,192 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||
*
|
||||
* This software may be used and distributed according to the terms of the
|
||||
* GNU General Public License found in the LICENSE file in the root
|
||||
* directory of this source tree.
|
||||
*/
|
||||
|
||||
#![feature(backtrace)]
|
||||
#![deny(warnings)]
|
||||
|
||||
///! segmented_changelog
|
||||
///!
|
||||
///! Data structures and algorithms for a commit graph used by source control.
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use futures_preview::compat::Future01CompatExt;
|
||||
use maplit::{hashmap, hashset};
|
||||
|
||||
use blobrepo::BlobRepo;
|
||||
use context::CoreContext;
|
||||
use mononoke_types::ChangesetId;
|
||||
|
||||
/// An identifier in Segmented Changelog
|
||||
#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)]
|
||||
pub struct SegmentedChangelogId(u64);
|
||||
|
||||
/// Assign an id for a head in a DAG. This implies ancestors of the
|
||||
/// head will also have ids assigned.
|
||||
pub async fn build_idmap(
|
||||
ctx: &CoreContext,
|
||||
blob_repo: &BlobRepo,
|
||||
head: ChangesetId,
|
||||
) -> Result<HashMap<ChangesetId, SegmentedChangelogId>> {
|
||||
struct Parents<'a> {
|
||||
ctx: &'a CoreContext,
|
||||
blob_repo: &'a BlobRepo,
|
||||
}
|
||||
impl<'a> Parents<'a> {
|
||||
async fn get(&self, changeset_id: ChangesetId) -> Result<Vec<ChangesetId>> {
|
||||
let parents = self
|
||||
.blob_repo
|
||||
.get_changeset_parents_by_bonsai(self.ctx.clone(), changeset_id)
|
||||
.compat()
|
||||
.await?;
|
||||
Ok(parents)
|
||||
}
|
||||
}
|
||||
enum Todo {
|
||||
Visit(ChangesetId),
|
||||
Assign(ChangesetId),
|
||||
}
|
||||
let parents = Parents { ctx, blob_repo };
|
||||
let mut todo_stack = vec![Todo::Visit(head)];
|
||||
let mut seen = hashset![head];
|
||||
let mut idmap = hashmap![];
|
||||
let mut next_segmented_changelog_id = 1;
|
||||
|
||||
while let Some(todo) = todo_stack.pop() {
|
||||
match todo {
|
||||
Todo::Visit(changeset_id) => {
|
||||
todo_stack.push(Todo::Assign(changeset_id));
|
||||
|
||||
let parents = parents.get(changeset_id).await?;
|
||||
for parent in parents.into_iter().rev() {
|
||||
// Note: iterating parents in reverse is a small optimization because in our setup p1 is master.
|
||||
if !seen.contains(&parent) {
|
||||
seen.insert(parent);
|
||||
todo_stack.push(Todo::Visit(parent));
|
||||
}
|
||||
}
|
||||
}
|
||||
Todo::Assign(changeset_id) => {
|
||||
idmap.insert(
|
||||
changeset_id,
|
||||
SegmentedChangelogId(next_segmented_changelog_id),
|
||||
);
|
||||
next_segmented_changelog_id += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(idmap)
|
||||
}
|
||||
|
||||
// TODO(sfilip):
|
||||
// generate_graph
|
||||
// struct Dag(idmap, segments)
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use fbinit::FacebookInit;
|
||||
use futures::stream::{self, Stream};
|
||||
use futures_preview::StreamExt;
|
||||
use futures_preview::{FutureExt as NewFutureExt, TryFutureExt};
|
||||
use futures_util::compat::Stream01CompatExt;
|
||||
use revset::AncestorsNodeStream;
|
||||
|
||||
use fixtures::{linear, merge_even, merge_uneven};
|
||||
|
||||
async fn assert_topologic_sorted(
|
||||
ctx: &CoreContext,
|
||||
blob_repo: &BlobRepo,
|
||||
head: ChangesetId,
|
||||
idmap: HashMap<ChangesetId, SegmentedChangelogId>,
|
||||
) -> Result<()> {
|
||||
// It's a bit weird to use the ancestors stream. The idea is just to do
|
||||
// something different than the core implementation.
|
||||
let ancestors =
|
||||
AncestorsNodeStream::new(ctx.clone(), &blob_repo.get_changeset_fetcher(), head);
|
||||
let mut to_check = stream::iter_result(vec![Ok(head)])
|
||||
.chain(ancestors)
|
||||
.compat();
|
||||
while let Some(changeset_id) = to_check.next().await {
|
||||
let changeset_id = changeset_id?;
|
||||
let parents = blob_repo
|
||||
.get_changeset_parents_by_bonsai(ctx.clone(), changeset_id)
|
||||
.compat()
|
||||
.await?;
|
||||
for parent in parents {
|
||||
assert!(idmap.get(&parent).unwrap() < idmap.get(&changeset_id).unwrap());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[fbinit::test]
|
||||
fn test_build_idmap_linear(fb: FacebookInit) -> Result<()> {
|
||||
let mut runtime = tokio_compat::runtime::Runtime::new().unwrap();
|
||||
runtime.block_on(
|
||||
async move {
|
||||
let ctx = CoreContext::test_mock(fb);
|
||||
let repo = linear::getrepo(fb);
|
||||
|
||||
let head = ChangesetId::from_str(
|
||||
"7785606eb1f26ff5722c831de402350cf97052dc44bc175da6ac0d715a3dbbf6",
|
||||
)?;
|
||||
let idmap = build_idmap(&ctx, &repo, head).await?;
|
||||
assert_topologic_sorted(&ctx, &repo, head, idmap).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
.boxed()
|
||||
.compat(),
|
||||
)
|
||||
}
|
||||
|
||||
#[fbinit::test]
|
||||
fn test_build_idmap_merge_even(fb: FacebookInit) -> Result<()> {
|
||||
let mut runtime = tokio_compat::runtime::Runtime::new().unwrap();
|
||||
runtime.block_on(
|
||||
async move {
|
||||
let ctx = CoreContext::test_mock(fb);
|
||||
let repo = merge_even::getrepo(fb);
|
||||
|
||||
let head = ChangesetId::from_str(
|
||||
"567a25d453cafaef6550de955c52b91bf9295faf38d67b6421d5d2e532e5adef",
|
||||
)?;
|
||||
let idmap = build_idmap(&ctx, &repo, head).await?;
|
||||
assert_topologic_sorted(&ctx, &repo, head, idmap).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
.boxed()
|
||||
.compat(),
|
||||
)
|
||||
}
|
||||
|
||||
#[fbinit::test]
|
||||
fn test_build_idmap_merge_uneven(fb: FacebookInit) -> Result<()> {
|
||||
let mut runtime = tokio_compat::runtime::Runtime::new().unwrap();
|
||||
runtime.block_on(
|
||||
async move {
|
||||
let ctx = CoreContext::test_mock(fb);
|
||||
let repo = merge_uneven::getrepo(fb);
|
||||
|
||||
let head = ChangesetId::from_str(
|
||||
"288d72de7fd26ebcd19f5e4f1b41542f22f4a9f7e2f6845fa04e8fd70064973d",
|
||||
)?;
|
||||
let idmap = build_idmap(&ctx, &repo, head).await?;
|
||||
assert_topologic_sorted(&ctx, &repo, head, idmap).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
.boxed()
|
||||
.compat(),
|
||||
)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user