Add simple backfiller for commit graph

Summary:
I added a very simple backfiller from the current commit graph structure to the new one.

It simply lists changesets in order and adds them to the new structure.

Reviewed By: YousefSalama

Differential Revision: D42208979

fbshipit-source-id: 60875ae6f8dfafafb596f4910bb5a7f21d67cce8
This commit is contained in:
Yan Soares Couto 2022-12-23 02:25:53 -08:00 committed by Facebook GitHub Bot
parent 4328ecb270
commit 081fdc6a9f
4 changed files with 185 additions and 0 deletions

View File

@ -29,6 +29,7 @@ chrono = { version = "0.4", features = ["clock", "serde", "std"], default-featur
clap = { version = "3.2.17", features = ["derive", "env", "regex", "unicode", "wrap_help"] }
cmdlib_displaying = { version = "0.1.0", path = "../../cmdlib/displaying" }
cmdlib_scrubbing = { version = "0.1.0", path = "../../cmdlib/scrubbing" }
commit_graph = { version = "0.1.0", path = "../../repo_attributes/commit_graph/commit_graph" }
context = { version = "0.1.0", path = "../../server/context" }
dag = { version = "0.1.0", path = "../../../scm/lib/dag", features = ["for-tests"] }
ephemeral_blobstore = { version = "0.1.0", path = "../../blobstore/ephemeral_blobstore" }
@ -56,6 +57,7 @@ pushrebase = { version = "0.1.0", path = "../../pushrebase" }
pushrebase_mutation_mapping = { version = "0.1.0", path = "../../pushrebase_mutation_mapping" }
question = "0.2.2"
regex = "1.6.0"
rendezvous = { version = "0.1.0", path = "../../common/rendezvous" }
repo_blobstore = { version = "0.1.0", path = "../../blobrepo/repo_blobstore" }
repo_bookmark_attrs = { version = "0.1.0", path = "../../repo_attributes/repo_bookmark_attrs" }
repo_cross_repo = { version = "0.1.0", path = "../../repo_attributes/repo_cross_repo" }
@ -67,6 +69,7 @@ serde_json = { version = "1.0.79", features = ["float_roundtrip", "unbounded_dep
skiplist = { version = "0.1.0", path = "../../reachabilityindex/skiplist" }
slog = { version = "2.7", features = ["max_level_trace", "nested-values"] }
source_control = { version = "0.1.0", path = "../../scs/if" }
sql_commit_graph_storage = { version = "0.1.0", path = "../../repo_attributes/commit_graph/sql_commit_graph_storage" }
strum_macros = "0.21"
tokio = { version = "1.21.2", features = ["full", "test-util", "tracing"] }
tokio-util = { version = "0.6", features = ["full"] }

View File

@ -11,6 +11,7 @@ mononoke_app::subcommands! {
mod bookmarks;
mod changelog;
mod commit;
mod commit_graph;
mod convert;
mod fetch;
mod filestore;

View File

@ -0,0 +1,129 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use std::collections::HashSet;
use std::sync::Arc;
use anyhow::Result;
use async_trait::async_trait;
use bulkops::Direction;
use bulkops::PublicChangesetBulkFetch;
use clap::Args;
use commit_graph::CommitGraph;
use context::CoreContext;
use futures::TryStreamExt;
use metaconfig_types::RepoConfigRef;
use mononoke_app::MononokeApp;
use mononoke_types::ChangesetId;
use phases::ArcPhases;
use phases::Phases;
use rendezvous::RendezVousOptions;
use repo_identity::RepoIdentityRef;
use sql_commit_graph_storage::SqlCommitGraphStorageBuilder;
use super::Repo;
// Command-line arguments for the `backfill` subcommand.
// (Plain `//` comments are used here on purpose: `///` doc comments on
// clap-derived items are picked up as help text.)
#[derive(Args)]
pub struct BackfillArgs {
/// Which id to start backfilling from. Use 0 if nothing is backfilled.
#[clap(long)]
// Passed as the lower bound of the id range handed to `fetch_bounded`;
// the upper bound is always `u64::MAX`.
start_id: u64,
}
// We need to pretend all commits are public because we want to backfill all
// changesets, not only the public ones. This type is handed to
// `PublicChangesetBulkFetch` as its `Phases` implementation and answers
// "public" for every changeset it is asked about.
// NOTE(review): presumably the fetcher filters its output through
// `Phases::get_public` — confirm against the bulkops crate.
struct FakeAllCommitsPublic;
// `Phases` stub that reports every queried changeset as public.
//
// Only the two lookup methods (`get_public`, `get_cached_public`) are
// implemented; every other method panics through `unimplemented!()`.
#[async_trait]
impl Phases for FakeAllCommitsPublic {
    /// Not supported by this fake; panics if called.
    async fn add_reachable_as_public(
        &self,
        _ctx: &CoreContext,
        _heads: Vec<ChangesetId>,
    ) -> Result<Vec<ChangesetId>> {
        unimplemented!()
    }

    /// Not supported by this fake; panics if called.
    async fn add_public_with_known_public_ancestors(
        &self,
        _ctx: &CoreContext,
        _csids: Vec<ChangesetId>,
    ) -> Result<()> {
        unimplemented!()
    }

    /// Returns all of `csids` (deduplicated into a set) as public.
    async fn get_public(
        &self,
        _ctx: &CoreContext,
        csids: Vec<ChangesetId>,
        _ephemeral_derive: bool,
    ) -> Result<HashSet<ChangesetId>> {
        let everything_public: HashSet<ChangesetId> = csids.into_iter().collect();
        Ok(everything_public)
    }

    /// Returns all of `csids` (deduplicated into a set) as public.
    async fn get_cached_public(
        &self,
        _ctx: &CoreContext,
        csids: Vec<ChangesetId>,
    ) -> Result<HashSet<ChangesetId>> {
        let everything_public = csids.into_iter().collect::<HashSet<_>>();
        Ok(everything_public)
    }

    /// Not supported by this fake; panics if called.
    async fn list_all_public(&self, _ctx: &CoreContext) -> Result<Vec<ChangesetId>> {
        unimplemented!()
    }

    /// Not supported by this fake; panics if called.
    fn with_frozen_public_heads(&self, _heads: Vec<ChangesetId>) -> ArcPhases {
        unimplemented!()
    }
}
/// Copies changesets from the repo's changesets store into `commit_graph`.
///
/// Changesets are fetched oldest-first over the id range
/// `[args.start_id, u64::MAX)` and added to the commit graph one at a time,
/// in the order they are produced. A progress line is printed after every
/// 1000 changesets, and a final line once the stream is exhausted.
///
/// # Errors
///
/// Returns the first error produced either by the changeset stream or by
/// `CommitGraph::add`; the backfill stops at that point.
async fn backfill_impl(
    ctx: &CoreContext,
    commit_graph: &CommitGraph,
    repo: &Repo,
    args: BackfillArgs,
) -> Result<()> {
    let fetcher =
        PublicChangesetBulkFetch::new(repo.changesets.clone(), Arc::new(FakeAllCommitsPublic))
            .with_read_from_master(true);

    // Pin the stream on the heap so it can be polled through `try_next`.
    let mut entries = Box::pin(fetcher.fetch_bounded(
        ctx,
        Direction::OldestFirst,
        Some((args.start_id, u64::MAX)),
    ));

    // The counter lives in this function rather than inside a per-item
    // `async move` block: an integer captured by such a block is copied into
    // every future, so increments would never accumulate and the progress
    // message would never be printed.
    let mut done: u64 = 0;
    while let Some(entry) = entries.try_next().await? {
        commit_graph
            .add(ctx, entry.cs_id, entry.parents.into())
            .await?;
        done += 1;
        if done % 1000 == 0 {
            println!("Backfilled {} changesets", done);
        }
    }

    println!("Backfilled everything starting at id {}", args.start_id);
    Ok(())
}
/// Entry point of the `backfill` subcommand.
///
/// Opens the SQL commit graph storage from the repo's metadata database
/// configuration, wraps it in a `CommitGraph`, and runs the backfill against
/// it.
///
/// # Errors
///
/// Fails if the SQL factory cannot be created, the storage builder cannot be
/// opened, or the backfill itself returns an error.
pub(super) async fn backfill(
    ctx: &CoreContext,
    app: &MononokeApp,
    repo: &Repo,
    args: BackfillArgs,
) -> Result<()> {
    let metadata_config = &repo.repo_config().storage_config.metadata;
    let metadata_sql = app.repo_factory().sql_factory(metadata_config).await?;
    let builder: SqlCommitGraphStorageBuilder = metadata_sql.open()?;

    let rendezvous_options = RendezVousOptions {
        free_connections: 5,
    };
    let graph_storage = builder.build(rendezvous_options, repo.repo_identity().id());
    let commit_graph = CommitGraph::new(Arc::new(graph_storage));

    backfill_impl(ctx, &commit_graph, repo, args).await
}

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
mod backfill;
use anyhow::Result;
use backfill::BackfillArgs;
use changesets::Changesets;
use clap::Parser;
use clap::Subcommand;
use metaconfig_types::RepoConfig;
use mononoke_app::args::RepoArgs;
use mononoke_app::MononokeApp;
use repo_identity::RepoIdentity;
// Top-level arguments of the `commit_graph` admin command: which repo to
// operate on, plus the subcommand to run.
// (Plain `//` comments are used so clap's generated help text is unchanged.)
#[derive(Parser)]
pub struct CommandArgs {
// Repo selection arguments, flattened into this command's own argument list.
#[clap(flatten)]
repo: RepoArgs,
// The operation to perform; dispatched on in `run`.
#[clap(subcommand)]
subcommand: CommitGraphSubcommand,
}
// Subcommands available under `commit_graph`. Each variant carries the
// arguments of its subcommand and is matched on in `run`.
#[derive(Subcommand)]
pub enum CommitGraphSubcommand {
/// Backfill commit graph entries
Backfill(BackfillArgs),
}
// Facet container with the repo attributes the commit graph subcommands use.
// It is the type `run` asks `MononokeApp::open_repo` to construct.
#[facet::container]
pub struct Repo {
// Source of the changesets that get backfilled.
#[facet]
changesets: dyn Changesets,
// Repo configuration.
// NOTE(review): presumably what `repo_config()` returns in backfill.rs,
// where it supplies the metadata storage config — confirm.
#[facet]
config: RepoConfig,
// Repo identity.
// NOTE(review): presumably what `repo_identity()` returns in backfill.rs,
// where it supplies the repository id — confirm.
#[facet]
id: RepoIdentity,
}
pub async fn run(app: MononokeApp, args: CommandArgs) -> Result<()> {
let ctx = app.new_basic_context();
let repo: Repo = app.open_repo(&args.repo).await?;
match args.subcommand {
CommitGraphSubcommand::Backfill(args) => backfill::backfill(&ctx, &app, &repo, args).await,
}
}