Add new command create bonsai

Summary:
We can have different bonsai changeset hashes for the same hg changeset. Consider the following situation — we have an hg repo:
```
  o  B (Add file "b")
  │
  o  A (Add file "a")
```
The correct bonsai changeset for B will have only the entry `(<Path_to_b>,Some(<hash_b>))` in `file_changes`. But we can also have a bonsai changeset for B with 2 entries: `(<Path_to_b>,Some(<hash_b>)), (<Path_to_a>,Some(<hash_a>))`. This diff provides the functionality to manually create such a situation. Later it will be used for verifying blobimport backups.

Reviewed By: StanislavGlebik

Differential Revision: D24589387

fbshipit-source-id: 89c56fca935dffe3cbfb282995efb287726a3ca9
This commit is contained in:
Egor Tkachenko 2020-11-04 01:27:21 -08:00 committed by Facebook GitHub Bot
parent 424f9d5214
commit f44014de6f
4 changed files with 245 additions and 3 deletions

View File

@ -0,0 +1,141 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use std::fs::File;
use std::io::Read;
use anyhow::{anyhow, format_err, Error};
use clap::{App, ArgMatches, SubCommand};
use cmdlib::args;
use context::CoreContext;
use fbinit::FacebookInit;
use futures::{compat::Future01CompatExt, TryFutureExt};
use mononoke_types::{
BonsaiChangeset, BonsaiChangesetMut, ChangesetId, DateTime, FileChange, MPath,
};
use serde_derive::Deserialize;
use slog::Logger;
use std::collections::BTreeMap;
use crate::error::SubcommandError;
use blobrepo::save_bonsai_changesets;
use blobrepo_hg::BlobRepoHg;
pub const CREATE_BONSAI: &str = "create-bonsai";
/// Build the clap subcommand definition for `create-bonsai`.
///
/// Takes one required positional argument (`BONSAI_FILE`, a JSON file
/// describing the changeset) and a `--dangerous` acknowledgement flag.
pub fn build_subcommand<'a, 'b>() -> App<'a, 'b> {
    SubCommand::with_name(CREATE_BONSAI)
        .about("Create and push bonsai changeset")
        .args_from_usage(
            // NOTE: clap's usage-string syntax delimits help text with single
            // quotes, so the help text itself must not contain an apostrophe
            // (the previous "It's" terminated the help string early).
            r#"<BONSAI_FILE> 'path to json of changes'
--dangerous 'This is a dangerous command. Do you really need to run this command?'"#,
        )
}
/// Read a JSON-serialized bonsai changeset from `BONSAI_FILE`, verify that
/// every file content it references is already present in the repo's
/// filestore, save the changeset, and print the resulting bonsai/hg
/// changeset id pair.
///
/// Requires the `--dangerous` flag: this can deliberately create a
/// non-canonical bonsai changeset for an existing hg changeset, so it must
/// never be run casually.
pub async fn subcommand_create_bonsai<'a>(
    fb: FacebookInit,
    logger: Logger,
    matches: &'a ArgMatches<'_>,
    sub_m: &'a ArgMatches<'_>,
) -> Result<(), SubcommandError> {
    // Refuse to do anything unless the user explicitly acknowledged the risk.
    if !sub_m.is_present("dangerous") {
        return Err(SubcommandError::Error(anyhow!(
            "--dangerous was not provided. Think twice before use"
        )));
    }
    // BONSAI_FILE is a required positional argument, so unwrap() cannot fail.
    let path = sub_m.value_of("BONSAI_FILE").unwrap().to_string();
    let mut content = String::new();
    File::open(path)
        .map_err(|e| SubcommandError::Error(anyhow!(e)))?
        .read_to_string(&mut content)
        .map_err(|e| SubcommandError::Error(anyhow!(e)))?;

    args::init_cachelib(fb, &matches, None);
    let ctx = CoreContext::new_with_logger(fb, logger.clone());

    // Deserialize into the helper struct first, then convert and freeze so
    // all bonsai invariants are checked before we touch the repo.
    let bcs: BonsaiChangeset = match serde_json::from_str(&content) {
        Ok(val) => {
            let bcs_deser: DeserializableBonsaiChangeset = val;
            bcs_deser.into_bonsai()?.freeze()?
        }
        Err(e) => return Err(SubcommandError::Error(anyhow!(e))),
    };
    let blobrepo = args::open_repo(fb, &logger, &matches).await?;

    // Every file change must point at content that already exists in the
    // filestore; this command only creates the changeset object, not blobs.
    for (_, change) in bcs.file_changes() {
        if let Some(change) = change {
            if filestore::get_metadata(
                &blobrepo.get_blobstore(),
                ctx.clone(),
                &change.content_id().into(),
            )
            .compat()
            .await?
            .is_none()
            {
                // The lookup key is a content id, not a filenode hash —
                // report it accordingly.
                return Err(SubcommandError::Error(format_err!(
                    "content {} is not found in the filestore",
                    &change.content_id()
                )));
            }
        }
    }
    let bcs_id = bcs.get_changeset_id();
    save_bonsai_changesets(vec![bcs], ctx.clone(), blobrepo.clone())
        .compat()
        .map_err(|e| SubcommandError::Error(anyhow!(e)))
        .await?;

    // Map the new bonsai changeset to its hg counterpart so the caller can
    // cross-check both ids.
    let hg_cs = blobrepo
        .get_hg_from_bonsai_changeset(ctx, bcs_id)
        .compat()
        .await?;
    println!(
        "Created bonsai changeset {} for hg_changeset {}",
        bcs_id, hg_cs
    );
    Ok(())
}
/// JSON-deserializable mirror of `BonsaiChangesetMut`.
///
/// The user-supplied JSON keys `file_changes` by plain string paths, while
/// the real changeset uses `MPath` keys, so this intermediate struct is
/// deserialized first and then converted via `into_bonsai`.
#[derive(Debug, Deserialize)]
pub struct DeserializableBonsaiChangeset {
    /// Parent changeset ids (empty for a root commit, several for merges).
    pub parents: Vec<ChangesetId>,
    pub author: String,
    pub author_date: DateTime,
    pub committer: Option<String>,
    // XXX should committer date always be recorded? If so, it should probably be a
    // monotonically increasing value:
    // max(author date, max(committer date of parents) + epsilon)
    pub committer_date: Option<DateTime>,
    pub message: String,
    /// Extra key/value commit metadata.
    pub extra: BTreeMap<String, Vec<u8>>,
    /// Map from string repo path to the change at that path; `None` marks
    /// the path as deleted.
    pub file_changes: BTreeMap<String, Option<FileChange>>,
}
impl DeserializableBonsaiChangeset {
    /// Convert the deserialized data into a `BonsaiChangesetMut`, turning
    /// every string path into a validated `MPath`.
    ///
    /// Returns an error as soon as any path fails `MPath` validation.
    pub fn into_bonsai(self) -> Result<BonsaiChangesetMut, Error> {
        // Validate all paths up front, short-circuiting on the first bad one.
        let mut files = Vec::with_capacity(self.file_changes.len());
        for (path, change) in self.file_changes {
            files.push((MPath::new(path.as_bytes())?, change));
        }
        Ok(BonsaiChangesetMut {
            parents: self.parents,
            author: self.author,
            author_date: self.author_date,
            committer: self.committer,
            committer_date: self.committer_date,
            message: self.message,
            extra: self.extra,
            file_changes: files.into_iter().collect(),
        })
    }
}

View File

@ -21,6 +21,7 @@ use slog::error;
use crate::blobstore_fetch::subcommand_blobstore_fetch;
use crate::bonsai_fetch::subcommand_bonsai_fetch;
use crate::content_fetch::subcommand_content_fetch;
use crate::create_bonsai::subcommand_create_bonsai;
use crate::crossrepo::subcommand_crossrepo;
use crate::error::SubcommandError;
use crate::filenodes::subcommand_filenodes;
@ -36,6 +37,7 @@ mod bonsai_fetch;
mod bookmarks_manager;
mod common;
mod content_fetch;
mod create_bonsai;
mod crossrepo;
mod derived_data;
mod error;
@ -65,6 +67,7 @@ fn setup_app<'a, 'b>() -> App<'a, 'b> {
.about("Poke at mononoke internals for debugging and investigating data structures.")
.subcommand(blobstore_fetch::build_subcommand())
.subcommand(bonsai_fetch::build_subcommand())
.subcommand(create_bonsai::build_subcommand())
.subcommand(content_fetch::build_subcommand())
.subcommand(bookmarks_manager::build_subcommand())
.subcommand(hg_changeset::build_subcommand())
@ -106,6 +109,9 @@ fn main(fb: FacebookInit) -> ExitCode {
(bonsai_fetch::BONSAI_FETCH, Some(sub_m)) => {
subcommand_bonsai_fetch(fb, logger, &matches, sub_m).await
}
(create_bonsai::CREATE_BONSAI, Some(sub_m)) => {
subcommand_create_bonsai(fb, logger, &matches, sub_m).await
}
(content_fetch::CONTENT_FETCH, Some(sub_m)) => {
subcommand_content_fetch(fb, logger, &matches, sub_m).await
}

View File

@ -11,14 +11,14 @@ use anyhow::{bail, Context, Result};
use edenapi_types::FileType as EdenapiFileType;
use quickcheck::{empty_shrinker, single_shrinker, Arbitrary, Gen};
use rand::{seq::SliceRandom, Rng};
use serde_derive::Serialize;
use serde_derive::{Deserialize, Serialize};
use crate::errors::ErrorKind;
use crate::path::MPath;
use crate::thrift;
use crate::typed_hash::{ChangesetId, ContentId};
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize)]
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub struct FileChange {
content_id: ContentId,
file_type: FileType,
@ -171,7 +171,18 @@ impl Arbitrary for FileChange {
///
/// Symlink is also the same as Regular, but the content of the file is interpolated into a path
/// being traversed during lookup.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd, Serialize)]
#[derive(
Debug,
Clone,
Copy,
Eq,
PartialEq,
Hash,
Ord,
PartialOrd,
Serialize,
Deserialize
)]
pub enum FileType {
Regular,
Executable,

View File

@ -0,0 +1,84 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License found in the LICENSE file in the root
# directory of this source tree.
$ . "${TEST_FIXTURES}/library.sh"
setup configuration
$ default_setup_pre_blobimport "blob_files"
hg repo
o C [draft;rev=2;26805aba1e60]
o B [draft;rev=1;112478962961]
o A [draft;rev=0;426bada5c675]
$
$ setup_mononoke_config
$ cd $TESTTMP
# Import only the first two commits (A and B) so that C is initially
# missing from the primary repo.
$ blobimport repo-hg/.hg repo --commits-limit 2
$ blobimport --log repo-hg/.hg repo --find-already-imported-rev-only
* using repo "repo" repoid RepositoryId(0) (glob)
* latest imported revision 1 (glob)
$ sqlite3 "$TESTTMP/monsql/sqlite_dbs" "select * from mutable_counters";
0|highest-imported-gen-num|2
# A second repo (id 2) acts as the blobimport backup to compare against.
$ REPONAME=backup REPOID=2 setup_mononoke_config
$ cd $TESTTMP/repo-hg
$ hg up master_bookmark
3 files updated, 0 files merged, 0 files removed, 0 files unresolved
(activating bookmark master_bookmark)
# Get content_id for file B
$ mononoke_admin filestore store B
* using repo "repo" repoid RepositoryId(0) (glob)
* Wrote 55662471e2a28db8257939b2f9a2d24e65b46a758bac12914a58f17dcde6905f (1 bytes) (glob)
# Upload C as it wasn't imported
$ mononoke_admin filestore store C
* using repo "repo" repoid RepositoryId(0) (glob)
* Wrote 896ad5879a5df0403bfc93fc96507ad9c93b31b11f3d0fa05445da7918241e5d (1 bytes) (glob)
$ cd $TESTTMP
# Hand-craft a non-canonical bonsai changeset for C: it redundantly
# re-lists B in file_changes, even though B is unchanged relative to the
# parent. A canonical bonsai for C would contain only the C entry.
$ cat > bonsai_file <<EOF
> {
> "parents": [
> "459f16ae564c501cb408c1e5b60fc98a1e8b8e97b9409c7520658bfa1577fb66"
> ],
> "author": "test",
> "author_date": "1970-01-01T00:00:00+00:00",
> "committer": null,
> "committer_date": null,
> "message": "C",
> "extra": {},
> "file_changes": {
> "C": {
> "content_id": "896ad5879a5df0403bfc93fc96507ad9c93b31b11f3d0fa05445da7918241e5d",
> "file_type": "Regular",
> "size": 1,
> "copy_from": null
> },
> "B": {
> "content_id": "55662471e2a28db8257939b2f9a2d24e65b46a758bac12914a58f17dcde6905f",
> "file_type": "Regular",
> "size": 1,
> "copy_from": null
> }
> }
> }
> EOF
$ mononoke_admin create-bonsai bonsai_file --dangerous
* using repo "repo" repoid RepositoryId(0) (glob)
Created bonsai changeset 4b71c845e8783e58fce825fa80254840eba291d323a5d69218ad927fc801153c for hg_changeset 26805aba1e600a82e93661149f2313866a221a7b
$ mononoke_admin bookmarks set master_bookmark 26805aba1e600a82e93661149f2313866a221a7b 2>/dev/null
$ mononoke_admin bookmarks list --kind publishing 2>/dev/null
master_bookmark 4b71c845e8783e58fce825fa80254840eba291d323a5d69218ad927fc801153c 26805aba1e600a82e93661149f2313866a221a7b
# Import the same hg repo into the backup; blobimport derives its own
# (canonical) bonsai for C.
$ REPOID=2 blobimport repo-hg/.hg backup
$ sqlite3 "$TESTTMP/monsql/sqlite_dbs" "select * from mutable_counters";
0|highest-imported-gen-num|2
2|highest-imported-gen-num|3
# Same hg changeset (26805aba...), but a different bonsai hash in the
# backup repo — the situation this command exists to reproduce.
$ REPOID=2 mononoke_admin bookmarks list --kind publishing 2>/dev/null
master_bookmark c3384961b16276f2db77df9d7c874bbe981cf0525bd6f84a502f919044f2dabd 26805aba1e600a82e93661149f2313866a221a7b