mononoke: Add a rechunker binary

Summary:
Create a new binary that can be used to rechunk file contents using the filestore.
The binary accepts one or more filenodes, which it then rechunks using the filestore
config provided to it.

Reviewed By: krallin

Differential Revision: D16802701

fbshipit-source-id: d7c05729f5072ff2925bbc90cdd89fcfed56bba2
This commit is contained in:
Harvey Hunt 2019-08-16 09:07:36 -07:00 committed by Facebook Github Bot
parent d2af7b30a0
commit b778fd2482
3 changed files with 160 additions and 0 deletions

98
cmds/rechunker.rs Normal file
View File

@ -0,0 +1,98 @@
// Copyright (c) 2019-present, Facebook, Inc.
// All Rights Reserved.
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
#![deny(warnings)]
use clap::Arg;
use cloned::cloned;
use context::CoreContext;
use failure_ext::{err_msg, Error};
use futures::future::Future;
use futures::stream;
use futures::stream::Stream;
use mercurial_types::{HgFileNodeId, HgNodeHash};
use std::str::FromStr;
use tokio;
use cmdlib::args;
/// Application name passed to the argument parser builder in `main`.
const NAME: &str = "rechunker";
/// Number of filenodes rechunked in parallel when `--jobs` is not given.
const DEFAULT_NUM_JOBS: usize = 10;
/// Entry point of the rechunker binary.
///
/// Parses the command line, opens the repository described by the standard
/// Mononoke arguments, and rechunks the content of every filenode given on the
/// command line, running up to `--jobs` rechunk operations at a time.
///
/// # Errors
///
/// Returns an error when `--jobs` is not a positive integer, when a filenode
/// argument cannot be parsed as a hash, when the tokio runtime cannot be
/// created, or when opening the repository or rechunking a file fails.
fn main() -> Result<(), Error> {
    let app = args::MononokeApp {
        safe_writes: false,
        hide_advanced_args: true,
        default_glog: false,
    };
    let matches = app
        .build(NAME)
        .version("0.0.0")
        .about("Rechunk blobs using the filestore")
        .arg(
            Arg::with_name("filenodes")
                .value_name("FILENODES")
                .takes_value(true)
                .required(true)
                .min_values(1)
                .help("filenode IDs for blobs to be rechunked"),
        )
        .arg(
            Arg::with_name("jobs")
                .short("j")
                .long("jobs")
                .value_name("JOBS")
                .takes_value(true)
                .help("The number of filenodes to rechunk in parallel"),
        )
        .get_matches();

    args::init_cachelib(&matches);

    let logger = args::get_logger(&matches);
    // NOTE(review): this is a mock context created by `test_mock()`; confirm
    // that is intended for a binary that runs outside of tests.
    let ctx = CoreContext::test_mock();
    let blobrepo = args::open_repo(&logger, &matches);

    let jobs: usize = matches
        .value_of("jobs")
        .map_or(Ok(DEFAULT_NUM_JOBS), |j| j.parse())
        .map_err(Error::from)?;
    // `jobs` becomes the capacity passed to `buffered` below. A capacity of
    // zero would never start any rechunk future, so the stream could make no
    // progress; reject it up front with a clear message instead of stalling.
    if jobs == 0 {
        return Err(err_msg("--jobs must be at least 1"));
    }

    // Each argument is parsed independently; a parse failure is kept as an
    // `Err` item and surfaces when the stream below reaches it.
    let filenode_ids: Vec<_> = matches
        .values_of("filenodes")
        // "filenodes" is declared `required(true)` above.
        .unwrap()
        .into_iter()
        .map(|f| {
            HgNodeHash::from_str(f)
                .map(HgFileNodeId::new)
                .map_err(|e| err_msg(format!("Invalid Sha1: {}", e)))
        })
        .collect();

    let rechunk = blobrepo.and_then(move |blobrepo| {
        stream::iter_result(filenode_ids)
            .map({
                cloned!(blobrepo);
                move |fid| {
                    // Resolve the filenode to its content id, then rechunk
                    // that content.
                    blobrepo
                        .get_file_envelope(ctx.clone(), fid)
                        .map(|env| env.content_id())
                        .and_then({
                            cloned!(blobrepo, ctx);
                            move |content_id| blobrepo.rechunk_file_by_content_id(ctx, content_id)
                        })
                }
            })
            .buffered(jobs)
            .for_each(|_| Ok(()))
    });

    let mut runtime = tokio::runtime::Runtime::new()?;
    let result = runtime.block_on(rechunk);
    runtime.shutdown_on_idle();
    result
}

View File

@ -92,6 +92,13 @@ function mononoke_hg_sync {
ssh://user@dummy/"$1" sync-once --start-id "$2"
}
# Run the rechunker binary with the shared caching arguments and the test
# config path; any arguments given to this function are appended after them.
function mononoke_rechunker {
  local -a rechunker_args=(
    "${CACHING_ARGS[@]}"
    --mononoke-config-path mononoke-config
    "$@"
  )
  "$MONONOKE_RECHUNKER" "${rechunker_args[@]}"
}
function mononoke_hg_sync_with_retry {
$MONONOKE_HG_SYNC \
"${CACHING_ARGS[@]}" \
@ -365,6 +372,14 @@ readonly=true
CONFIG
fi
# Optionally enable the filestore: when FILESTORE is set (to any value, even
# empty), append a [filestore] section to this repo's server.toml. The chunk
# size comes from FILESTORE_CHUNK_SIZE and falls back to 10 when that variable
# is unset or empty.
if [[ -v FILESTORE ]]; then
cat >> "repos/$reponame/server.toml" <<CONFIG
[filestore]
chunk_size = ${FILESTORE_CHUNK_SIZE:-10}
concurrency = 24
CONFIG
fi
if [[ -v CENSORING_DISABLED ]]; then
cat >> "repos/$reponame/server.toml" <<CONFIG
censoring=false

View File

@ -0,0 +1,47 @@
# Integration test for the rechunker binary: imports a one-file repo, then runs
# the rechunker without and with the filestore configured.
$ CACHEDIR=$PWD/cachepath
$ . "${TEST_FIXTURES}/library.sh"
# setup config
$ REPOTYPE="blob:files"
$ setup_common_config "$REPOTYPE"
$ cd "$TESTTMP"
$ hginit_treemanifest repo-hg
$ cd repo-hg
$ setup_hg_server
# Commit files
$ echo -n f1 > f1
$ hg commit -Aqm "f1"
$ hg bookmark master_bookmark -r tip
$ cd "$TESTTMP"
$ blobimport repo-hg/.hg repo
# Take the filenode hash from the blob file name (its 4th dot-separated field)
$ FILENODE=$(ls "$TESTTMP/repo/blobs" | grep "hgfilenode" | cut -d "." -f 4)
# Check that nothing happens if the filestore is not enabled
$ mononoke_rechunker "$FILENODE"
* INFO using repo "repo" repoid RepositoryId(0) (glob)
$ ls "$TESTTMP/repo/blobs" | grep hgfilenode
blob-repo0000.hgfilenode.sha1.92c09d364cd563132d6eb5f1424ff63523d51f73
# Check that the rechunker complains about an unknown filenode
$ mononoke_rechunker "ffffffffffffffffffffffffffffffffffffffff"
* INFO using repo "repo" repoid RepositoryId(0) (glob)
Error: HgContentMissing(HgNodeHash(Sha1(ffffffffffffffffffffffffffffffffffffffff)), File(Regular))
[1]
# Create a new config with the filestore configured
$ rm -rf "$TESTTMP/mononoke-config"
$ FILESTORE_CHUNK_SIZE=1 FILESTORE=1 setup_common_config "$REPOTYPE"
$ cd "$TESTTMP"
# Rechunk the same filenode now that the filestore section is present
$ mononoke_rechunker "$FILENODE"
* INFO using repo "repo" repoid RepositoryId(0) (glob)
# f1 holds the two characters "f1"; presumably a chunk size of 1 yields one
# chunk blob per byte, hence the expected count of 2 (confirm chunk_size units)
$ ls "$TESTTMP/repo/blobs" | grep chunk | wc -l
2