mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 08:47:12 +03:00
benchmarks: add benchmark_large_directory
Summary: Add a microbenchmark for deriving data with large directories. This benchmark creates a commit with 100k files in a single directory, and then derives data for that commit and 10 descendant commits, each of which adds, modifies and removes some files. Reviewed By: ahornby Differential Revision: D26947361 fbshipit-source-id: 4215f1ac806c53a112217ceb10e50cfad56f4f28
This commit is contained in:
parent
eb4d31cc82
commit
36f78eadb8
@ -239,6 +239,7 @@ toml = { git = "https://github.com/jsgf/toml-rs", branch = "dotted-table-0.5.7"
|
||||
members = [
|
||||
".",
|
||||
"alpn",
|
||||
"benchmarks/derived_data",
|
||||
"benchmarks/simulated_repo",
|
||||
"blobimport_lib",
|
||||
"blobimport_lib/consts",
|
||||
|
27
eden/mononoke/benchmarks/derived_data/Cargo.toml
Normal file
27
eden/mononoke/benchmarks/derived_data/Cargo.toml
Normal file
@ -0,0 +1,27 @@
|
||||
[package]
|
||||
name = "benchmark_large_directory"
|
||||
version = "0.1.0"
|
||||
authors = ["Facebook"]
|
||||
edition = "2018"
|
||||
license = "GPLv2+"
|
||||
|
||||
[[bin]]
|
||||
name = "benchmark_large_directory"
|
||||
path = "benchmark_large_directory.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
blobrepo = { version = "0.1.0", path = "../../blobrepo" }
|
||||
blobrepo_factory = { version = "0.1.0", path = "../../blobrepo/factory" }
|
||||
context = { version = "0.1.0", path = "../../server/context" }
|
||||
deleted_files_manifest = { version = "0.1.0", path = "../../derived_data/deleted_files_manifest" }
|
||||
derived_data = { version = "0.1.0", path = "../../derived_data" }
|
||||
fbinit = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" }
|
||||
fbinit-tokio-02 = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" }
|
||||
fsnodes = { version = "0.1.0", path = "../../derived_data/fsnodes" }
|
||||
futures_stats = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" }
|
||||
mononoke_types = { version = "0.1.0", path = "../../mononoke_types" }
|
||||
rand = { version = "0.7", features = ["small_rng"] }
|
||||
skeleton_manifest = { version = "0.1.0", path = "../../derived_data/skeleton_manifest" }
|
||||
tests_utils = { version = "0.1.0", path = "../../tests/utils" }
|
||||
unodes = { version = "0.1.0", path = "../../derived_data/unodes" }
|
@ -0,0 +1,181 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||
*
|
||||
* This software may be used and distributed according to the terms of the
|
||||
* GNU General Public License version 2.
|
||||
*/
|
||||
|
||||
//! This benchmark generates a single initial commit that adds 100k files to
|
||||
//! a single large directory, and then 10 more commits that add, modify, and
|
||||
//! remove some of those files at random.
|
||||
//!
|
||||
//! It then benchmarks deriving one of the derived data types (fsnodes,
|
||||
//! unodes, skeleton manifest or deleted files manifests) for those commits.
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use anyhow::Result;
|
||||
use blobrepo::BlobRepo;
|
||||
use context::CoreContext;
|
||||
use deleted_files_manifest::RootDeletedManifestId;
|
||||
use derived_data::{BonsaiDerivable, BonsaiDerived};
|
||||
use fbinit::FacebookInit;
|
||||
use fsnodes::RootFsnodeId;
|
||||
use futures_stats::TimedFutureExt;
|
||||
use mononoke_types::ChangesetId;
|
||||
use rand::distributions::{Alphanumeric, Uniform};
|
||||
use rand::{thread_rng, Rng};
|
||||
use skeleton_manifest::RootSkeletonManifestId;
|
||||
use tests_utils::CreateCommitContext;
|
||||
use unodes::RootUnodeManifestId;
|
||||
|
||||
fn gen_filename(rng: &mut impl Rng, len: usize) -> String {
|
||||
std::iter::repeat_with(|| rng.sample(Alphanumeric))
|
||||
.take(len)
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn make_initial_large_directory(
|
||||
ctx: &CoreContext,
|
||||
repo: &BlobRepo,
|
||||
count: usize,
|
||||
) -> Result<(ChangesetId, BTreeSet<String>)> {
|
||||
let mut filenames = BTreeSet::new();
|
||||
let mut rng = thread_rng();
|
||||
let len_distr = Uniform::new(5, 50);
|
||||
while filenames.len() < count {
|
||||
let len = rng.sample(len_distr);
|
||||
let filename = gen_filename(&mut rng, len);
|
||||
filenames.insert(filename);
|
||||
}
|
||||
|
||||
let mut create = CreateCommitContext::new_root(ctx, repo);
|
||||
for filename in filenames.iter() {
|
||||
create = create.add_file(
|
||||
format!("large_directory/{}", filename).as_str(),
|
||||
format!("content of {}", filename),
|
||||
);
|
||||
}
|
||||
let csid = create.commit().await?;
|
||||
|
||||
Ok((csid, filenames))
|
||||
}
|
||||
|
||||
async fn modify_large_directory(
|
||||
ctx: &CoreContext,
|
||||
repo: &BlobRepo,
|
||||
filenames: &mut BTreeSet<String>,
|
||||
csid: ChangesetId,
|
||||
index: usize,
|
||||
add_count: usize,
|
||||
modify_count: usize,
|
||||
delete_count: usize,
|
||||
) -> Result<ChangesetId> {
|
||||
let mut create = CreateCommitContext::new(ctx, repo, vec![csid]);
|
||||
let mut rng = thread_rng();
|
||||
let len_distr = Uniform::new(5, 50);
|
||||
|
||||
let mut add_filenames = BTreeSet::new();
|
||||
while add_filenames.len() < add_count {
|
||||
let len = rng.sample(len_distr);
|
||||
let filename = gen_filename(&mut rng, len);
|
||||
if !filenames.contains(&filename) {
|
||||
add_filenames.insert(filename);
|
||||
}
|
||||
}
|
||||
|
||||
let delete_count = delete_count.min(filenames.len());
|
||||
let modify_count = modify_count.min(filenames.len() - delete_count);
|
||||
let mut modify_filename_indexes = BTreeSet::new();
|
||||
let index_distr = Uniform::new(0, filenames.len());
|
||||
while modify_filename_indexes.len() < modify_count {
|
||||
let index = rng.sample(index_distr);
|
||||
modify_filename_indexes.insert(index);
|
||||
}
|
||||
let mut delete_filename_indexes = BTreeSet::new();
|
||||
while delete_filename_indexes.len() < delete_count {
|
||||
let index = rng.sample(index_distr);
|
||||
if !modify_filename_indexes.contains(&index) {
|
||||
delete_filename_indexes.insert(index);
|
||||
}
|
||||
}
|
||||
let mut modify_filenames = BTreeSet::new();
|
||||
let mut delete_filenames = BTreeSet::new();
|
||||
for (index, filename) in filenames.iter().enumerate() {
|
||||
if modify_filename_indexes.contains(&index) {
|
||||
modify_filenames.insert(filename);
|
||||
} else if delete_filename_indexes.contains(&index) {
|
||||
delete_filenames.insert(filename);
|
||||
}
|
||||
}
|
||||
|
||||
for filename in add_filenames.iter().chain(modify_filenames) {
|
||||
create = create.add_file(
|
||||
format!("large_directory/{}", filename).as_str(),
|
||||
format!("content {} of {}", index, filename),
|
||||
);
|
||||
}
|
||||
for filename in delete_filenames.iter() {
|
||||
create = create.delete_file(format!("large_directory/{}", filename).as_str());
|
||||
}
|
||||
|
||||
let csid = create.commit().await?;
|
||||
Ok(csid)
|
||||
}
|
||||
|
||||
async fn derive(ctx: &CoreContext, repo: &BlobRepo, data: &str, csid: ChangesetId) -> String {
|
||||
match data {
|
||||
RootSkeletonManifestId::NAME => RootSkeletonManifestId::derive(&ctx, &repo, csid)
|
||||
.await
|
||||
.unwrap()
|
||||
.skeleton_manifest_id()
|
||||
.to_string(),
|
||||
RootUnodeManifestId::NAME => RootUnodeManifestId::derive(&ctx, &repo, csid)
|
||||
.await
|
||||
.unwrap()
|
||||
.manifest_unode_id()
|
||||
.to_string(),
|
||||
RootDeletedManifestId::NAME => RootDeletedManifestId::derive(&ctx, &repo, csid)
|
||||
.await
|
||||
.unwrap()
|
||||
.deleted_manifest_id()
|
||||
.to_string(),
|
||||
RootFsnodeId::NAME => RootFsnodeId::derive(&ctx, &repo, csid)
|
||||
.await
|
||||
.unwrap()
|
||||
.fsnode_id()
|
||||
.to_string(),
|
||||
_ => panic!("invalid derived data type: {}", data),
|
||||
}
|
||||
}
|
||||
|
||||
#[fbinit::main]
|
||||
async fn main(fb: FacebookInit) -> Result<()> {
|
||||
let ctx = CoreContext::test_mock(fb);
|
||||
|
||||
let mut args = std::env::args();
|
||||
let _ = args.next();
|
||||
let data = args.next().unwrap_or_else(|| String::from("fsnodes"));
|
||||
println!("Deriving: {}", data);
|
||||
|
||||
let repo = blobrepo_factory::new_memblob_empty(None)?;
|
||||
|
||||
let (mut csid, mut filenames) = make_initial_large_directory(&ctx, &repo, 100_000).await?;
|
||||
|
||||
println!("First commit: {}", csid);
|
||||
let (stats, derived_id) = derive(&ctx, &repo, &data, csid).timed().await;
|
||||
println!("Derived id: {} stats: {:?}", derived_id, stats);
|
||||
|
||||
let commit_count = 10;
|
||||
|
||||
for commit in 0..commit_count {
|
||||
csid =
|
||||
modify_large_directory(&ctx, &repo, &mut filenames, csid, commit, 25, 100, 25).await?;
|
||||
}
|
||||
|
||||
println!("Last commit: {}", csid);
|
||||
let (stats, derived_id) = derive(&ctx, &repo, &data, csid).timed().await;
|
||||
println!("Derived id: {} stats: {:?}", derived_id, stats);
|
||||
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in New Issue
Block a user