Mirror of https://github.com/facebook/sapling.git
mononoke: add a simple binary that can compute statistics about commits
Summary: It's nice to have a tool that can quickly count and print stats about a commit. I'm using it now to understand the performance of derived data.

Reviewed By: ahornby

Differential Revision: D30865267

fbshipit-source-id: 26b91c3c05a1c417015b5be228796589348bf064
parent 8cbd19ba9c
commit 0bceacec63
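For context: the new binary reads a file with one commit hash or bookmark per line, resolves each entry, and prints a pretty-printed JSON array of per-commit stats. A hypothetical invocation (the binary name is inferred from its directory in the diff below; --input-file is the flag the binary defines; the standard repo-selection flags added by Mononoke's argument parser are omitted):

    compute_commit_stats --input-file commits.txt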
@@ -11,12 +11,13 @@ use anyhow::{bail, format_err, Context, Error, Result};
 use fbinit::FacebookInit;
 use futures::{
     future::{self, Either},
-    TryFutureExt,
+    stream, StreamExt, TryFutureExt, TryStreamExt,
 };
 use services::Fb303Service;
 use slog::{error, info, Logger};
 use tokio::runtime::{Handle, Runtime};
 use tokio::{
+    io::AsyncBufReadExt,
     signal::unix::{signal, SignalKind},
     time,
 };
@@ -75,19 +76,43 @@ pub async fn csid_resolve(
     container: impl RepoIdentityRef + BonsaiHgMappingRef + BookmarksRef,
     hash_or_bookmark: impl ToString,
 ) -> Result<ChangesetId, Error> {
-    let res = csid_resolve_impl(ctx, container, hash_or_bookmark).await;
+    let res = csid_resolve_impl(ctx, &container, hash_or_bookmark).await;
     if let Ok(csid) = &res {
         info!(ctx.logger(), "changeset resolved as: {:?}", csid);
     }
     res
 }

-/// Resolve changeset id by either bookmark name, hg hash, or changeset id hash
-async fn csid_resolve_impl(
+/// Read and resolve a list of changesets from a file
+pub async fn csids_resolve_from_file(
     ctx: &CoreContext,
     container: impl RepoIdentityRef + BonsaiHgMappingRef + BookmarksRef,
+    filename: &str,
+) -> Result<Vec<ChangesetId>, Error> {
+    let file = tokio::fs::File::open(filename).await?;
+    let file = tokio::io::BufReader::new(file);
+    let mut lines = file.lines();
+    let mut csids = vec![];
+    while let Some(line) = lines.next_line().await? {
+        csids.push(line);
+    }
+
+    stream::iter(csids)
+        .map(|csid| csid_resolve_impl(&ctx, &container, csid))
+        .buffered(100)
+        .try_collect::<Vec<_>>()
+        .await
+}
+
+/// Resolve changeset id by either bookmark name, hg hash, or changeset id hash
+async fn csid_resolve_impl<C>(
+    ctx: &CoreContext,
+    container: &C,
     hash_or_bookmark: impl ToString,
-) -> Result<ChangesetId, Error> {
+) -> Result<ChangesetId, Error>
+where
+    C: RepoIdentityRef + BonsaiHgMappingRef + BookmarksRef,
+{
     let hash_or_bookmark = hash_or_bookmark.to_string();
     if let Ok(name) = BookmarkName::new(hash_or_bookmark.clone()) {
         if let Some(cs_id) = container.bookmarks().get(ctx.clone(), &name).await? {
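The new csids_resolve_from_file helper combines tokio's buffered line reading with bounded-concurrency resolution. Below is a minimal standalone sketch of the same pattern, with a toy resolve function standing in for csid_resolve_impl; all names here are illustrative, not Mononoke APIs:

    use anyhow::Result;
    use futures::{stream, StreamExt, TryStreamExt};
    use tokio::io::AsyncBufReadExt;

    // Stand-in for csid_resolve_impl: a "changeset id" is just a number here.
    async fn resolve(line: String) -> Result<u64> {
        Ok(line.trim().parse()?)
    }

    // Read one id per line, then resolve with at most 100 lookups in flight.
    async fn ids_from_file(filename: &str) -> Result<Vec<u64>> {
        let file = tokio::fs::File::open(filename).await?;
        let mut lines = tokio::io::BufReader::new(file).lines();
        let mut raw = vec![];
        while let Some(line) = lines.next_line().await? {
            raw.push(line);
        }
        // `buffered` (unlike `buffer_unordered`) yields results in input
        // order, so the output Vec lines up with the input file.
        stream::iter(raw)
            .map(resolve)
            .buffered(100)
            .try_collect()
            .await
    }

Note that try_collect short-circuits: one unresolvable line fails the whole batch, which matches the helper's Result<Vec<ChangesetId>, Error> signature.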
@@ -52,7 +52,6 @@ use std::{
     time::{Duration, Instant},
 };
 use time_ext::DurationExt;
-use tokio::io::AsyncBufReadExt;
 use tunables::tunables;

 mod benchmark;
@@ -628,14 +627,7 @@ async fn run_subcmd<'a>(
         .ok_or_else(|| anyhow!("{} is not set", ARG_INPUT_FILE))?;

     let repo: BlobRepo = args::open_repo_unredacted(fb, logger, matches).await?;
-    let file = tokio::fs::File::open(input_file).await?;
-    let file = tokio::io::BufReader::new(file);
-    let mut lines = file.lines();
-    let mut csids = vec![];
-    while let Some(line) = lines.next_line().await? {
-        csids.push(line);
-    }
-    let csids = resolve_csids(&ctx, &repo, csids).await?;
+    let csids = helpers::csids_resolve_from_file(&ctx, &repo, input_file).await?;

     let derived_data_type = sub_m
         .value_of(ARG_DERIVED_DATA_TYPE)
@@ -670,18 +662,6 @@ async fn run_subcmd<'a>(
     }
 }

-async fn resolve_csids(
-    ctx: &CoreContext,
-    repo: &BlobRepo,
-    csids: Vec<String>,
-) -> Result<Vec<ChangesetId>, Error> {
-    stream::iter(csids)
-        .map(|csid| helpers::csid_resolve(&ctx, repo.clone(), csid))
-        .buffered(100)
-        .try_collect::<Vec<_>>()
-        .await
-}
-
 fn parse_serialized_commits<P: AsRef<Path>>(file: P) -> Result<Vec<ChangesetEntry>> {
     let data = fs::read(file)?;
     deserialize_cs_entries(&Bytes::from(data))
eden/mononoke/cmds/compute_commit_stats/src/main.rs (new file, 136 lines)
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This software may be used and distributed according to the terms of the
+ * GNU General Public License version 2.
+ */
+
+use anyhow::{anyhow, Error};
+use blobrepo::BlobRepo;
+use blobstore::Loadable;
+use borrowed::borrowed;
+use clap::Arg;
+use cmdlib::{
+    args::{self, MononokeMatches},
+    helpers,
+};
+use context::CoreContext;
+use derived_data::BonsaiDerived;
+use fbinit::FacebookInit;
+use futures::{stream, StreamExt, TryStreamExt};
+use manifest::{Entry, ManifestOps};
+use mononoke_types::ChangesetId;
+use serde::Serialize;
+use skeleton_manifest::RootSkeletonManifestId;
+
+const ARG_IN_FILE: &str = "input-file";
+
+async fn run<'a>(fb: FacebookInit, matches: &'a MononokeMatches<'a>) -> Result<(), Error> {
+    let logger = matches.logger();
+    let ctx = CoreContext::new_with_logger(fb, logger.clone());
+
+    let repo: BlobRepo = args::open_repo(fb, ctx.logger(), &matches).await?;
+
+    let input_file = matches
+        .value_of(ARG_IN_FILE)
+        .ok_or_else(|| anyhow!("{} not set", ARG_IN_FILE))?;
+
+    let csids = helpers::csids_resolve_from_file(&ctx, &repo, input_file).await?;
+
+    borrowed!(ctx, repo);
+    let commit_stats = stream::iter(csids)
+        .map(|cs_id| async move { find_commit_stat(&ctx, &repo, cs_id).await })
+        .buffered(100)
+        .try_collect::<Vec<_>>()
+        .await?;
+
+    println!("{}", serde_json::to_string_pretty(&commit_stats)?);
+
+    Ok(())
+}
+
+#[derive(Serialize)]
+struct CommitStat {
+    cs_id: ChangesetId,
+    largest_touched_dir_size: u64,
+    largest_touched_dir_name: String,
+    num_changed_files: u64,
+    sum_of_sizes_of_all_changed_directories: u64,
+}
+
+async fn find_commit_stat(
+    ctx: &CoreContext,
+    repo: &BlobRepo,
+    cs_id: ChangesetId,
+) -> Result<CommitStat, Error> {
+    let bcs = cs_id.load(ctx, repo.blobstore()).await?;
+    let mut paths = vec![];
+    for (path, _) in bcs.file_changes() {
+        paths.extend(path.clone().into_parent_dir_iter());
+    }
+    let root_skeleton_id = RootSkeletonManifestId::derive(&ctx, repo, cs_id).await?;
+    let entries = root_skeleton_id
+        .skeleton_manifest_id()
+        .find_entries(ctx.clone(), repo.get_blobstore(), paths)
+        .try_filter_map(|(path, entry)| async move {
+            let tree = match entry {
+                Entry::Tree(tree_id) => Some((path, tree_id)),
+                Entry::Leaf(_) => None,
+            };
+            Ok(tree)
+        })
+        .map_ok(|(path, tree_id)| async move {
+            let entry = tree_id.load(ctx, &repo.get_blobstore()).await?;
+            Ok((path, entry.list().collect::<Vec<_>>().len()))
+        })
+        .try_buffer_unordered(100)
+        .try_collect::<Vec<_>>()
+        .await?;
+
+    let mut sum_of_sizes_of_all_changed_directories: u64 = 0;
+    for (_, size) in &entries {
+        sum_of_sizes_of_all_changed_directories += (*size) as u64;
+    }
+
+    let (largest_touched_dir_size, largest_touched_dir_name) = entries
+        .into_iter()
+        .max_by_key(|(_, size)| *size)
+        .map(|(path, size)| (size as u64, path))
+        .unwrap_or_else(|| (0, None));
+
+    let largest_touched_dir_name = match largest_touched_dir_name {
+        Some(dir_name) => {
+            format!("{}", dir_name)
+        }
+        None => "root".to_string(),
+    };
+    let stat = CommitStat {
+        cs_id,
+        largest_touched_dir_size,
+        largest_touched_dir_name,
+        num_changed_files: bcs.file_changes_map().len() as u64,
+        sum_of_sizes_of_all_changed_directories,
+    };
+
+    Ok(stat)
+}
+
+#[fbinit::main]
+fn main(fb: FacebookInit) -> Result<(), Error> {
+    let matches = args::MononokeAppBuilder::new("Binary that can compute stats about commits")
+        .with_advanced_args_hidden()
+        .build()
+        .about("A tool to collect different stats about commits")
+        .arg(
+            Arg::with_name(ARG_IN_FILE)
+                .long(ARG_IN_FILE)
+                .required(true)
+                .takes_value(true)
+                .help("Filename with commit hashes or bookmarks"),
+        )
+        .get_matches(fb)?;
+
+    let runtime = tokio::runtime::Runtime::new()?;
+    runtime.block_on(run(fb, &matches))
+}
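The aggregation at the end of find_commit_stat reduces to a small pattern: sum the per-directory entry counts and pick the largest. A standalone sketch under simplified types, where Option<String> stands in for Mononoke's Option<MPath> and None means the repo root (the values in main are made up for illustration):

    // Given (directory, child-count) pairs, compute the largest directory
    // and the total size, defaulting to the root when nothing was touched.
    fn aggregate(entries: Vec<(Option<String>, usize)>) -> (u64, String, u64) {
        let sum: u64 = entries.iter().map(|(_, size)| *size as u64).sum();
        let (largest_size, largest_name) = entries
            .into_iter()
            .max_by_key(|(_, size)| *size)
            .map(|(path, size)| (size as u64, path))
            .unwrap_or((0, None));
        (largest_size, largest_name.unwrap_or_else(|| "root".to_string()), sum)
    }

    fn main() {
        let entries = vec![
            (None, 3), // repo root with 3 entries
            (Some("eden".to_string()), 12),
            (Some("eden/mononoke".to_string()), 40),
        ];
        assert_eq!(aggregate(entries), (40, "eden/mononoke".to_string(), 55));
    }

The serialized output is then a JSON array with one record per commit, carrying cs_id, largest_touched_dir_size, largest_touched_dir_name, num_changed_files, and sum_of_sizes_of_all_changed_directories.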