mononoke: walker add selectable output format

Summary: Sometimes the pretty debug format is too verbose, and the one-per-line regular debug format is preferable.

Reviewed By: ikostia

Differential Revision: D24996432

fbshipit-source-id: 1acda3985658e4c17b57e36734c77b7579e7e28a
This commit is contained in:
Alex Hornby 2020-11-18 04:45:49 -08:00 committed by Facebook GitHub Bot
parent 04c26ffd11
commit 5f5e222c3d
3 changed files with 50 additions and 6 deletions

View File

@ -61,3 +61,13 @@ Output pretty debug to stdout
Final count: (2, 2)
Bytes/s,* (glob)
* Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:1,1,0 (glob)
Output non-pretty debug to stdout
$ mononoke_walker --readonly-storage scrub -q --bookmark master_bookmark -I shallow -i bonsai --include-output-node-type=Changeset --output-format=Debug 2>&1 | strip_glog
Walking roots * (glob)
Walking edge types [BookmarkToChangeset]
Walking node types [Bookmark, Changeset]
Node Changeset(ChangesetId(Blake2(c3384961b16276f2db77df9d7c874bbe981cf0525bd6f84a502f919044f2dabd))): NodeData: Some(Changeset(BonsaiChangeset { inner: BonsaiChangesetMut { parents: [ChangesetId(Blake2(459f16ae564c501cb408c1e5b60fc98a1e8b8e97b9409c7520658bfa1577fb66))], author: "test", author_date: DateTime(1970-01-01T00:00:00+00:00), committer: None, committer_date: None, message: "C", extra: {}, file_changes: {MPath("C"): Some(FileChange { content_id: ContentId(Blake2(896ad5879a5df0403bfc93fc96507ad9c93b31b11f3d0fa05445da7918241e5d)), file_type: Regular, size: 1, copy_from: None })} }, id: ChangesetId(Blake2(c3384961b16276f2db77df9d7c874bbe981cf0525bd6f84a502f919044f2dabd)) }))
Final count: (2, 2)
Bytes/s,* (glob)
* Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:1,1,0 (glob)

View File

@ -13,10 +13,11 @@ use crate::progress::{
};
use crate::sampling::{SamplingWalkVisitor, WalkSampleMapping};
use crate::setup::{
parse_node_types, setup_common, DEFAULT_INCLUDE_NODE_TYPES, EXCLUDE_OUTPUT_NODE_TYPE_ARG,
EXCLUDE_SAMPLE_NODE_TYPE_ARG, INCLUDE_OUTPUT_NODE_TYPE_ARG, INCLUDE_SAMPLE_NODE_TYPE_ARG,
LIMIT_DATA_FETCH_ARG, PROGRESS_INTERVAL_ARG, PROGRESS_SAMPLE_DURATION_S, PROGRESS_SAMPLE_RATE,
PROGRESS_SAMPLE_RATE_ARG, SAMPLE_OFFSET_ARG, SAMPLE_RATE_ARG, SCRUB,
parse_node_types, setup_common, OutputFormat, DEFAULT_INCLUDE_NODE_TYPES,
EXCLUDE_OUTPUT_NODE_TYPE_ARG, EXCLUDE_SAMPLE_NODE_TYPE_ARG, INCLUDE_OUTPUT_NODE_TYPE_ARG,
INCLUDE_SAMPLE_NODE_TYPE_ARG, LIMIT_DATA_FETCH_ARG, OUTPUT_FORMAT_ARG, PROGRESS_INTERVAL_ARG,
PROGRESS_SAMPLE_DURATION_S, PROGRESS_SAMPLE_RATE, PROGRESS_SAMPLE_RATE_ARG, SAMPLE_OFFSET_ARG,
SAMPLE_RATE_ARG, SCRUB,
};
use crate::sizing::SizingSample;
use crate::tail::{walk_exact_tail, RepoWalkRun};
@ -41,6 +42,7 @@ use stats::prelude::*;
use std::{
collections::{HashMap, HashSet},
fmt,
str::FromStr,
sync::Arc,
time::Duration,
};
@ -100,6 +102,7 @@ fn loading_stream<InStream, SS>(
s: InStream,
sampler: Arc<WalkSampleMapping<Node, ScrubSample>>,
output_node_types: HashSet<NodeType>,
output_format: OutputFormat,
) -> impl Stream<Item = Result<(Node, Option<NodeData>, Option<ScrubStats>), Error>>
where
InStream: Stream<Item = Result<(Node, Option<NodeData>, Option<SS>), Error>> + 'static + Send,
@ -125,7 +128,13 @@ where
}
data_opt => {
if output_node_types.contains(&n.get_type()) {
println!("Node {:?}: NodeData: {:#?}", n, data_opt);
match output_format {
OutputFormat::Debug => println!("Node {:?}: NodeData: {:?}", n, data_opt),
// Keep Node as non-Pretty so it's on the same line
OutputFormat::PrettyDebug => {
println!("Node {:?}: NodeData: {:#?}", n, data_opt)
}
}
}
let size = data_opt
.as_ref()
@ -357,6 +366,10 @@ pub async fn scrub_objects<'a>(
&[],
)?;
let output_format = sub_m
.value_of(OUTPUT_FORMAT_ARG)
.map_or(Ok(OutputFormat::PrettyDebug), OutputFormat::from_str)?;
let mut sampling_node_types = parse_node_types(
sub_m,
INCLUDE_SAMPLE_NODE_TYPE_ARG,
@ -388,6 +401,7 @@ pub async fn scrub_objects<'a>(
walk_progress,
scrub_sampler,
output_node_types,
output_format,
);
let report_sizing = progress_stream(quiet, &sizing_progress_state.clone(), loading);

View File

@ -41,7 +41,8 @@ use std::{
sync::Arc,
time::Duration,
};
use strum::IntoEnumIterator;
use strum::{IntoEnumIterator, VariantNames};
use strum_macros::{AsRefStr, EnumString, EnumVariantNames};
pub struct RepoWalkDatasources {
pub blobrepo: BlobRepo,
@ -100,6 +101,7 @@ pub const EXCLUDE_SAMPLE_NODE_TYPE_ARG: &str = "exclude-sample-node-type";
pub const INCLUDE_SAMPLE_NODE_TYPE_ARG: &str = "include-sample-node-type";
pub const EXCLUDE_OUTPUT_NODE_TYPE_ARG: &str = "exclude-output-node-type";
pub const INCLUDE_OUTPUT_NODE_TYPE_ARG: &str = "include-output-node-type";
pub const OUTPUT_FORMAT_ARG: &str = "output-format";
pub const OUTPUT_DIR_ARG: &str = "output-dir";
const SCUBA_TABLE_ARG: &str = "scuba-table";
const SCUBA_LOG_FILE_ARG: &str = "scuba-log-file";
@ -260,6 +262,12 @@ const CONTENT_META_EDGE_TYPES: &[EdgeType] = &[
EdgeType::AliasContentMappingToFileContent,
];
/// Output format for nodes printed to stdout during a scrub walk.
///
/// `Debug` prints each node with compact `{:?}` formatting (one node per
/// line); `PrettyDebug` uses the multi-line alternate `{:#?}` formatting.
///
/// The strum derives wire this enum into the CLI: `EnumVariantNames`
/// supplies `OutputFormat::VARIANTS` for clap's `possible_values`,
/// `AsRefStr` supplies `as_ref()` for the default value, and `EnumString`
/// supplies `FromStr` for parsing the user's `--output-format` argument.
#[derive(Clone, Debug, PartialEq, Eq, AsRefStr, EnumVariantNames, EnumString)]
pub enum OutputFormat {
    Debug,
    PrettyDebug,
}
// Things like phases and obs markers will go here
const MARKER_EDGE_TYPES: &[EdgeType] = &[EdgeType::ChangesetToPhaseMapping];
@ -365,6 +373,18 @@ pub fn setup_toplevel_app<'a, 'b>(app_name: &str) -> App<'a, 'b> {
.number_of_values(1)
.required(false)
.help("Node types to output in debug stdout"),
)
.arg(
Arg::with_name(OUTPUT_FORMAT_ARG)
.long(OUTPUT_FORMAT_ARG)
.short("F")
.takes_value(true)
.multiple(false)
.number_of_values(1)
.possible_values(OutputFormat::VARIANTS)
.default_value(OutputFormat::PrettyDebug.as_ref())
.required(false)
.help("Set the output format"),
);
let compression_benefit = setup_subcommand_args(