mononoke: walker add selectable output format

Summary: Sometimes the pretty debug format is too verbose, and the one-per-line regular debug format is preferable.

Reviewed By: ikostia

Differential Revision: D24996432

fbshipit-source-id: 1acda3985658e4c17b57e36734c77b7579e7e28a
This commit is contained in:
Alex Hornby 2020-11-18 04:45:49 -08:00 committed by Facebook GitHub Bot
parent 04c26ffd11
commit 5f5e222c3d
3 changed files with 50 additions and 6 deletions

View File

@ -61,3 +61,13 @@ Output pretty debug to stdout
Final count: (2, 2)
Bytes/s,* (glob)
* Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:1,1,0 (glob)
Output non-pretty debug to stdout
$ mononoke_walker --readonly-storage scrub -q --bookmark master_bookmark -I shallow -i bonsai --include-output-node-type=Changeset --output-format=Debug 2>&1 | strip_glog
Walking roots * (glob)
Walking edge types [BookmarkToChangeset]
Walking node types [Bookmark, Changeset]
Node Changeset(ChangesetId(Blake2(c3384961b16276f2db77df9d7c874bbe981cf0525bd6f84a502f919044f2dabd))): NodeData: Some(Changeset(BonsaiChangeset { inner: BonsaiChangesetMut { parents: [ChangesetId(Blake2(459f16ae564c501cb408c1e5b60fc98a1e8b8e97b9409c7520658bfa1577fb66))], author: "test", author_date: DateTime(1970-01-01T00:00:00+00:00), committer: None, committer_date: None, message: "C", extra: {}, file_changes: {MPath("C"): Some(FileChange { content_id: ContentId(Blake2(896ad5879a5df0403bfc93fc96507ad9c93b31b11f3d0fa05445da7918241e5d)), file_type: Regular, size: 1, copy_from: None })} }, id: ChangesetId(Blake2(c3384961b16276f2db77df9d7c874bbe981cf0525bd6f84a502f919044f2dabd)) }))
Final count: (2, 2)
Bytes/s,* (glob)
* Type:Walked,Checks,Children Bookmark:1,1,1 Changeset:1,1,0 (glob)

View File

@ -13,10 +13,11 @@ use crate::progress::{
};
use crate::sampling::{SamplingWalkVisitor, WalkSampleMapping};
use crate::setup::{
parse_node_types, setup_common, DEFAULT_INCLUDE_NODE_TYPES, EXCLUDE_OUTPUT_NODE_TYPE_ARG,
EXCLUDE_SAMPLE_NODE_TYPE_ARG, INCLUDE_OUTPUT_NODE_TYPE_ARG, INCLUDE_SAMPLE_NODE_TYPE_ARG,
LIMIT_DATA_FETCH_ARG, PROGRESS_INTERVAL_ARG, PROGRESS_SAMPLE_DURATION_S, PROGRESS_SAMPLE_RATE,
PROGRESS_SAMPLE_RATE_ARG, SAMPLE_OFFSET_ARG, SAMPLE_RATE_ARG, SCRUB,
parse_node_types, setup_common, OutputFormat, DEFAULT_INCLUDE_NODE_TYPES,
EXCLUDE_OUTPUT_NODE_TYPE_ARG, EXCLUDE_SAMPLE_NODE_TYPE_ARG, INCLUDE_OUTPUT_NODE_TYPE_ARG,
INCLUDE_SAMPLE_NODE_TYPE_ARG, LIMIT_DATA_FETCH_ARG, OUTPUT_FORMAT_ARG, PROGRESS_INTERVAL_ARG,
PROGRESS_SAMPLE_DURATION_S, PROGRESS_SAMPLE_RATE, PROGRESS_SAMPLE_RATE_ARG, SAMPLE_OFFSET_ARG,
SAMPLE_RATE_ARG, SCRUB,
};
use crate::sizing::SizingSample;
use crate::tail::{walk_exact_tail, RepoWalkRun};
@ -41,6 +42,7 @@ use stats::prelude::*;
use std::{
collections::{HashMap, HashSet},
fmt,
str::FromStr,
sync::Arc,
time::Duration,
};
@ -100,6 +102,7 @@ fn loading_stream<InStream, SS>(
s: InStream,
sampler: Arc<WalkSampleMapping<Node, ScrubSample>>,
output_node_types: HashSet<NodeType>,
output_format: OutputFormat,
) -> impl Stream<Item = Result<(Node, Option<NodeData>, Option<ScrubStats>), Error>>
where
InStream: Stream<Item = Result<(Node, Option<NodeData>, Option<SS>), Error>> + 'static + Send,
@ -125,7 +128,13 @@ where
}
data_opt => {
if output_node_types.contains(&n.get_type()) {
println!("Node {:?}: NodeData: {:#?}", n, data_opt);
match output_format {
OutputFormat::Debug => println!("Node {:?}: NodeData: {:?}", n, data_opt),
// Keep Node as non-Pretty so it's on the same line
OutputFormat::PrettyDebug => {
println!("Node {:?}: NodeData: {:#?}", n, data_opt)
}
}
}
let size = data_opt
.as_ref()
@ -357,6 +366,10 @@ pub async fn scrub_objects<'a>(
&[],
)?;
let output_format = sub_m
.value_of(OUTPUT_FORMAT_ARG)
.map_or(Ok(OutputFormat::PrettyDebug), OutputFormat::from_str)?;
let mut sampling_node_types = parse_node_types(
sub_m,
INCLUDE_SAMPLE_NODE_TYPE_ARG,
@ -388,6 +401,7 @@ pub async fn scrub_objects<'a>(
walk_progress,
scrub_sampler,
output_node_types,
output_format,
);
let report_sizing = progress_stream(quiet, &sizing_progress_state.clone(), loading);

View File

@ -41,7 +41,8 @@ use std::{
sync::Arc,
time::Duration,
};
use strum::IntoEnumIterator;
use strum::{IntoEnumIterator, VariantNames};
use strum_macros::{AsRefStr, EnumString, EnumVariantNames};
pub struct RepoWalkDatasources {
pub blobrepo: BlobRepo,
@ -100,6 +101,7 @@ pub const EXCLUDE_SAMPLE_NODE_TYPE_ARG: &str = "exclude-sample-node-type";
pub const INCLUDE_SAMPLE_NODE_TYPE_ARG: &str = "include-sample-node-type";
pub const EXCLUDE_OUTPUT_NODE_TYPE_ARG: &str = "exclude-output-node-type";
pub const INCLUDE_OUTPUT_NODE_TYPE_ARG: &str = "include-output-node-type";
pub const OUTPUT_FORMAT_ARG: &str = "output-format";
pub const OUTPUT_DIR_ARG: &str = "output-dir";
const SCUBA_TABLE_ARG: &str = "scuba-table";
const SCUBA_LOG_FILE_ARG: &str = "scuba-log-file";
@ -260,6 +262,12 @@ const CONTENT_META_EDGE_TYPES: &[EdgeType] = &[
EdgeType::AliasContentMappingToFileContent,
];
/// Output format for nodes printed to stdout during a scrub walk.
///
/// `Debug` prints each node with compact `{:?}` formatting (one node per
/// line); `PrettyDebug` uses the multi-line alternate `{:#?}` formatting.
///
/// The strum derives wire this enum into the CLI: `EnumVariantNames`
/// supplies `OutputFormat::VARIANTS` for clap's `possible_values`,
/// `AsRefStr` supplies `as_ref()` for the default value, and `EnumString`
/// supplies `FromStr` for parsing the user's `--output-format` argument.
#[derive(Clone, Debug, PartialEq, Eq, AsRefStr, EnumVariantNames, EnumString)]
pub enum OutputFormat {
    Debug,
    PrettyDebug,
}
// Things like phases and obs markers will go here
const MARKER_EDGE_TYPES: &[EdgeType] = &[EdgeType::ChangesetToPhaseMapping];
@ -365,6 +373,18 @@ pub fn setup_toplevel_app<'a, 'b>(app_name: &str) -> App<'a, 'b> {
.number_of_values(1)
.required(false)
.help("Node types to output in debug stdout"),
)
.arg(
Arg::with_name(OUTPUT_FORMAT_ARG)
.long(OUTPUT_FORMAT_ARG)
.short("F")
.takes_value(true)
.multiple(false)
.number_of_values(1)
.possible_values(OutputFormat::VARIANTS)
.default_value(OutputFormat::PrettyDebug.as_ref())
.required(false)
.help("Set the output format"),
);
let compression_benefit = setup_subcommand_args(