From 8bb43b800852efe5118772793aa57c63cb8ed6b1 Mon Sep 17 00:00:00 2001 From: Alex Hornby Date: Tue, 1 Jun 2021 10:58:49 -0700 Subject: [PATCH] mononoke: log ctime in walker packinfo logs Summary: Log the ctime in the packinfo log so that the packer pipeline has the option of using it, e.g. to only choose the newest version of a blob if it has been written more than once in the interval logs are queried over. Reviewed By: Croohand Differential Revision: D28798015 fbshipit-source-id: 72764f133b64c4390c6ade179f823883ac85ac89 --- .../tests/integration/test-walker-pack-info.t | 28 +++++++++---------- eden/mononoke/walker/src/blobstore.rs | 3 +- eden/mononoke/walker/src/pack.rs | 5 +++- eden/mononoke/walker/src/scrub.rs | 9 ++++-- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/eden/mononoke/tests/integration/test-walker-pack-info.t b/eden/mononoke/tests/integration/test-walker-pack-info.t index 039ea37159..6a5d6224d2 100644 --- a/eden/mononoke/tests/integration/test-walker-pack-info.t +++ b/eden/mononoke/tests/integration/test-walker-pack-info.t @@ -29,13 +29,13 @@ Run a scrub with the pack logging enabled Seen,Loaded: 7,7 Check logged pack info. Commit time is forced to zero in tests, hence mtime is 0. 
Expect compressed sizes and no pack_key yet - $ jq -r '.int * .normal | [ .repo, .chunk_num, .blobstore_key, .node_type, .node_fingerprint, .similarity_key, .mtime, .uncompressed_size, .unique_compressed_size, .pack_key] | @csv' < pack-info.json | sort | uniq - "repo",1,"repo0000.changeset.blake2.22eaf128d2cd64e1e47f9f0f091f835d893415588cb41c66d8448d892bcc0756","Changeset",-2205411614990931500,,0,108,108, - "repo",1,"repo0000.changeset.blake2.67472b417c6772992e6c4ef87258527b01a6256ef707a3f9c5fe6bc9679499f8","Changeset",-7389730255194601000,,0,73,73, - "repo",1,"repo0000.changeset.blake2.99283342831420aaf2c75c890cf3eb98bb26bf07e94d771cf8239b033ca45714","Changeset",-6187923334023141000,,0,108,108, - "repo",1,"repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd","FileContent",975364069869333100,6905401043796602000,0,107626,10*, (glob) - "repo",1,"repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a","FileContent",1456254697391410200,-6891338160001598000,0,107640,10*, (glob) - "repo",1,"repo0000.content.blake2.ca629f1bf107b9986c1dcb16aa8aa45bc31ac0a56871c322a6cd16025b0afd09","FileContent",-7441908177121091000,-6743401566611196000,0,107636,1*, (glob) + $ jq -r '.int * .normal | [ .repo, .chunk_num, .blobstore_key, .node_type, .node_fingerprint, .similarity_key, .mtime, .uncompressed_size, .unique_compressed_size, .pack_key, .ctime] | @csv' < pack-info.json | sort | uniq + "repo",1,"repo0000.changeset.blake2.22eaf128d2cd64e1e47f9f0f091f835d893415588cb41c66d8448d892bcc0756","Changeset",-2205411614990931500,,0,108,108,,1* (glob) + "repo",1,"repo0000.changeset.blake2.67472b417c6772992e6c4ef87258527b01a6256ef707a3f9c5fe6bc9679499f8","Changeset",-7389730255194601000,,0,73,73,,1* (glob) + "repo",1,"repo0000.changeset.blake2.99283342831420aaf2c75c890cf3eb98bb26bf07e94d771cf8239b033ca45714","Changeset",-6187923334023141000,,0,108,108,,1* (glob) + 
"repo",1,"repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd","FileContent",975364069869333100,6905401043796602000,0,107626,10*,,1* (glob) + "repo",1,"repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a","FileContent",1456254697391410200,-6891338160001598000,0,107640,10*,,1* (glob) + "repo",1,"repo0000.content.blake2.ca629f1bf107b9986c1dcb16aa8aa45bc31ac0a56871c322a6cd16025b0afd09","FileContent",-7441908177121091000,-6743401566611196000,0,107636,1*,,1* (glob) Now pack the blobs $ (cd blobstore/blobs; ls) | sed -e 's/^blob-//' -e 's/.pack$//' | packer --zstd-level=3 @@ -45,10 +45,10 @@ Run a scrub again now the storage is packed Seen,Loaded: 7,7 Check logged pack info now the store is packed. Expecting multiple in same pack key - $ jq -r '.int * .normal | [ .chunk_num, .blobstore_key, .node_type, .node_fingerprint, .similarity_key, .mtime, .uncompressed_size, .unique_compressed_size, .pack_key, .relevant_uncompressed_size, .relevant_compressed_size ] | @csv' < pack-info-packed.json | sort | uniq - 1,"repo0000.changeset.blake2.22eaf128d2cd64e1e47f9f0f091f835d893415588cb41c66d8448d892bcc0756","Changeset",-2205411614990931500,,0,108,117,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107748,45* (glob) - 1,"repo0000.changeset.blake2.67472b417c6772992e6c4ef87258527b01a6256ef707a3f9c5fe6bc9679499f8","Changeset",-7389730255194601000,,0,73,82,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107713,45* (glob) - 1,"repo0000.changeset.blake2.99283342831420aaf2c75c890cf3eb98bb26bf07e94d771cf8239b033ca45714","Changeset",-6187923334023141000,,0,108,117,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107748,45* (glob) - 
1,"repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd","FileContent",975364069869333100,6905401043796602000,0,107626,2*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",21*,4* (glob) - 1,"repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a","FileContent",1456254697391410200,-6891338160001598000,0,107640,4*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",10*,4* (glob) - 1,"repo0000.content.blake2.ca629f1bf107b9986c1dcb16aa8aa45bc31ac0a56871c322a6cd16025b0afd09","FileContent",-7441908177121091000,-6743401566611196000,0,107636,2*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",21*,4* (glob) + $ jq -r '.int * .normal | [ .chunk_num, .blobstore_key, .node_type, .node_fingerprint, .similarity_key, .mtime, .uncompressed_size, .unique_compressed_size, .pack_key, .relevant_uncompressed_size, .relevant_compressed_size, .ctime] | @csv' < pack-info-packed.json | sort | uniq + 1,"repo0000.changeset.blake2.22eaf128d2cd64e1e47f9f0f091f835d893415588cb41c66d8448d892bcc0756","Changeset",-2205411614990931500,,0,108,117,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107748,45*,1* (glob) + 1,"repo0000.changeset.blake2.67472b417c6772992e6c4ef87258527b01a6256ef707a3f9c5fe6bc9679499f8","Changeset",-7389730255194601000,,0,73,82,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107713,45*,1* (glob) + 1,"repo0000.changeset.blake2.99283342831420aaf2c75c890cf3eb98bb26bf07e94d771cf8239b033ca45714","Changeset",-6187923334023141000,,0,108,117,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107748,45*,1* (glob) + 
1,"repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd","FileContent",975364069869333100,6905401043796602000,0,107626,2*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",21*,4*,1* (glob) + 1,"repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a","FileContent",1456254697391410200,-6891338160001598000,0,107640,4*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",10*,4*,1* (glob) + 1,"repo0000.content.blake2.ca629f1bf107b9986c1dcb16aa8aa45bc31ac0a56871c322a6cd16025b0afd09","FileContent",-7441908177121091000,-6743401566611196000,0,107636,2*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",21*,4*,1* (glob) diff --git a/eden/mononoke/walker/src/blobstore.rs b/eden/mononoke/walker/src/blobstore.rs index fa20abdecc..740b9a319f 100644 --- a/eden/mononoke/walker/src/blobstore.rs +++ b/eden/mononoke/walker/src/blobstore.rs @@ -5,6 +5,7 @@ * GNU General Public License version 2. 
*/ +use crate::pack::CTIME; use crate::validate::{CHECK_FAIL, CHECK_TYPE, ERROR_MSG, NODE_KEY, REPO}; use anyhow::{anyhow, Error}; @@ -123,7 +124,7 @@ impl ScrubHandler for StatsScrubHandler { .add(CHECK_TYPE, "scrub_repair") .add(CHECK_FAIL, if is_repaired { 0 } else { 1 }) .add("session", ctx.session().metadata().session_id().to_string()) - .add("ctime", ctime) + .add(CTIME, ctime) .log(); } } diff --git a/eden/mononoke/walker/src/pack.rs b/eden/mononoke/walker/src/pack.rs index 374c470439..1d68ddaa9e 100644 --- a/eden/mononoke/walker/src/pack.rs +++ b/eden/mononoke/walker/src/pack.rs @@ -27,6 +27,7 @@ const UNIQUE_COMPRESSED_SIZE: &str = "unique_compressed_size"; const PACK_KEY: &str = "pack_key"; const RELEVANT_UNCOMPRESSED_SIZE: &str = "relevant_uncompressed_size"; const RELEVANT_COMPRESSED_SIZE: &str = "relevant_compressed_size"; +pub const CTIME: &str = "ctime"; /// What do we log for each blobstore key pub struct PackInfo<'a> { @@ -38,6 +39,7 @@ pub struct PackInfo<'a> { pub mtime: Option, // typically the commit time of Changeset from which this item was reached pub uncompressed_size: u64, // How big is the value for this key, in bytes. 
pub sizes: Option, + pub ctime: Option, } pub trait PackInfoLogger { @@ -93,7 +95,8 @@ impl PackInfoLogger for ScubaPackInfoLogger { .add_opt(NODE_FINGERPRINT, info.node_fingerprint) .add_opt(SIMILARITY_KEY, info.similarity_key) .add_opt(MTIME, info.mtime) - .add(UNCOMPRESSED_SIZE, info.uncompressed_size); + .add(UNCOMPRESSED_SIZE, info.uncompressed_size) + .add_opt(CTIME, info.ctime); if let Some(sizes) = info.sizes { scuba.add(UNIQUE_COMPRESSED_SIZE, sizes.unique_compressed_size); diff --git a/eden/mononoke/walker/src/scrub.rs b/eden/mononoke/walker/src/scrub.rs index 0debb1b588..0e793c0bcc 100644 --- a/eden/mononoke/walker/src/scrub.rs +++ b/eden/mononoke/walker/src/scrub.rs @@ -196,7 +196,7 @@ fn record_for_packer( { if let Some(mut sample) = sample { for (blobstore_key, mut store_to_key_sizes) in sample.data.drain() { - for (blobstore_id, key_sizes) in store_to_key_sizes.drain() { + for (blobstore_id, key_sample) in store_to_key_sizes.drain() { logger.log(PackInfo { blobstore_id, blobstore_key: blobstore_key.as_str(), @@ -204,8 +204,9 @@ fn record_for_packer( node_fingerprint: walk_key.node.sampling_fingerprint(), similarity_key: walk_key.path.map(|p| p.sampling_fingerprint()), mtime: mtime.map(|mtime| mtime.timestamp_secs() as u64), - uncompressed_size: key_sizes.unique_uncompressed_size, - sizes: key_sizes.sizes, + uncompressed_size: key_sample.unique_uncompressed_size, + sizes: key_sample.sizes, + ctime: key_sample.ctime, }) } } @@ -219,6 +220,7 @@ struct ScrubKeySample { unique_uncompressed_size: u64, // Only keys accessed via a packblob store have SizeMetadata sizes: Option, + ctime: Option, } // Holds a map from blobstore keys to their samples per store @@ -249,6 +251,7 @@ impl ComponentSamplingHandler for WalkSampleMapping { let sample = ScrubKeySample { unique_uncompressed_size: value.as_bytes().len() as u64, sizes: value.as_meta().sizes().cloned(), + ctime: value.as_meta().ctime(), }; guard .data