mononoke: log ctime in walker packinfo logs

Summary: Log the ctime in the packinfo log so that the packer pipeline has the option of using it, e.g. to choose only the newest version of a blob if it has been written more than once in the interval the logs are queried over.

Reviewed By: Croohand

Differential Revision: D28798015

fbshipit-source-id: 72764f133b64c4390c6ade179f823883ac85ac89
This commit is contained in:
Alex Hornby 2021-06-01 10:58:49 -07:00 committed by Facebook GitHub Bot
parent 38d8d02f78
commit 8bb43b8008
4 changed files with 26 additions and 19 deletions

View File

@ -29,13 +29,13 @@ Run a scrub with the pack logging enabled
Seen,Loaded: 7,7
Check logged pack info. Commit time is forced to zero in tests, hence mtime is 0. Expect compressed sizes and no pack_key yet
$ jq -r '.int * .normal | [ .repo, .chunk_num, .blobstore_key, .node_type, .node_fingerprint, .similarity_key, .mtime, .uncompressed_size, .unique_compressed_size, .pack_key] | @csv' < pack-info.json | sort | uniq
"repo",1,"repo0000.changeset.blake2.22eaf128d2cd64e1e47f9f0f091f835d893415588cb41c66d8448d892bcc0756","Changeset",-2205411614990931500,,0,108,108,
"repo",1,"repo0000.changeset.blake2.67472b417c6772992e6c4ef87258527b01a6256ef707a3f9c5fe6bc9679499f8","Changeset",-7389730255194601000,,0,73,73,
"repo",1,"repo0000.changeset.blake2.99283342831420aaf2c75c890cf3eb98bb26bf07e94d771cf8239b033ca45714","Changeset",-6187923334023141000,,0,108,108,
"repo",1,"repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd","FileContent",975364069869333100,6905401043796602000,0,107626,10*, (glob)
"repo",1,"repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a","FileContent",1456254697391410200,-6891338160001598000,0,107640,10*, (glob)
"repo",1,"repo0000.content.blake2.ca629f1bf107b9986c1dcb16aa8aa45bc31ac0a56871c322a6cd16025b0afd09","FileContent",-7441908177121091000,-6743401566611196000,0,107636,1*, (glob)
$ jq -r '.int * .normal | [ .repo, .chunk_num, .blobstore_key, .node_type, .node_fingerprint, .similarity_key, .mtime, .uncompressed_size, .unique_compressed_size, .pack_key, .ctime] | @csv' < pack-info.json | sort | uniq
"repo",1,"repo0000.changeset.blake2.22eaf128d2cd64e1e47f9f0f091f835d893415588cb41c66d8448d892bcc0756","Changeset",-2205411614990931500,,0,108,108,,1* (glob)
"repo",1,"repo0000.changeset.blake2.67472b417c6772992e6c4ef87258527b01a6256ef707a3f9c5fe6bc9679499f8","Changeset",-7389730255194601000,,0,73,73,,1* (glob)
"repo",1,"repo0000.changeset.blake2.99283342831420aaf2c75c890cf3eb98bb26bf07e94d771cf8239b033ca45714","Changeset",-6187923334023141000,,0,108,108,,1* (glob)
"repo",1,"repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd","FileContent",975364069869333100,6905401043796602000,0,107626,10*,,1* (glob)
"repo",1,"repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a","FileContent",1456254697391410200,-6891338160001598000,0,107640,10*,,1* (glob)
"repo",1,"repo0000.content.blake2.ca629f1bf107b9986c1dcb16aa8aa45bc31ac0a56871c322a6cd16025b0afd09","FileContent",-7441908177121091000,-6743401566611196000,0,107636,1*,,1* (glob)
Now pack the blobs
$ (cd blobstore/blobs; ls) | sed -e 's/^blob-//' -e 's/.pack$//' | packer --zstd-level=3
@ -45,10 +45,10 @@ Run a scrub again now the storage is packed
Seen,Loaded: 7,7
Check logged pack info now the store is packed. Expecting multiple in same pack key
$ jq -r '.int * .normal | [ .chunk_num, .blobstore_key, .node_type, .node_fingerprint, .similarity_key, .mtime, .uncompressed_size, .unique_compressed_size, .pack_key, .relevant_uncompressed_size, .relevant_compressed_size ] | @csv' < pack-info-packed.json | sort | uniq
1,"repo0000.changeset.blake2.22eaf128d2cd64e1e47f9f0f091f835d893415588cb41c66d8448d892bcc0756","Changeset",-2205411614990931500,,0,108,117,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107748,45* (glob)
1,"repo0000.changeset.blake2.67472b417c6772992e6c4ef87258527b01a6256ef707a3f9c5fe6bc9679499f8","Changeset",-7389730255194601000,,0,73,82,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107713,45* (glob)
1,"repo0000.changeset.blake2.99283342831420aaf2c75c890cf3eb98bb26bf07e94d771cf8239b033ca45714","Changeset",-6187923334023141000,,0,108,117,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107748,45* (glob)
1,"repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd","FileContent",975364069869333100,6905401043796602000,0,107626,2*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",21*,4* (glob)
1,"repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a","FileContent",1456254697391410200,-6891338160001598000,0,107640,4*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",10*,4* (glob)
1,"repo0000.content.blake2.ca629f1bf107b9986c1dcb16aa8aa45bc31ac0a56871c322a6cd16025b0afd09","FileContent",-7441908177121091000,-6743401566611196000,0,107636,2*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",21*,4* (glob)
$ jq -r '.int * .normal | [ .chunk_num, .blobstore_key, .node_type, .node_fingerprint, .similarity_key, .mtime, .uncompressed_size, .unique_compressed_size, .pack_key, .relevant_uncompressed_size, .relevant_compressed_size, .ctime] | @csv' < pack-info-packed.json | sort | uniq
1,"repo0000.changeset.blake2.22eaf128d2cd64e1e47f9f0f091f835d893415588cb41c66d8448d892bcc0756","Changeset",-2205411614990931500,,0,108,117,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107748,45*,1* (glob)
1,"repo0000.changeset.blake2.67472b417c6772992e6c4ef87258527b01a6256ef707a3f9c5fe6bc9679499f8","Changeset",-7389730255194601000,,0,73,82,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107713,45*,1* (glob)
1,"repo0000.changeset.blake2.99283342831420aaf2c75c890cf3eb98bb26bf07e94d771cf8239b033ca45714","Changeset",-6187923334023141000,,0,108,117,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",107748,45*,1* (glob)
1,"repo0000.content.blake2.4caa3d2f7430890df6f5deb3b652fcc88769e3323c0b7676e9771d172a521bbd","FileContent",975364069869333100,6905401043796602000,0,107626,2*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",21*,4*,1* (glob)
1,"repo0000.content.blake2.7f4c8284eea7351488400d6fdf82e1c262a81e20d4abd8ee469841d19b60c94a","FileContent",1456254697391410200,-6891338160001598000,0,107640,4*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",10*,4*,1* (glob)
1,"repo0000.content.blake2.ca629f1bf107b9986c1dcb16aa8aa45bc31ac0a56871c322a6cd16025b0afd09","FileContent",-7441908177121091000,-6743401566611196000,0,107636,2*,"multiblob-e9fc47da6371e725f7d558a0a7abafc029033a5f35de8f7833baffbd66029d25.pack",21*,4*,1* (glob)

View File

@ -5,6 +5,7 @@
* GNU General Public License version 2.
*/
use crate::pack::CTIME;
use crate::validate::{CHECK_FAIL, CHECK_TYPE, ERROR_MSG, NODE_KEY, REPO};
use anyhow::{anyhow, Error};
@ -123,7 +124,7 @@ impl ScrubHandler for StatsScrubHandler {
.add(CHECK_TYPE, "scrub_repair")
.add(CHECK_FAIL, if is_repaired { 0 } else { 1 })
.add("session", ctx.session().metadata().session_id().to_string())
.add("ctime", ctime)
.add(CTIME, ctime)
.log();
}
}

View File

@ -27,6 +27,7 @@ const UNIQUE_COMPRESSED_SIZE: &str = "unique_compressed_size";
const PACK_KEY: &str = "pack_key";
const RELEVANT_UNCOMPRESSED_SIZE: &str = "relevant_uncompressed_size";
const RELEVANT_COMPRESSED_SIZE: &str = "relevant_compressed_size";
pub const CTIME: &str = "ctime";
/// What do we log for each blobstore key
pub struct PackInfo<'a> {
@ -38,6 +39,7 @@ pub struct PackInfo<'a> {
pub mtime: Option<u64>, // typically the commit time of Changeset from which this item was reached
pub uncompressed_size: u64, // How big is the value for this key, in bytes.
pub sizes: Option<SizeMetadata>,
pub ctime: Option<i64>,
}
pub trait PackInfoLogger {
@ -93,7 +95,8 @@ impl PackInfoLogger for ScubaPackInfoLogger {
.add_opt(NODE_FINGERPRINT, info.node_fingerprint)
.add_opt(SIMILARITY_KEY, info.similarity_key)
.add_opt(MTIME, info.mtime)
.add(UNCOMPRESSED_SIZE, info.uncompressed_size);
.add(UNCOMPRESSED_SIZE, info.uncompressed_size)
.add_opt(CTIME, info.ctime);
if let Some(sizes) = info.sizes {
scuba.add(UNIQUE_COMPRESSED_SIZE, sizes.unique_compressed_size);

View File

@ -196,7 +196,7 @@ fn record_for_packer<L>(
{
if let Some(mut sample) = sample {
for (blobstore_key, mut store_to_key_sizes) in sample.data.drain() {
for (blobstore_id, key_sizes) in store_to_key_sizes.drain() {
for (blobstore_id, key_sample) in store_to_key_sizes.drain() {
logger.log(PackInfo {
blobstore_id,
blobstore_key: blobstore_key.as_str(),
@ -204,8 +204,9 @@ fn record_for_packer<L>(
node_fingerprint: walk_key.node.sampling_fingerprint(),
similarity_key: walk_key.path.map(|p| p.sampling_fingerprint()),
mtime: mtime.map(|mtime| mtime.timestamp_secs() as u64),
uncompressed_size: key_sizes.unique_uncompressed_size,
sizes: key_sizes.sizes,
uncompressed_size: key_sample.unique_uncompressed_size,
sizes: key_sample.sizes,
ctime: key_sample.ctime,
})
}
}
@ -219,6 +220,7 @@ struct ScrubKeySample {
unique_uncompressed_size: u64,
// Only keys accessed via a packblob store have SizeMetadata
sizes: Option<SizeMetadata>,
ctime: Option<i64>,
}
// Holds a map from blobstore keys to their samples per store
@ -249,6 +251,7 @@ impl ComponentSamplingHandler for WalkSampleMapping<Node, ScrubSample> {
let sample = ScrubKeySample {
unique_uncompressed_size: value.as_bytes().len() as u64,
sizes: value.as_meta().sizes().cloned(),
ctime: value.as_meta().ctime(),
};
guard
.data