mononoke: add sampling_fingerprint to hash types

Summary:
Add a fingerprint method that returns a subset of the hash.

This will allow us to see the compression benefit, or write out a corpus, by sampling 1 in N of a group of keys.

Reviewed By: krallin

Differential Revision: D20541312

fbshipit-source-id: 93bd44ba9c14285daf50d8cd18eeb4b6dcc38d82
This commit is contained in:
Alex Hornby 2020-04-02 09:05:08 -07:00 committed by Facebook GitHub Bot
parent 7060cd47d6
commit a156633c1f
6 changed files with 78 additions and 2 deletions

View File

@ -70,6 +70,15 @@ impl Alias {
Alias::Sha256(sha256) => format!("alias.sha256.{}", sha256.to_hex()),
}
}
/// Return a stable hash fingerprint that can be used for sampling.
///
/// Forwards to the `sampling_fingerprint` of whichever hash this alias wraps.
#[inline]
pub fn sampling_fingerprint(&self) -> u64 {
    match *self {
        Alias::GitSha1(ref hash) => hash.sampling_fingerprint(),
        Alias::Sha1(ref hash) => hash.sampling_fingerprint(),
        Alias::Sha256(ref hash) => hash.sampling_fingerprint(),
    }
}
}
impl Loadable for Alias {

View File

@ -100,6 +100,15 @@ impl HgNodeHash {
/// Wrap an optional hash reference in an `OptDisplay`, storing it as the
/// wrapper's `inner` field.
pub fn display_opt<'a>(opt_hash: Option<&'a HgNodeHash>) -> OptDisplay<'a, Self> {
OptDisplay { inner: opt_hash }
}
/// Return a stable hash fingerprint that can be used for sampling.
///
/// The fingerprint is the first 8 bytes of the hash, interpreted as a
/// little-endian `u64`.
///
/// # Panics
///
/// Panics if the underlying hash exposes fewer than 8 bytes.
#[inline]
pub fn sampling_fingerprint(&self) -> u64 {
    // Borrow the hash bytes once; the explicit type avoids creating a `&&[u8]`.
    let hash_bytes: &[u8] = self.0.as_ref();
    let mut prefix = [0u8; 8];
    prefix.copy_from_slice(&hash_bytes[..8]);
    u64::from_le_bytes(prefix)
}
}
pub struct OptDisplay<'a, T> {
@ -266,6 +275,11 @@ impl HgChangesetId {
inner: opt_changeset_id,
}
}
/// Return a stable hash fingerprint that can be used for sampling.
///
/// Delegates to the `sampling_fingerprint` of the wrapped value (`self.0`).
#[inline]
pub fn sampling_fingerprint(&self) -> u64 {
self.0.sampling_fingerprint()
}
}
impl AsRef<[u8]> for HgChangesetId {
@ -395,6 +409,11 @@ impl HgManifestId {
/// Build the blobstore key for this manifest id: the `hgmanifest.sha1.`
/// prefix followed by the `Display` form of the wrapped hash.
pub fn blobstore_key(&self) -> String {
    let hash = &self.0;
    format!("hgmanifest.sha1.{}", hash)
}
/// Return a stable hash fingerprint that can be used for sampling.
///
/// Delegates to the `sampling_fingerprint` of the wrapped value (`self.0`).
#[inline]
pub fn sampling_fingerprint(&self) -> u64 {
self.0.sampling_fingerprint()
}
}
impl FromStr for HgManifestId {
@ -448,6 +467,11 @@ impl HgFileNodeId {
/// Build the blobstore key for this file node id: the `hgfilenode.sha1.`
/// prefix followed by the `Display` form of the wrapped hash.
pub fn blobstore_key(&self) -> String {
    let hash = &self.0;
    format!("hgfilenode.sha1.{}", hash)
}
/// Return a stable hash fingerprint that can be used for sampling.
///
/// Delegates to the `sampling_fingerprint` of the wrapped value (`self.0`).
#[inline]
pub fn sampling_fingerprint(&self) -> u64 {
self.0.sampling_fingerprint()
}
}
impl FromStr for HgFileNodeId {

View File

@ -368,6 +368,14 @@ macro_rules! impl_hash {
/// Consume the hash and return its underlying `[u8; $size]` byte array.
pub fn into_inner(self) -> [u8; $size] {
self.0
}
/// Return a stable hash fingerprint that can be used for sampling.
///
/// The fingerprint is the first 8 bytes of the hash, interpreted as a
/// little-endian `u64`.
///
/// # Panics
///
/// Slicing the first 8 bytes panics if `$size` is smaller than 8.
#[inline]
pub fn sampling_fingerprint(&self) -> u64 {
    let mut prefix = [0u8; 8];
    // A single borrow of the sub-slice is enough; no need for `&&`.
    prefix.copy_from_slice(&self.0[..8]);
    u64::from_le_bytes(prefix)
}
}
impl From<[u8; $size]> for $type {

View File

@ -47,6 +47,9 @@ pub trait MononokeId: Copy + Sync + Send + 'static {
/// Return a prefix before hash used in blobstore
fn blobstore_key_prefix() -> String;
/// Return a stable hash fingerprint that can be used for sampling
fn sampling_fingerprint(&self) -> u64;
}
/// An identifier for a changeset in Mononoke.
@ -336,6 +339,11 @@ macro_rules! impl_typed_hash {
/// Return the blobstore key prefix for this id type: `$key` followed by
/// `.blake2.`.
fn blobstore_key_prefix() -> String {
    String::from(concat!($key, ".blake2."))
}
/// Return a stable hash fingerprint that can be used for sampling.
///
/// Delegates to the `sampling_fingerprint` of the wrapped value (`self.0`).
#[inline]
fn sampling_fingerprint(&self) -> u64 {
self.0.sampling_fingerprint()
}
}
}
@ -435,6 +443,11 @@ impl MononokeId for ContentMetadataId {
/// Return the blobstore key prefix for this id type, i.e. `Self::PREFIX`
/// converted to a `String`.
fn blobstore_key_prefix() -> String {
Self::PREFIX.to_string()
}
/// Return a stable hash fingerprint that can be used for sampling.
///
/// Delegates to the `sampling_fingerprint` of the wrapped value (`self.0`).
#[inline]
fn sampling_fingerprint(&self) -> u64 {
self.0.sampling_fingerprint()
}
}
impl ChangesetIdPrefix {

View File

@ -381,4 +381,27 @@ impl Node {
Node::AliasContentMapping(_) => None,
}
}
/// Return the sampling fingerprint of the key carried by this node.
///
/// None means not hash based: `Root`, `Bookmark` and `PublishedBookmarks`
/// have no fingerprint. Every other variant forwards to the
/// `sampling_fingerprint` of its key.
pub fn sampling_fingerprint(&self) -> Option<u64> {
    let fingerprint = match self {
        // Not hash based
        Node::Root | Node::Bookmark(_) | Node::PublishedBookmarks => return None,
        // Bonsai
        Node::BonsaiChangeset(key) => key.sampling_fingerprint(),
        Node::BonsaiHgMapping(key) => key.sampling_fingerprint(),
        Node::BonsaiPhaseMapping(key) => key.sampling_fingerprint(),
        // Hg
        Node::HgBonsaiMapping(key) => key.sampling_fingerprint(),
        Node::HgChangeset(key) => key.sampling_fingerprint(),
        Node::HgManifest((_, key)) => key.sampling_fingerprint(),
        Node::HgFileEnvelope(key) => key.sampling_fingerprint(),
        Node::HgFileNode((_, key)) => key.sampling_fingerprint(),
        // Content
        Node::FileContent(key) => key.sampling_fingerprint(),
        Node::FileContentMetadata(key) => key.sampling_fingerprint(),
        Node::AliasContentMapping(key) => key.sampling_fingerprint(),
    };
    Some(fingerprint)
}
}

View File

@ -118,8 +118,7 @@ where
.map(|p| p.get_path_hash().sampling_fingerprint()),
None => match route {
Some(route) => route.sampling_fingerprint(),
// TODO, sample non-path node types
None => None,
None => step.target.sampling_fingerprint(),
},
};