mononoke: change the way file content blobs are hashed

Summary:
File content blobs are thrift encoded in Mononoke. This is done so
that we can change the encoding of content blobs easily. For example, we can
add compression or we can add split the blobs in chunks.

However there is a problem. At the moment file content blob key is a hash of
the actual data that's written to blobstore i.e. of a thrift encoded data. That
means that if we add compression or change thrift encoding in any way, then the
file content blob key changes and it changes the commit hashes.

This is wrong. To fix it let's use hash of the actual file content as the key.

Reviewed By: farnz

Differential Revision: D12884898

fbshipit-source-id: e60a7b326c39dad86e2b26c6f637defcb0acc8e8
This commit is contained in:
Stanislau Hlebik 2019-01-17 02:26:01 -08:00 committed by Facebook Github Bot
parent b64f2a7136
commit 8ef5d4ba64
4 changed files with 15 additions and 13 deletions

View File

@ -166,7 +166,7 @@ fn upload_blob_aliases() {
run_future(future).unwrap();
let expected_content =
"content.blake2.07ccc95f3ee9252a9e1dbdeaef59844d6aabd9dcf911fa29f542e891a4c5e90a";
"content.blake2.8d53819fadd0306a42cef7a9a9ac6814120efaaaedddb77d41d53a8e65e91bd0";
let contents = run_future(prefixed_blobstore.get(ctx.clone(), alias_key.to_string()))
.unwrap()

View File

@ -85,15 +85,15 @@ impl BlobstoreValue for FileContents {
type Key = ContentId;
fn into_blob(self) -> ContentBlob {
let mut context = ContentIdContext::new();
context.update(self.as_bytes());
let id = context.finish();
let thrift = self.into_thrift();
let data = compact_protocol::serialize(&thrift);
let mut context = ContentIdContext::new();
context.update(&data);
let id = context.finish();
Blob::new(id, data)
}
fn from_blob(blob: Blob<Self::Key>) -> Result<Self> {
fn from_blob(blob: ContentBlob) -> Result<Self> {
// TODO (T27336549) stop using SyncFailure once thrift is converted to failure
let thrift_tc = compact_protocol::deserialize(blob.data().as_ref())
.map_err(SyncFailure::new)

View File

@ -44,17 +44,17 @@
* Alias Verification finished: 3 errors found (glob)
$ aliasverify verify --debug 2>&1 | grep "Missing alias blob"
* Missing alias blob: alias Sha256(d690916cdea320e620748799a2051a0f4e07d6d0c3e2bc199ea3c69e0c0b5e4f), content_id ContentId(Blake2(73f63d223a944d13b617aaefd255742e870cde0107d19c52d02d45b0d5ed690d)) (glob)
* Missing alias blob: alias Sha256(b9a294f298d0ed2b65ca4488a42b473ff5f75d0b9843cbea84e1b472f9a514d1), content_id ContentId(Blake2(37db98f1ac02014d83cc39d05cfeaf1fee9798d398e57adf34d03d3b8f79fd42)) (glob)
* Missing alias blob: alias Sha256(2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66), content_id ContentId(Blake2(b607a8c12dcea1585f68283b4c02b7013a8637165c83b068201fd32127dadbb6)) (glob)
* Missing alias blob: alias Sha256(b9a294f298d0ed2b65ca4488a42b473ff5f75d0b9843cbea84e1b472f9a514d1), content_id ContentId(Blake2(1af04efffa454f843420a538617f0c4166550da421b65a59ed95a85b43a25ada)) (glob)
* Missing alias blob: alias Sha256(d690916cdea320e620748799a2051a0f4e07d6d0c3e2bc199ea3c69e0c0b5e4f), content_id ContentId(Blake2(7ee06cac57ab4267c097ebc8ec36e903fb3c25867934fe360e069ea1ab2ed7fd)) (glob)
* Missing alias blob: alias Sha256(2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66), content_id ContentId(Blake2(1a3f1094cdae123ec6999b7baf4211ffd94f47970bedd71e13ec07f24a9aba6a)) (glob)
$ ls $TESTTMP/repo/blobs | grep "alias" | wc -l
0
$ aliasverify generate --debug 2>&1 | grep "Missing alias blob"
* Missing alias blob: alias Sha256(d690916cdea320e620748799a2051a0f4e07d6d0c3e2bc199ea3c69e0c0b5e4f), content_id ContentId(Blake2(73f63d223a944d13b617aaefd255742e870cde0107d19c52d02d45b0d5ed690d)) (glob)
* Missing alias blob: alias Sha256(b9a294f298d0ed2b65ca4488a42b473ff5f75d0b9843cbea84e1b472f9a514d1), content_id ContentId(Blake2(37db98f1ac02014d83cc39d05cfeaf1fee9798d398e57adf34d03d3b8f79fd42)) (glob)
* Missing alias blob: alias Sha256(2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66), content_id ContentId(Blake2(b607a8c12dcea1585f68283b4c02b7013a8637165c83b068201fd32127dadbb6)) (glob)
* Missing alias blob: alias Sha256(b9a294f298d0ed2b65ca4488a42b473ff5f75d0b9843cbea84e1b472f9a514d1), content_id ContentId(Blake2(1af04efffa454f843420a538617f0c4166550da421b65a59ed95a85b43a25ada)) (glob)
* Missing alias blob: alias Sha256(d690916cdea320e620748799a2051a0f4e07d6d0c3e2bc199ea3c69e0c0b5e4f), content_id ContentId(Blake2(7ee06cac57ab4267c097ebc8ec36e903fb3c25867934fe360e069ea1ab2ed7fd)) (glob)
* Missing alias blob: alias Sha256(2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66), content_id ContentId(Blake2(1a3f1094cdae123ec6999b7baf4211ffd94f47970bedd71e13ec07f24a9aba6a)) (glob)
$ ls $TESTTMP/repo/blobs | grep "alias"
blob-repo0000.alias.sha256.2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66

View File

@ -102,6 +102,9 @@
new changesets 0db8825b9792
(run 'hg update' to get a working copy)
$ ls $TESTTMP/repo/blobs | grep "alias.content" | wc -l
0
$ hgmn update -r master_bookmark -v
remote: * DEBG Session with Mononoke started with uuid: * (glob)
resolving manifests
@ -112,7 +115,6 @@
calling hook update.prefetch: hgext.remotefilelog.wcpprefetch
2 files updated, 0 files merged, 0 files removed, 0 files unresolved
# Check that alias.sha1.hgfilenode -> sha256.file_content is generated
$ ls $TESTTMP/repo/blobs | grep "alias.content.blake2.012adc56e51db8dde54a982024b9c2bec84a3c906302b6ca693b7fa7b13e2ae4" | wc -l
$ ls $TESTTMP/repo/blobs | grep "alias.content" | wc -l
1