From 8ef5d4ba643496e67151b8f1eb10987f2f369a09 Mon Sep 17 00:00:00 2001 From: Stanislau Hlebik Date: Thu, 17 Jan 2019 02:26:01 -0800 Subject: [PATCH] mononoke: change the way file content blobs are hashed Summary: File content blobs are thrift encoded in Mononoke. This is done so that we can change the encoding of content blobs easily. For example, we can add compression or we can add split the blobs in chunks. However there is a problem. At the moment file content blob key is a hash of the actual data that's written to blobstore i.e. of a thrift encoded data. That means that if we add compression or change thrift encoding in any way, then the file content blob key changes and it changes the commit hashes. This is wrong. To fix it let's use hash of the actual file content as the key. Reviewed By: farnz Differential Revision: D12884898 fbshipit-source-id: e60a7b326c39dad86e2b26c6f637defcb0acc8e8 --- blobrepo/test/main.rs | 2 +- mononoke-types/src/file_contents.rs | 8 ++++---- tests/integration/test-alias-verify.t | 12 ++++++------ tests/integration/test-lfs-upload-alias-on-fetch.t | 6 ++++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/blobrepo/test/main.rs b/blobrepo/test/main.rs index 2daf9325d3..9dadaee3e7 100644 --- a/blobrepo/test/main.rs +++ b/blobrepo/test/main.rs @@ -166,7 +166,7 @@ fn upload_blob_aliases() { run_future(future).unwrap(); let expected_content = - "content.blake2.07ccc95f3ee9252a9e1dbdeaef59844d6aabd9dcf911fa29f542e891a4c5e90a"; + "content.blake2.8d53819fadd0306a42cef7a9a9ac6814120efaaaedddb77d41d53a8e65e91bd0"; let contents = run_future(prefixed_blobstore.get(ctx.clone(), alias_key.to_string())) .unwrap() diff --git a/mononoke-types/src/file_contents.rs b/mononoke-types/src/file_contents.rs index a9ceb2e8e3..743beb4ee3 100644 --- a/mononoke-types/src/file_contents.rs +++ b/mononoke-types/src/file_contents.rs @@ -85,15 +85,15 @@ impl BlobstoreValue for FileContents { type Key = ContentId; fn into_blob(self) -> ContentBlob { + let mut context = ContentIdContext::new(); + context.update(self.as_bytes()); + let id = context.finish(); let thrift = self.into_thrift(); let data = compact_protocol::serialize(&thrift); - let mut context = ContentIdContext::new(); - context.update(&data); - let id = context.finish(); Blob::new(id, data) } - fn from_blob(blob: Blob) -> Result { + fn from_blob(blob: ContentBlob) -> Result { // TODO (T27336549) stop using SyncFailure once thrift is converted to failure let thrift_tc = compact_protocol::deserialize(blob.data().as_ref()) .map_err(SyncFailure::new) diff --git a/tests/integration/test-alias-verify.t b/tests/integration/test-alias-verify.t index 634d4bfb8b..e014419e6c 100644 --- a/tests/integration/test-alias-verify.t +++ b/tests/integration/test-alias-verify.t @@ -44,17 +44,17 @@ * Alias Verification finished: 3 errors found (glob) $ aliasverify verify --debug 2>&1 | grep "Missing alias blob" - * Missing alias blob: alias Sha256(d690916cdea320e620748799a2051a0f4e07d6d0c3e2bc199ea3c69e0c0b5e4f), content_id ContentId(Blake2(73f63d223a944d13b617aaefd255742e870cde0107d19c52d02d45b0d5ed690d)) (glob) - * Missing alias blob: alias Sha256(b9a294f298d0ed2b65ca4488a42b473ff5f75d0b9843cbea84e1b472f9a514d1), content_id ContentId(Blake2(37db98f1ac02014d83cc39d05cfeaf1fee9798d398e57adf34d03d3b8f79fd42)) (glob) - * Missing alias blob: alias Sha256(2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66), content_id ContentId(Blake2(b607a8c12dcea1585f68283b4c02b7013a8637165c83b068201fd32127dadbb6)) (glob) + * Missing alias blob: alias Sha256(b9a294f298d0ed2b65ca4488a42b473ff5f75d0b9843cbea84e1b472f9a514d1), content_id ContentId(Blake2(1af04efffa454f843420a538617f0c4166550da421b65a59ed95a85b43a25ada)) (glob) + * Missing alias blob: alias Sha256(d690916cdea320e620748799a2051a0f4e07d6d0c3e2bc199ea3c69e0c0b5e4f), content_id ContentId(Blake2(7ee06cac57ab4267c097ebc8ec36e903fb3c25867934fe360e069ea1ab2ed7fd)) (glob) + * Missing alias blob: alias Sha256(2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66), content_id ContentId(Blake2(1a3f1094cdae123ec6999b7baf4211ffd94f47970bedd71e13ec07f24a9aba6a)) (glob) $ ls $TESTTMP/repo/blobs | grep "alias" | wc -l 0 $ aliasverify generate --debug 2>&1 | grep "Missing alias blob" - * Missing alias blob: alias Sha256(d690916cdea320e620748799a2051a0f4e07d6d0c3e2bc199ea3c69e0c0b5e4f), content_id ContentId(Blake2(73f63d223a944d13b617aaefd255742e870cde0107d19c52d02d45b0d5ed690d)) (glob) - * Missing alias blob: alias Sha256(b9a294f298d0ed2b65ca4488a42b473ff5f75d0b9843cbea84e1b472f9a514d1), content_id ContentId(Blake2(37db98f1ac02014d83cc39d05cfeaf1fee9798d398e57adf34d03d3b8f79fd42)) (glob) - * Missing alias blob: alias Sha256(2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66), content_id ContentId(Blake2(b607a8c12dcea1585f68283b4c02b7013a8637165c83b068201fd32127dadbb6)) (glob) + * Missing alias blob: alias Sha256(b9a294f298d0ed2b65ca4488a42b473ff5f75d0b9843cbea84e1b472f9a514d1), content_id ContentId(Blake2(1af04efffa454f843420a538617f0c4166550da421b65a59ed95a85b43a25ada)) (glob) + * Missing alias blob: alias Sha256(d690916cdea320e620748799a2051a0f4e07d6d0c3e2bc199ea3c69e0c0b5e4f), content_id ContentId(Blake2(7ee06cac57ab4267c097ebc8ec36e903fb3c25867934fe360e069ea1ab2ed7fd)) (glob) + * Missing alias blob: alias Sha256(2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66), content_id ContentId(Blake2(1a3f1094cdae123ec6999b7baf4211ffd94f47970bedd71e13ec07f24a9aba6a)) (glob) $ ls $TESTTMP/repo/blobs | grep "alias" blob-repo0000.alias.sha256.2ba85baaa7922ff4c0dfdbc00fd07bd69dcb1dce745c6a8c676fe8b5642a0d66 diff --git a/tests/integration/test-lfs-upload-alias-on-fetch.t b/tests/integration/test-lfs-upload-alias-on-fetch.t index 61772a8aba..7315cb40b2 100644 --- a/tests/integration/test-lfs-upload-alias-on-fetch.t +++ b/tests/integration/test-lfs-upload-alias-on-fetch.t @@ -102,6 +102,9 @@ new changesets 0db8825b9792 (run 'hg update' to get a working copy) + $ ls $TESTTMP/repo/blobs | grep "alias.content" | wc -l + 0 + $ hgmn update -r master_bookmark -v remote: * DEBG Session with Mononoke started with uuid: * (glob) resolving manifests @@ -112,7 +115,6 @@ calling hook update.prefetch: hgext.remotefilelog.wcpprefetch 2 files updated, 0 files merged, 0 files removed, 0 files unresolved - # Check that alias.sha1.hgfilenode -> sha256.file_content is generated - $ ls $TESTTMP/repo/blobs | grep "alias.content.blake2.012adc56e51db8dde54a982024b9c2bec84a3c906302b6ca693b7fa7b13e2ae4" | wc -l + $ ls $TESTTMP/repo/blobs | grep "alias.content" | wc -l 1