From e54b5f34d509ca3581b34a7dca0720682cddeab0 Mon Sep 17 00:00:00 2001 From: Thomas Orozco Date: Fri, 16 Aug 2019 03:00:24 -0700 Subject: [PATCH] mononoke/filestore: make FetchKey safer Summary: We have a FetchKey object in the filestore that serves as both the thing you use to ask for reads and to write aliases. One downside of this is that the same enum is used for reads and writes, and that means that you could potentially accidentally try to write an alias for the "canonical" fetch key, which would effectively overwrite your content. That feels bad, and the underlying reason is that the typing doesn't really reflect the reality of what we are trying to do here, which is to support a set of aliases for pieces of content, and expose an enum that allows clients to access content by alias or canonical ID. This patch updates the type hierarchy to be more in line with what we are trying to do (note that you can no longer store a canonical fetch key). One downside is that it makes the API a bit more verbose, since now you need to say e.g. `FetchKey::Aliased(Alias::Sha256(...))` instead of just `FetchKey::Sha256`. I think the tradeoff is reasonable, but I'm curious to hear other folks' thoughts. 
Reviewed By: farnz Differential Revision: D16764582 fbshipit-source-id: 00c596d1370b52ef2aea04ac44cdcd19b7a2c92a --- blobimport_lib/src/lfs.rs | 79 ++++++++++++++------------- blobrepo/src/repo.rs | 23 ++++---- cmds/admin/filestore.rs | 28 ++++++---- cmds/aliasverify.rs | 17 +++--- filestore/src/fetch_key.rs | 74 ++++++++++++++++++------- filestore/src/finalize.rs | 10 ++-- filestore/src/lib.rs | 36 ++---------- filestore/src/test/test_api.rs | 38 ++++++++----- filestore/src/test/test_invariants.rs | 20 +++++-- 9 files changed, 185 insertions(+), 140 deletions(-) diff --git a/blobimport_lib/src/lfs.rs b/blobimport_lib/src/lfs.rs index 20670b600a..1a6308e6c2 100644 --- a/blobimport_lib/src/lfs.rs +++ b/blobimport_lib/src/lfs.rs @@ -9,7 +9,7 @@ use bytes::Bytes; use cloned::cloned; use context::CoreContext; use failure_ext::{Error, Fail, Result}; -use filestore::{self, FetchKey, StoreRequest}; +use filestore::{self, Alias, FetchKey, StoreRequest}; use futures::{ future::{loop_fn, Loop}, Future, IntoFuture, Stream, @@ -56,45 +56,46 @@ fn do_lfs_upload( ) -> BoxFuture { let blobstore = blobrepo.get_blobstore(); - filestore::get_metadata(&blobstore, ctx.clone(), &FetchKey::Sha256(lfs.oid())) - .and_then({ - move |metadata| match metadata { - Some(metadata) => { - info!( - ctx.logger(), - "lfs_upload: reusing blob {:?}", metadata.sha256 - ); - Ok(metadata).into_future() - } - .left_future(), - None => { - info!(ctx.logger(), "lfs_upload: importing blob {:?}", lfs.oid()); - let req = StoreRequest::with_sha256(lfs.size(), lfs.oid()); - - lfs_stream(&lfs_helper, &lfs) - .into_future() - .and_then(move |(child, stream)| { - let upload_fut = blobrepo.upload_file(ctx.clone(), &req, stream); - - // NOTE: We ignore the child exit code here. Since the Filestore validates the object - // we're uploading by SHA256, that's indeed fine (it doesn't matter if the Child failed - // if it gave us exactly the content we wanted). 
- (upload_fut, child.from_err()).into_future().map({ - cloned!(ctx); - move |(meta, _)| { - info!( - ctx.logger(), - "lfs_upload: imported blob {:?}", meta.sha256 - ); - meta - } - }) - }) - } - .right_future(), + filestore::get_metadata( + &blobstore, + ctx.clone(), + &FetchKey::Aliased(Alias::Sha256(lfs.oid())), + ) + .and_then({ + move |metadata| match metadata { + Some(metadata) => { + info!( + ctx.logger(), + "lfs_upload: reusing blob {:?}", metadata.sha256 + ); + Ok(metadata).into_future() } - }) - .boxify() + .left_future(), + None => { + info!(ctx.logger(), "lfs_upload: importing blob {:?}", lfs.oid()); + let req = StoreRequest::with_sha256(lfs.size(), lfs.oid()); + + lfs_stream(&lfs_helper, &lfs) + .into_future() + .and_then(move |(child, stream)| { + let upload_fut = blobrepo.upload_file(ctx.clone(), &req, stream); + + // NOTE: We ignore the child exit code here. Since the Filestore validates the object + // we're uploading by SHA256, that's indeed fine (it doesn't matter if the Child failed + // if it gave us exactly the content we wanted). 
+ (upload_fut, child.from_err()).into_future().map({ + cloned!(ctx); + move |(meta, _)| { + info!(ctx.logger(), "lfs_upload: imported blob {:?}", meta.sha256); + meta + } + }) + }) + } + .right_future(), + } + }) + .boxify() } pub fn lfs_upload( diff --git a/blobrepo/src/repo.rs b/blobrepo/src/repo.rs index b5fddf14a6..3bc4810811 100644 --- a/blobrepo/src/repo.rs +++ b/blobrepo/src/repo.rs @@ -32,7 +32,7 @@ use cloned::cloned; use context::CoreContext; use failure_ext::{bail_err, prelude::*, Error, FutureFailureErrorExt, FutureFailureExt, Result}; use filenodes::{FilenodeInfo, Filenodes}; -use filestore::{self, FetchKey, FilestoreConfig, StoreRequest}; +use filestore::{self, Alias, FetchKey, FilestoreConfig, StoreRequest}; use futures::future::{self, loop_fn, ok, Either, Future, Loop}; use futures::stream::{self, once, FuturesUnordered, Stream}; use futures::sync::oneshot; @@ -318,7 +318,8 @@ impl BlobRepo { ctx: CoreContext, key: Sha256, ) -> BoxFuture { - filestore::get_canonical_id(&self.blobstore, ctx, &FetchKey::Sha256(key)) + FetchKey::Aliased(Alias::Sha256(key)) + .load(ctx, &self.blobstore) .and_then(move |content_id| { content_id.ok_or(ErrorKind::ContentBlobByAliasMissing(key).into()) }) @@ -344,15 +345,17 @@ impl BlobRepo { pub fn get_file_content_by_alias( &self, ctx: CoreContext, - alias: Sha256, + sha256: Sha256, ) -> BoxStream { - filestore::fetch(&self.blobstore, ctx, &FetchKey::Sha256(alias)) - .and_then(move |stream| { - stream.ok_or(ErrorKind::ContentBlobByAliasMissing(alias).into()) - }) - .flatten_stream() - .map(FileBytes) - .boxify() + filestore::fetch( + &self.blobstore, + ctx, + &FetchKey::Aliased(Alias::Sha256(sha256)), + ) + .and_then(move |stream| stream.ok_or(ErrorKind::ContentBlobByAliasMissing(sha256).into())) + .flatten_stream() + .map(FileBytes) + .boxify() } /// Get Mercurial heads, which we approximate as publishing Bonsai Bookmarks. 
diff --git a/cmds/admin/filestore.rs b/cmds/admin/filestore.rs index b50b79b847..15b2af80f2 100644 --- a/cmds/admin/filestore.rs +++ b/cmds/admin/filestore.rs @@ -9,7 +9,7 @@ use cloned::cloned; use cmdlib::args; use context::CoreContext; use failure_ext::{err_msg, format_err, Result}; -use filestore::{self, FetchKey, StoreRequest}; +use filestore::{self, Alias, FetchKey, StoreRequest}; use futures::{Future, IntoFuture, Stream}; use futures_ext::{BoxFuture, FutureExt}; use mononoke_types::{ @@ -139,23 +139,31 @@ pub fn execute_command( .and_then({ cloned!(blobstore, ctx); move |metadata| { - use FetchKey::*; + use Alias::*; ( filestore::fetch( &blobstore, ctx.clone(), - &Canonical(metadata.content_id), + &FetchKey::Canonical(metadata.content_id), ) .then(Ok), - filestore::fetch(&blobstore, ctx.clone(), &Sha1(metadata.sha1)) - .then(Ok), - filestore::fetch(&blobstore, ctx.clone(), &Sha256(metadata.sha256)) - .then(Ok), filestore::fetch( &blobstore, ctx.clone(), - &GitSha1(metadata.git_sha1), + &FetchKey::Aliased(Sha1(metadata.sha1)), + ) + .then(Ok), + filestore::fetch( + &blobstore, + ctx.clone(), + &FetchKey::Aliased(Sha256(metadata.sha256)), + ) + .then(Ok), + filestore::fetch( + &blobstore, + ctx.clone(), + &FetchKey::Aliased(GitSha1(metadata.git_sha1)), ) .then(Ok), ) @@ -183,8 +191,8 @@ fn extract_fetch_key(matches: &ArgMatches<'_>) -> Result { match matches.value_of(ARG_KIND).unwrap() { "id" => Ok(FetchKey::Canonical(ContentId::from_str(id)?)), - "sha1" => Ok(FetchKey::Sha1(Sha1::from_str(id)?)), - "sha256" => Ok(FetchKey::Sha256(Sha256::from_str(id)?)), + "sha1" => Ok(FetchKey::Aliased(Alias::Sha1(Sha1::from_str(id)?))), + "sha256" => Ok(FetchKey::Aliased(Alias::Sha256(Sha256::from_str(id)?))), kind => Err(format_err!("Invalid kind: {}", kind)), } } diff --git a/cmds/aliasverify.rs b/cmds/aliasverify.rs index fdd0540dd8..67ff47ceac 100644 --- a/cmds/aliasverify.rs +++ b/cmds/aliasverify.rs @@ -38,11 +38,11 @@ use slog::Logger; use 
tokio::prelude::stream::iter_ok; use blobrepo::BlobRepo; -use blobstore::Blobstore; +use blobstore::Storable; use changesets::SqlChangesets; use cmdlib::args; use context::CoreContext; -use filestore::{self, FetchKey}; +use filestore::{self, Alias, AliasBlob, FetchKey}; use mononoke_types::{ hash::{self, Sha256}, ChangesetId, ContentAlias, ContentId, FileChange, RepositoryId, @@ -167,13 +167,12 @@ impl AliasVerification { cloned!(blobstore); move |meta| { if meta.sha256 == alias { - blobstore - .put( - ctx.clone(), - FetchKey::Sha256(meta.sha256).blobstore_key(), - ContentAlias::from_content_id(content_id).into_blob(), - ) - .left_future() + AliasBlob( + Alias::Sha256(meta.sha256), + ContentAlias::from_content_id(content_id), + ) + .store(ctx.clone(), &blobstore) + .left_future() } else { Err(format_err!( "Inconsistent hashes for {:?}, got {:?}, meta is {:?}", diff --git a/filestore/src/fetch_key.rs b/filestore/src/fetch_key.rs index e8f9d7c50d..ffa4b08f36 100644 --- a/filestore/src/fetch_key.rs +++ b/filestore/src/fetch_key.rs @@ -4,51 +4,87 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. -use blobstore::{Blobstore, Storable}; +use blobstore::{Blobstore, Loadable, Storable}; use context::CoreContext; use failure_ext::Error; -use futures::Future; +use futures::{Future, IntoFuture}; use futures_ext::{BoxFuture, FutureExt}; -use mononoke_types::{hash, ContentAlias, ContentId, MononokeId}; +use mononoke_types::{hash, ContentAlias, ContentId}; /// Key for fetching - we can access with any of the supported key types #[derive(Debug, Clone)] pub enum FetchKey { Canonical(ContentId), + Aliased(Alias), +} + +#[derive(Debug, Clone)] +pub enum Alias { Sha1(hash::Sha1), Sha256(hash::Sha256), GitSha1(hash::GitSha1), } -impl FetchKey { - pub fn blobstore_key(&self) -> String { - use FetchKey::*; +impl Loadable for FetchKey { + type Value = Option; + /// Return the canonical ID for a key. 
It doesn't check if the corresponding content actually + /// exists (its possible for an alias to exist before the ID if there was an interrupted store + /// operation). + fn load( + &self, + ctx: CoreContext, + blobstore: &B, + ) -> BoxFuture { match self { - Canonical(contentid) => contentid.blobstore_key(), - GitSha1(gitkey) => format!("alias.gitsha1.{}", gitkey.to_hex()), - Sha1(sha1) => format!("alias.sha1.{}", sha1.to_hex()), - Sha256(sha256) => format!("alias.sha256.{}", sha256.to_hex()), + FetchKey::Canonical(content_id) => Ok(Some(*content_id)).into_future().boxify(), + FetchKey::Aliased(alias) => alias.load(ctx, blobstore), } } } -pub struct AliasBlob(pub FetchKey, pub ContentAlias); +impl Alias { + fn blobstore_key(&self) -> String { + match self { + Alias::GitSha1(git_sha1) => format!("alias.gitsha1.{}", git_sha1.to_hex()), + Alias::Sha1(sha1) => format!("alias.sha1.{}", sha1.to_hex()), + Alias::Sha256(sha256) => format!("alias.sha256.{}", sha256.to_hex()), + } + } +} + +impl Loadable for Alias { + type Value = Option; + + fn load( + &self, + ctx: CoreContext, + blobstore: &B, + ) -> BoxFuture { + blobstore + .get(ctx, self.blobstore_key()) + .and_then(|maybe_alias| { + maybe_alias + .map(|blob| { + ContentAlias::from_bytes(blob.into_bytes().into()) + .map(|alias| alias.content_id()) + }) + .transpose() + }) + .boxify() + } +} + +pub struct AliasBlob(pub Alias, pub ContentAlias); impl Storable for AliasBlob { - type Key = FetchKey; + type Key = (); fn store( self, ctx: CoreContext, blobstore: &B, ) -> BoxFuture { - let key = self.0; - let alias = self.1; - - blobstore - .put(ctx, key.blobstore_key(), alias.into_blob()) - .map(move |_| key) - .boxify() + blobstore.put(ctx, self.0.blobstore_key(), self.1.into_blob()) } } diff --git a/filestore/src/finalize.rs b/filestore/src/finalize.rs index 15442aa06e..0104346623 100644 --- a/filestore/src/finalize.rs +++ b/filestore/src/finalize.rs @@ -12,9 +12,8 @@ use futures_ext::{try_left_future, FutureExt}; use 
mononoke_types::{BlobstoreValue, ContentAlias, ContentMetadata}; use crate::errors::{ErrorKind, InvalidHash}; -use crate::fetch_key::AliasBlob; +use crate::fetch_key::{Alias, AliasBlob}; use crate::prepare::Prepared; -use crate::FetchKey; use crate::StoreRequest; // Verify that a given $expected hash matches the $effective hash, and otherwise return a left @@ -78,13 +77,12 @@ pub fn finalize( let alias = ContentAlias::from_content_id(content_id); - let put_sha1 = AliasBlob(FetchKey::Sha1(sha1), alias.clone()).store(ctx.clone(), &blobstore); + let put_sha1 = AliasBlob(Alias::Sha1(sha1), alias.clone()).store(ctx.clone(), &blobstore); - let put_sha256 = - AliasBlob(FetchKey::Sha256(sha256), alias.clone()).store(ctx.clone(), &blobstore); + let put_sha256 = AliasBlob(Alias::Sha256(sha256), alias.clone()).store(ctx.clone(), &blobstore); let put_git_sha1 = - AliasBlob(FetchKey::GitSha1(git_sha1), alias.clone()).store(ctx.clone(), &blobstore); + AliasBlob(Alias::GitSha1(git_sha1), alias.clone()).store(ctx.clone(), &blobstore); let metadata = ContentMetadata { total_size, diff --git a/filestore/src/lib.rs b/filestore/src/lib.rs index 916535be19..37d599e257 100644 --- a/filestore/src/lib.rs +++ b/filestore/src/lib.rs @@ -14,9 +14,9 @@ use failure_ext::Error; use futures::{Future, IntoFuture, Stream}; use futures_ext::FutureExt; -use blobstore::Blobstore; +use blobstore::{Blobstore, Loadable}; use context::CoreContext; -use mononoke_types::{hash, ContentAlias, ContentId, ContentMetadata, FileContents, MononokeId}; +use mononoke_types::{hash, ContentId, ContentMetadata, FileContents, MononokeId}; mod alias; mod chunk; @@ -32,7 +32,7 @@ mod prepare; mod spawn; mod streamhash; -pub use fetch_key::FetchKey; +pub use fetch_key::{Alias, AliasBlob, FetchKey}; #[cfg(test)] mod test; @@ -142,30 +142,6 @@ impl StoreRequest { } } -/// Return the canonical ID for a key. 
It doesn't check if the corresponding content -/// actually exists (its possible for an alias to exist before the ID if there was an -/// interrupted store operation). -pub fn get_canonical_id( - blobstore: &B, - ctx: CoreContext, - key: &FetchKey, -) -> impl Future, Error = Error> { - match key { - FetchKey::Canonical(canonical) => Ok(Some(*canonical)).into_future().left_future(), - aliaskey => blobstore - .get(ctx, aliaskey.blobstore_key()) - .and_then(|maybe_alias| { - maybe_alias - .map(|blob| { - ContentAlias::from_bytes(blob.into_bytes().into()) - .map(|alias| alias.content_id()) - }) - .transpose() - }) - .right_future(), - } -} - /// Fetch the metadata for the underlying content. This will return None if the content does /// not exist. It might recompute metadata on the fly if the content exists but the metadata does /// not. @@ -174,7 +150,7 @@ pub fn get_metadata( ctx: CoreContext, key: &FetchKey, ) -> impl Future, Error = Error> { - get_canonical_id(blobstore, ctx.clone(), key).and_then({ + key.load(ctx.clone(), blobstore).and_then({ cloned!(blobstore, ctx); move |maybe_id| match maybe_id { Some(id) => metadata::get_metadata(blobstore, ctx, id).left_future(), @@ -190,7 +166,7 @@ pub fn exists( ctx: CoreContext, key: &FetchKey, ) -> impl Future { - get_canonical_id(blobstore, ctx.clone(), &key) + key.load(ctx.clone(), blobstore) .and_then({ cloned!(blobstore, ctx); move |maybe_id| maybe_id.map(|id| blobstore.is_present(ctx, id.blobstore_key())) @@ -209,7 +185,7 @@ pub fn fetch( ctx: CoreContext, key: &FetchKey, ) -> impl Future>, Error = Error> { - get_canonical_id(blobstore, ctx.clone(), key).and_then({ + key.load(ctx.clone(), blobstore).and_then({ cloned!(blobstore, ctx); move |content_id| match content_id { Some(content_id) => fetch::fetch(blobstore, ctx, content_id).left_future(), diff --git a/filestore/src/test/test_api.rs b/filestore/src/test/test_api.rs index 1a9299bddf..84a788e2c2 100644 --- a/filestore/src/test/test_api.rs +++ 
b/filestore/src/test/test_api.rs @@ -6,7 +6,7 @@ use super::{canonical, chunk, request}; use crate as filestore; -use crate::{errors, FetchKey, FilestoreConfig, StoreRequest}; +use crate::{errors, Alias, FetchKey, FilestoreConfig, StoreRequest}; use super::failing_blobstore::{FailingBlobstore, FailingBlobstoreError}; use assert_matches::assert_matches; @@ -138,9 +138,13 @@ fn filestore_put_get_sha1() -> Result<()> { ))?; let res = rt.block_on( - filestore::fetch(&blob, ctx, &FetchKey::Sha1(*HELLO_WORLD_SHA1)) - .map(|maybe_str| maybe_str.map(|s| s.concat2())) - .flatten(), + filestore::fetch( + &blob, + ctx, + &FetchKey::Aliased(Alias::Sha1(*HELLO_WORLD_SHA1)), + ) + .map(|maybe_str| maybe_str.map(|s| s.concat2())) + .flatten(), ); println!("res = {:#?}", res); @@ -167,9 +171,13 @@ fn filestore_put_get_git_sha1() -> Result<()> { ))?; let res = rt.block_on( - filestore::fetch(&blob, ctx, &FetchKey::GitSha1(*HELLO_WORLD_GIT_SHA1)) - .map(|maybe_str| maybe_str.map(|s| s.concat2())) - .flatten(), + filestore::fetch( + &blob, + ctx, + &FetchKey::Aliased(Alias::GitSha1(*HELLO_WORLD_GIT_SHA1)), + ) + .map(|maybe_str| maybe_str.map(|s| s.concat2())) + .flatten(), ); println!("res = {:#?}", res); @@ -196,9 +204,13 @@ fn filestore_put_get_sha256() -> Result<()> { ))?; let res = rt.block_on( - filestore::fetch(&blob, ctx, &FetchKey::Sha256(*HELLO_WORLD_SHA256)) - .map(|maybe_str| maybe_str.map(|s| s.concat2())) - .flatten(), + filestore::fetch( + &blob, + ctx, + &FetchKey::Aliased(Alias::Sha256(*HELLO_WORLD_SHA256)), + ) + .map(|maybe_str| maybe_str.map(|s| s.concat2())) + .flatten(), ); println!("res = {:#?}", res); @@ -649,7 +661,7 @@ fn filestore_test_missing_metadata() -> Result<()> { let res = rt.block_on(filestore::get_metadata( &blob, ctx.clone(), - &FetchKey::Sha1(*HELLO_WORLD_SHA1), + &FetchKey::Aliased(Alias::Sha1(*HELLO_WORLD_SHA1)), )); println!("res = {:#?}", res); assert_eq!(res?, None); @@ -657,7 +669,7 @@ fn filestore_test_missing_metadata() -> Result<()> { 
let res = rt.block_on(filestore::get_metadata( &blob, ctx.clone(), - &FetchKey::Sha256(*HELLO_WORLD_SHA256), + &FetchKey::Aliased(Alias::Sha256(*HELLO_WORLD_SHA256)), )); println!("res = {:#?}", res); assert_eq!(res?, None); @@ -665,7 +677,7 @@ fn filestore_test_missing_metadata() -> Result<()> { let res = rt.block_on(filestore::get_metadata( &blob, ctx.clone(), - &FetchKey::GitSha1(*HELLO_WORLD_GIT_SHA1), + &FetchKey::Aliased(Alias::GitSha1(*HELLO_WORLD_GIT_SHA1)), )); println!("res = {:#?}", res); assert_eq!(res?, None); diff --git a/filestore/src/test/test_invariants.rs b/filestore/src/test/test_invariants.rs index 9aae03d72e..cfa5e993a2 100644 --- a/filestore/src/test/test_invariants.rs +++ b/filestore/src/test/test_invariants.rs @@ -21,7 +21,7 @@ use crate::incremental_hash::{ hash_bytes, ContentIdIncrementalHasher, GitSha1IncrementalHasher, Sha1IncrementalHasher, Sha256IncrementalHasher, }; -use crate::{FetchKey, FilestoreConfig}; +use crate::{Alias, FetchKey, FilestoreConfig}; use super::failing_blobstore::FailingBlobstore; use super::request; @@ -39,9 +39,21 @@ fn check_consistency( let futs = vec![ filestore::fetch(blobstore, ctx.clone(), &FetchKey::Canonical(content_id)), - filestore::fetch(blobstore, ctx.clone(), &FetchKey::Sha1(sha1)), - filestore::fetch(blobstore, ctx.clone(), &FetchKey::Sha256(sha256)), - filestore::fetch(blobstore, ctx.clone(), &FetchKey::GitSha1(git_sha1)), + filestore::fetch( + blobstore, + ctx.clone(), + &FetchKey::Aliased(Alias::Sha1(sha1)), + ), + filestore::fetch( + blobstore, + ctx.clone(), + &FetchKey::Aliased(Alias::Sha256(sha256)), + ), + filestore::fetch( + blobstore, + ctx.clone(), + &FetchKey::Aliased(Alias::GitSha1(git_sha1)), + ), ]; let futs: Vec<_> = futs.into_iter().map(|f| f.map(|r| r.is_some())).collect();