From e742e3befc749fbf76356c27ca48f64a1fd6760d Mon Sep 17 00:00:00 2001
From: Meyer Jacobs
Date: Tue, 13 Jul 2021 15:15:39 -0700
Subject: [PATCH] edenapi: propagate missing content errors instead of panicking

Summary:
Remove the panicking APIs in D29647203 and instead propagate "missing content attribute" errors.

It was difficult to make these changes at the bottom of my diff stack, so I've added them here instead.

Reviewed By: DurhamG

Differential Revision: D29670495

fbshipit-source-id: 952ed4913a413c39ac3dff14a22f56e4766512ff
---
 .../lib/edenapi/tools/read_res/src/main.rs    |   3 +
 eden/scm/lib/edenapi/types/src/file.rs        | 155 ++++++++++--------
 eden/scm/lib/revisionstore/src/datastore.rs   |   2 +-
 .../revisionstore/src/indexedlogdatastore.rs  |   9 +-
 .../lib/revisionstore/src/scmstore/file.rs    |  10 +-
 5 files changed, 100 insertions(+), 79 deletions(-)

diff --git a/eden/scm/lib/edenapi/tools/read_res/src/main.rs b/eden/scm/lib/edenapi/tools/read_res/src/main.rs
index 64130bdd19..d56c191ca2 100644
--- a/eden/scm/lib/edenapi/tools/read_res/src/main.rs
+++ b/eden/scm/lib/edenapi/tools/read_res/src/main.rs
@@ -340,6 +340,9 @@ fn cmd_file_check(args: DataCheckArgs) -> Result<()> {
             Err(FileError::Lfs(..)) => {
                 println!("{} [LFS pointer]", entry.key());
             }
+            Err(FileError::MissingContent(..)) => {
+                println!("{} [Missing content]", entry.key());
+            }
         }
     }
     Ok(())
diff --git a/eden/scm/lib/edenapi/types/src/file.rs b/eden/scm/lib/edenapi/types/src/file.rs
index 7ce0f71f56..1c23b33a1b 100644
--- a/eden/scm/lib/edenapi/types/src/file.rs
+++ b/eden/scm/lib/edenapi/types/src/file.rs
@@ -35,6 +35,8 @@ pub enum FileError {
     Redacted(Key, Bytes),
     #[error("Can't validate filenode hash for {0} because it is an LFS pointer")]
     Lfs(Key, Bytes),
+    #[error("Content for {0} is unavailable")]
+    MissingContent(Key),
 }
 
 impl FileError {
@@ -45,6 +47,7 @@ impl FileError {
             Corrupt(InvalidHgId { data, .. }) => data,
             Redacted(_, data) => data,
             Lfs(_, data) => data,
+            MissingContent(_) => panic!("no content attribute available for FileEntry"),
         }
         .clone()
     }
@@ -66,6 +69,68 @@ pub struct FileContent {
     pub metadata: Metadata,
 }
 
+impl FileContent {
+    /// Get this entry's data. Checks data integrity but allows hash mismatches
+    /// if the content is redacted or contains an LFS pointer.
+    pub fn data(&self, key: &Key, parents: Parents) -> Result<Bytes, FileError> {
+        use FileError::*;
+        self.data_checked(key, parents).or_else(|e| match e {
+            Corrupt(_) => Err(e),
+            Redacted(..) => Ok(e.data()),
+            Lfs(..) => Ok(e.data()),
+            MissingContent(_) => Err(e),
+        })
+    }
+
+    /// Get this entry's data after verifying the hgid hash.
+    ///
+    /// This method will return an error if the computed hash doesn't match
+    /// the provided hash, regardless of the reason. Such mismatches are
+    /// sometimes expected (e.g., for redacted files or LFS pointers), so most
+    /// application logic should call `FileEntry::data` instead, which allows
+    /// these exceptions. `FileEntry::data_checked` should only be used when
+    /// strict filenode validation is required.
+    pub fn data_checked(&self, key: &Key, parents: Parents) -> Result<Bytes, FileError> {
+        // TODO(meyer): Clean this up, make LFS Pointers and redaction strongly typed all the way from here through scmstore
+        let data = &self.hg_file_blob;
+
+        // TODO(T48685378): Handle redacted content in a less hacky way.
+        if data.len() == REDACTED_TOMBSTONE.len() && data == REDACTED_TOMBSTONE {
+            return Err(FileError::Redacted(key.clone(), data.clone()));
+        }
+
+        // We can't check the hash of an LFS blob since it is computed using the
+        // full file content, but the file entry only contains the LFS pointer.
+        if self.metadata().is_lfs() {
+            return Err(FileError::Lfs(key.clone(), data.clone()));
+        }
+
+        let computed = HgId::from_content(&data, parents);
+        if computed != key.hgid {
+            let err = InvalidHgId {
+                expected: key.hgid,
+                computed,
+                parents,
+                data: data.clone(),
+            };
+
+            return Err(FileError::Corrupt(err));
+        }
+
+        Ok(data.clone())
+    }
+
+    /// Get this entry's data without verifying the hgid hash.
+    pub fn data_unchecked(&self) -> &Bytes {
+        &self.hg_file_blob
+    }
+
+    /// Get this entry's metadata.
+    pub fn metadata(&self) -> &Metadata {
+        &self.metadata
+    }
+}
+
 /// Structure representing source control file content on the wire.
 /// Includes the information required to add the data to a mutable store,
 /// along with the parents for hash validation.
@@ -100,77 +165,6 @@ impl FileEntry {
         &self.key
     }
 
-    /// Get this entry's data. Checks data integrity but allows hash mismatches
-    /// if the content is redacted or contains an LFS pointer.
-    pub fn data(&self) -> Result<Bytes, FileError> {
-        use FileError::*;
-        self.data_checked().or_else(|e| match e {
-            Corrupt(_) => Err(e),
-            Redacted(..) => Ok(e.data()),
-            Lfs(..) => Ok(e.data()),
-        })
-    }
-
-    /// Get this entry's data after verifying the hgid hash.
-    ///
-    /// This method will return an error if the computed hash doesn't match
-    /// the provided hash, regardless of the reason. Such mismatches are
-    /// sometimes expected (e.g., for redacted files or LFS pointers), so most
-    /// application logic should call `FileEntry::data` instead, which allows
-    /// these exceptions. `FileEntry::data_checked` should only be used when
-    /// strict filenode validation is required.
-    pub fn data_checked(&self) -> Result<Bytes, FileError> {
-        // TODO(meyer): Clean this up, make LFS Pointers and redaction strongly typed all the way from here through scmstore
-        let data = self
-            .content
-            .as_ref()
-            .map(|c| &c.hg_file_blob)
-            .expect("no content available");
-
-        // TODO(T48685378): Handle redacted content in a less hacky way.
-        if data.len() == REDACTED_TOMBSTONE.len() && data == REDACTED_TOMBSTONE {
-            return Err(FileError::Redacted(self.key.clone(), data.clone()));
-        }
-
-        // We can't check the hash of an LFS blob since it is computed using the
-        // full file content, but the file entry only contains the LFS pointer.
-        if self.metadata().is_lfs() {
-            return Err(FileError::Lfs(self.key.clone(), data.clone()));
-        }
-
-        let computed = HgId::from_content(&data, self.parents);
-        if computed != self.key.hgid {
-            let err = InvalidHgId {
-                expected: self.key.hgid,
-                computed,
-                parents: self.parents,
-                data: data.clone(),
-            };
-
-            return Err(FileError::Corrupt(err));
-        }
-
-        Ok(data.clone())
-    }
-
-    /// Get this entry's data without verifying the hgid hash.
-    pub fn data_unchecked(&self) -> Bytes {
-        self.content
-            .as_ref()
-            .map(|c| c.hg_file_blob.clone())
-            .expect("no content available")
-    }
-
-    /// Get this entry's metadata.
-    pub fn metadata(&self) -> &Metadata {
-        // TODO(meyer): Does this differ from the content in the envelope?
-        &self
-            .content
-            .as_ref()
-            .map(|c| &c.metadata)
-            .expect("no content available")
-    }
-
     pub fn parents(&self) -> &Parents {
         &self.parents
     }
@@ -178,6 +172,25 @@ impl FileEntry {
     pub fn aux_data(&self) -> Option<&FileAuxData> {
         self.aux_data.as_ref()
     }
+
+    pub fn content(&self) -> Option<&FileContent> {
+        self.content.as_ref()
+    }
+
+    pub fn data(&self) -> Result<Bytes, FileError> {
+        self.content
+            .as_ref()
+            .ok_or_else(|| FileError::MissingContent(self.key().clone()))?
+            .data(self.key(), *self.parents())
+    }
+
+    pub fn metadata(&self) -> Result<&Metadata, FileError> {
+        Ok(self
+            .content
+            .as_ref()
+            .ok_or_else(|| FileError::MissingContent(self.key().clone()))?
+            .metadata())
+    }
 }
 
 #[cfg(any(test, feature = "for-tests"))]
diff --git a/eden/scm/lib/revisionstore/src/datastore.rs b/eden/scm/lib/revisionstore/src/datastore.rs
index 0c84140507..68eb533376 100644
--- a/eden/scm/lib/revisionstore/src/datastore.rs
+++ b/eden/scm/lib/revisionstore/src/datastore.rs
@@ -84,7 +84,7 @@ pub trait HgIdMutableDeltaStore: HgIdDataStore + Send + Sync {
             base: None,
             key: entry.key().clone(),
         };
-        self.add(&delta, entry.metadata())
+        self.add(&delta, entry.metadata()?)
     }
 
     fn add_tree(&self, entry: &TreeEntry) -> Result<()> {
diff --git a/eden/scm/lib/revisionstore/src/indexedlogdatastore.rs b/eden/scm/lib/revisionstore/src/indexedlogdatastore.rs
index b4573cccea..27590a1897 100644
--- a/eden/scm/lib/revisionstore/src/indexedlogdatastore.rs
+++ b/eden/scm/lib/revisionstore/src/indexedlogdatastore.rs
@@ -355,6 +355,7 @@ impl std::convert::TryFrom<Entry> for crate::memcache::McData {
     }
 }
 
+// TODO(meyer): Remove these infallible conversions, replace with fallible or inherent in LazyFile.
 impl std::convert::From<TreeEntry> for Entry {
     fn from(v: TreeEntry) -> Self {
         Entry::new(
@@ -370,8 +371,12 @@ impl std::convert::From<FileEntry> for Entry {
     fn from(v: FileEntry) -> Self {
         Entry::new(
             v.key().clone(),
-            v.data_unchecked().into(),
-            v.metadata().clone(),
+            v.content()
+                .expect("missing content")
+                .data_unchecked()
+                .clone()
+                .into(),
+            v.metadata().expect("missing content").clone(),
         )
     }
 }
diff --git a/eden/scm/lib/revisionstore/src/scmstore/file.rs b/eden/scm/lib/revisionstore/src/scmstore/file.rs
index f04a698e55..5449a4e4b4 100644
--- a/eden/scm/lib/revisionstore/src/scmstore/file.rs
+++ b/eden/scm/lib/revisionstore/src/scmstore/file.rs
@@ -1047,7 +1047,7 @@ impl LazyFile {
                 flags: None,
             },
             ContentStore(_, ref meta) => meta.clone(),
-            EdenApi(ref entry) => entry.metadata().clone(),
+            EdenApi(ref entry) => entry.metadata()?.clone(),
             Memcache(ref entry) => entry.metadata.clone(),
         })
     }
@@ -1061,7 +1061,7 @@ impl LazyFile {
             EdenApi(ref entry) => Some(Entry::new(
                 key,
                 entry.data()?.into(),
-                entry.metadata().clone(),
+                entry.metadata()?.clone(),
             )),
             // TODO(meyer): We shouldn't ever need to replace the key with Memcache, can probably just clone this.
             Memcache(ref entry) => Some({
@@ -1104,7 +1104,7 @@ impl TryFrom<FileEntry> for LfsPointersEntry {
     type Error = Error;
 
     fn try_from(e: FileEntry) -> Result<Self, Self::Error> {
-        if e.metadata().is_lfs() {
+        if e.metadata()?.is_lfs() {
             Ok(LfsPointersEntry::from_bytes(e.data()?, e.key().hgid)?)
         } else {
             bail!("failed to convert EdenApi FileEntry to LFS pointer, but is_lfs is false")
@@ -1489,8 +1489,8 @@ impl FetchState {
                 let aux_data: FileAuxData = aux_data.clone().into();
                 self.found_attributes(key.clone(), aux_data.into(), None);
             }
-            if entry.content.is_some() {
-                if entry.metadata().is_lfs() {
+            if let Some(content) = entry.content() {
+                if content.metadata().is_lfs() {
                     match entry.try_into() {
                         Ok(ptr) => self.found_pointer(key, ptr, StoreType::Shared),
                         Err(err) => self.errors.keyed_error(key, err),
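
Below is a minimal, self-contained sketch of the error-propagation pattern this patch adopts, for readers who want to see it outside the diff context. The type and field names (FileError, FileEntry, key, content) are simplified stand-ins for the real edenapi types, and the bodies are illustrative only, not the code being changed above.

    use std::fmt;

    // Hypothetical, simplified stand-ins for the real edenapi types.
    #[derive(Debug, Clone)]
    enum FileError {
        // Returned when the optional content attribute was not fetched.
        MissingContent(String),
    }

    impl fmt::Display for FileError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            match self {
                FileError::MissingContent(key) => write!(f, "Content for {} is unavailable", key),
            }
        }
    }

    impl std::error::Error for FileError {}

    struct FileEntry {
        key: String,
        content: Option<Vec<u8>>, // None when content was not requested
    }

    impl FileEntry {
        // Propagate the missing attribute as an error instead of panicking.
        fn data(&self) -> Result<&[u8], FileError> {
            self.content
                .as_deref()
                .ok_or_else(|| FileError::MissingContent(self.key.clone()))
        }
    }

    fn main() {
        let entry = FileEntry {
            key: "path/to/example.rs".to_string(),
            content: None,
        };
        // Callers handle the error explicitly, as read_res does above.
        match entry.data() {
            Ok(bytes) => println!("{} ({} bytes)", entry.key, bytes.len()),
            Err(FileError::MissingContent(..)) => println!("{} [Missing content]", entry.key),
        }
    }

Because the new MissingContent variant in the real diff is declared with a thiserror #[error(...)] attribute, FileError continues to implement std::error::Error, which is what lets the new entry.data()? and entry.metadata()? call sites in revisionstore propagate into functions returning anyhow::Result.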