edenapi: propagate missing content errors instead of panicking

Summary:
Remove the panicking APIs in D29647203 and instead propagate "missing content attribute" errors.

It was difficult to make these changes at the bottom of my diff stack, so I've added them here instead.

Reviewed By: DurhamG

Differential Revision: D29670495

fbshipit-source-id: 952ed4913a413c39ac3dff14a22f56e4766512ff
This commit is contained in:
Meyer Jacobs 2021-07-13 15:15:39 -07:00 committed by Facebook GitHub Bot
parent 3be8fcb6f9
commit e742e3befc
5 changed files with 100 additions and 79 deletions

View File

@ -340,6 +340,9 @@ fn cmd_file_check(args: DataCheckArgs) -> Result<()> {
Err(FileError::Lfs(..)) => {
println!("{} [LFS pointer]", entry.key());
}
Err(FileError::MissingContent(..)) => {
println!("{} [Missing content]", entry.key());
}
}
}
Ok(())

View File

@ -35,6 +35,8 @@ pub enum FileError {
Redacted(Key, Bytes),
#[error("Can't validate filenode hash for {0} because it is an LFS pointer")]
Lfs(Key, Bytes),
#[error("Content for {0} is unavailable")]
MissingContent(Key),
}
impl FileError {
@ -45,6 +47,7 @@ impl FileError {
Corrupt(InvalidHgId { data, .. }) => data,
Redacted(_, data) => data,
Lfs(_, data) => data,
MissingContent(_) => panic!("no content attribute available for FileEntry"),
}
.clone()
}
@ -66,6 +69,68 @@ pub struct FileContent {
pub metadata: Metadata,
}
impl FileContent {
/// Get this entry's data. Checks data integrity but allows hash mismatches
/// if the content is redacted or contains an LFS pointer.
pub fn data(&self, key: &Key, parents: Parents) -> Result<Bytes, FileError> {
use FileError::*;
self.data_checked(key, parents).or_else(|e| match e {
Corrupt(_) => Err(e),
Redacted(..) => Ok(e.data()),
Lfs(..) => Ok(e.data()),
MissingContent(_) => Err(e),
})
}
/// Get this entry's data after verifying the hgid hash.
///
/// This method will return an error if the computed hash doesn't match
/// the provided hash, regardless of the reason. Such mismatches are
/// sometimes expected (e.g., for redacted files or LFS pointers), so most
/// application logic should call `FileEntry::data` instead, which allows
/// these exceptions. `FileEntry::data_checked` should only be used when
/// strict filenode validation is required.
pub fn data_checked(&self, key: &Key, parents: Parents) -> Result<Bytes, FileError> {
// TODO(meyer): Clean this up, make LFS Pointers and redaction strongly typed all the way from here through scmstore
let data = &self.hg_file_blob;
// TODO(T48685378): Handle redacted content in a less hacky way.
if data.len() == REDACTED_TOMBSTONE.len() && data == REDACTED_TOMBSTONE {
return Err(FileError::Redacted(key.clone(), data.clone()));
}
// We can't check the hash of an LFS blob since it is computed using the
// full file content, but the file entry only contains the LFS pointer.
if self.metadata().is_lfs() {
return Err(FileError::Lfs(key.clone(), data.clone()));
}
let computed = HgId::from_content(&data, parents);
if computed != key.hgid {
let err = InvalidHgId {
expected: key.hgid,
computed,
parents,
data: data.clone(),
};
return Err(FileError::Corrupt(err));
}
Ok(data.clone())
}
/// Get this entry's data without verifying the hgid hash.
pub fn data_unchecked(&self) -> &Bytes {
&self.hg_file_blob
}
/// Get this entry's metadata.
pub fn metadata(&self) -> &Metadata {
&self.metadata
}
}
/// Structure representing source control file content on the wire.
/// Includes the information required to add the data to a mutable store,
/// along with the parents for hash validation.
@ -100,77 +165,6 @@ impl FileEntry {
&self.key
}
/// Get this entry's data. Checks data integrity but allows hash mismatches
/// if the content is redacted or contains an LFS pointer.
pub fn data(&self) -> Result<Bytes, FileError> {
use FileError::*;
self.data_checked().or_else(|e| match e {
Corrupt(_) => Err(e),
Redacted(..) => Ok(e.data()),
Lfs(..) => Ok(e.data()),
})
}
/// Get this entry's data after verifying the hgid hash.
///
/// This method will return an error if the computed hash doesn't match
/// the provided hash, regardless of the reason. Such mismatches are
/// sometimes expected (e.g., for redacted files or LFS pointers), so most
/// application logic should call `FileEntry::data` instead, which allows
/// these exceptions. `FileEntry::data_checked` should only be used when
/// strict filenode validation is required.
pub fn data_checked(&self) -> Result<Bytes, FileError> {
// TODO(meyer): Clean this up, make LFS Pointers and redaction strongly typed all the way from here through scmstore
let data = self
.content
.as_ref()
.map(|c| &c.hg_file_blob)
.expect("no content available");
// TODO(T48685378): Handle redacted content in a less hacky way.
if data.len() == REDACTED_TOMBSTONE.len() && data == REDACTED_TOMBSTONE {
return Err(FileError::Redacted(self.key.clone(), data.clone()));
}
// We can't check the hash of an LFS blob since it is computed using the
// full file content, but the file entry only contains the LFS pointer.
if self.metadata().is_lfs() {
return Err(FileError::Lfs(self.key.clone(), data.clone()));
}
let computed = HgId::from_content(&data, self.parents);
if computed != self.key.hgid {
let err = InvalidHgId {
expected: self.key.hgid,
computed,
parents: self.parents,
data: data.clone(),
};
return Err(FileError::Corrupt(err));
}
Ok(data.clone())
}
/// Get this entry's data without verifying the hgid hash.
pub fn data_unchecked(&self) -> Bytes {
self.content
.as_ref()
.map(|c| c.hg_file_blob.clone())
.expect("no content available")
}
/// Get this entry's metadata.
pub fn metadata(&self) -> &Metadata {
// TODO(meyer): Does this differ from the content in the envelope?
&self
.content
.as_ref()
.map(|c| &c.metadata)
.expect("no content available")
}
pub fn parents(&self) -> &Parents {
&self.parents
}
@ -178,6 +172,25 @@ impl FileEntry {
pub fn aux_data(&self) -> Option<&FileAuxData> {
self.aux_data.as_ref()
}
pub fn content(&self) -> Option<&FileContent> {
self.content.as_ref()
}
pub fn data(&self) -> Result<Bytes, FileError> {
self.content
.as_ref()
.ok_or_else(|| FileError::MissingContent(self.key().clone()))?
.data(self.key(), *self.parents())
}
pub fn metadata(&self) -> Result<&Metadata, FileError> {
Ok(self
.content
.as_ref()
.ok_or_else(|| FileError::MissingContent(self.key().clone()))?
.metadata())
}
}
#[cfg(any(test, feature = "for-tests"))]

View File

@ -84,7 +84,7 @@ pub trait HgIdMutableDeltaStore: HgIdDataStore + Send + Sync {
base: None,
key: entry.key().clone(),
};
self.add(&delta, entry.metadata())
self.add(&delta, entry.metadata()?)
}
fn add_tree(&self, entry: &TreeEntry) -> Result<()> {

View File

@ -355,6 +355,7 @@ impl std::convert::TryFrom<Entry> for crate::memcache::McData {
}
}
// TODO(meyer): Remove these infallible conversions, replace with fallible or inherent in LazyFile.
impl std::convert::From<TreeEntry> for Entry {
fn from(v: TreeEntry) -> Self {
Entry::new(
@ -370,8 +371,12 @@ impl std::convert::From<FileEntry> for Entry {
fn from(v: FileEntry) -> Self {
Entry::new(
v.key().clone(),
v.data_unchecked().into(),
v.metadata().clone(),
v.content()
.expect("missing content")
.data_unchecked()
.clone()
.into(),
v.metadata().expect("missing content").clone(),
)
}
}

View File

@ -1047,7 +1047,7 @@ impl LazyFile {
flags: None,
},
ContentStore(_, ref meta) => meta.clone(),
EdenApi(ref entry) => entry.metadata().clone(),
EdenApi(ref entry) => entry.metadata()?.clone(),
Memcache(ref entry) => entry.metadata.clone(),
})
}
@ -1061,7 +1061,7 @@ impl LazyFile {
EdenApi(ref entry) => Some(Entry::new(
key,
entry.data()?.into(),
entry.metadata().clone(),
entry.metadata()?.clone(),
)),
// TODO(meyer): We shouldn't ever need to replace the key with Memcache, can probably just clone this.
Memcache(ref entry) => Some({
@ -1104,7 +1104,7 @@ impl TryFrom<FileEntry> for LfsPointersEntry {
type Error = Error;
fn try_from(e: FileEntry) -> Result<Self, Self::Error> {
if e.metadata().is_lfs() {
if e.metadata()?.is_lfs() {
Ok(LfsPointersEntry::from_bytes(e.data()?, e.key().hgid)?)
} else {
bail!("failed to convert EdenApi FileEntry to LFS pointer, but is_lfs is false")
@ -1489,8 +1489,8 @@ impl FetchState {
let aux_data: FileAuxData = aux_data.clone().into();
self.found_attributes(key.clone(), aux_data.into(), None);
}
if entry.content.is_some() {
if entry.metadata().is_lfs() {
if let Some(content) = entry.content() {
if content.metadata().is_lfs() {
match entry.try_into() {
Ok(ptr) => self.found_pointer(key, ptr, StoreType::Shared),
Err(err) => self.errors.keyed_error(key, err),