edenapi: skip hash check for LFS files

Summary:
EdenAPI always checks the integrity of filenode hashes before returning file data to the application. In the case of LFS files, this resulted in errors because the filenode hash is computed using the full file content, but the blob from the server only contains an LFS pointer.

Fix the bug by exempting LFS blobs from filenode integrity checks. (If integrity checks for LFS blobs are desired, the LFS code should be able to do this on its own since LFS blobs are content-addressed.)

Reviewed By: quark-zju

Differential Revision: D24145027

fbshipit-source-id: d7d86e2b912f267eba4120d1f5186908c3f4e9e3
This commit is contained in:
Arun Kulshreshtha 2020-10-06 16:16:59 -07:00 committed by Facebook GitHub Bot
parent ee82a84a29
commit 7576d60c9c
2 changed files with 24 additions and 4 deletions

View File

@ -268,11 +268,14 @@ fn cmd_file_check(args: DataCheckArgs) -> Result<()> {
for entry in entries.into_iter().filter_map(to_api) {
match entry.data() {
Ok(_) => {}
Err(FileError::Corrupt(e)) => {
println!("{} [Invalid hash] {}", entry.key(), e);
}
Err(FileError::Redacted(..)) => {
println!("{} [Contents redacted]", entry.key());
}
Err(FileError::Corrupt(e)) => {
println!("{} [Invalid hash] {}", entry.key(), e);
Err(FileError::Lfs(..)) => {
println!("{} [LFS pointer]", entry.key());
}
}
}

View File

@ -33,6 +33,8 @@ pub enum FileError {
/// redacted data.
#[error("Content for {0} is redacted")]
Redacted(Key, Bytes),
#[error("Can't validate filenode hash for {0} because it is an LFS pointer")]
Lfs(Key, Bytes),
}
impl FileError {
@ -40,8 +42,9 @@ impl FileError {
pub fn data(&self) -> Bytes {
use FileError::*;
match self {
Redacted(_, data) => data,
Corrupt(InvalidHgId { data, .. }) => data,
Redacted(_, data) => data,
Lfs(_, data) => data,
}
.clone()
}
@ -73,22 +76,36 @@ impl FileEntry {
}
/// Get this entry's data. Checks data integrity but allows hash mismatches
/// if the content is redacted.
/// if the content is redacted or contains an LFS pointer.
pub fn data(&self) -> Result<Bytes, FileError> {
use FileError::*;
self.data_checked().or_else(|e| match e {
Corrupt(_) => Err(e),
Redacted(..) => Ok(e.data()),
Lfs(..) => Ok(e.data()),
})
}
/// Get this entry's data after verifying the hgid hash.
///
/// This method will return an error if the computed hash doesn't match
/// the provided hash, regardless of the reason. Such mismatches are
/// sometimes expected (e.g., for redacted files or LFS pointers), so most
/// application logic should call `FileEntry::data` instead, which allows
/// these exceptions. `FileEntry::data_checked` should only be used when
/// strict filenode validation is required.
pub fn data_checked(&self) -> Result<Bytes, FileError> {
// TODO(T48685378): Handle redacted content in a less hacky way.
if self.data.len() == REDACTED_TOMBSTONE.len() && self.data == REDACTED_TOMBSTONE {
return Err(FileError::Redacted(self.key.clone(), self.data.clone()));
}
// We can't check the hash of an LFS blob since it is computed using the
// full file content, but the file entry only contains the LFS pointer.
if self.metadata.is_lfs() {
return Err(FileError::Lfs(self.key.clone(), self.data.clone()));
}
let computed = HgId::from_content(&self.data, self.parents);
if computed != self.key.hgid {
let err = InvalidHgId {