mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 16:57:49 +03:00
edenapi: skip hash check for LFS files
Summary: EdenAPI always checks the integrity of filenode hashes before returning file data to the application. In the case of LFS files, this resulted in errors because the filenode hash is computed using the full file content, but the blob from the server only contains an LFS pointer. Fix the bug by exempting LFS blobs from filenode integrity checks. (If integrity checks for LFS blobs are desired, the LFS code should be able to do this on its own since LFS blobs are content-addressed.) Reviewed By: quark-zju Differential Revision: D24145027 fbshipit-source-id: d7d86e2b912f267eba4120d1f5186908c3f4e9e3
This commit is contained in:
parent
ee82a84a29
commit
7576d60c9c
@ -268,11 +268,14 @@ fn cmd_file_check(args: DataCheckArgs) -> Result<()> {
|
||||
for entry in entries.into_iter().filter_map(to_api) {
|
||||
match entry.data() {
|
||||
Ok(_) => {}
|
||||
Err(FileError::Corrupt(e)) => {
|
||||
println!("{} [Invalid hash] {}", entry.key(), e);
|
||||
}
|
||||
Err(FileError::Redacted(..)) => {
|
||||
println!("{} [Contents redacted]", entry.key());
|
||||
}
|
||||
Err(FileError::Corrupt(e)) => {
|
||||
println!("{} [Invalid hash] {}", entry.key(), e);
|
||||
Err(FileError::Lfs(..)) => {
|
||||
println!("{} [LFS pointer]", entry.key());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -33,6 +33,8 @@ pub enum FileError {
|
||||
/// redacted data.
|
||||
#[error("Content for {0} is redacted")]
|
||||
Redacted(Key, Bytes),
|
||||
#[error("Can't validate filenode hash for {0} because it is an LFS pointer")]
|
||||
Lfs(Key, Bytes),
|
||||
}
|
||||
|
||||
impl FileError {
|
||||
@ -40,8 +42,9 @@ impl FileError {
|
||||
pub fn data(&self) -> Bytes {
|
||||
use FileError::*;
|
||||
match self {
|
||||
Redacted(_, data) => data,
|
||||
Corrupt(InvalidHgId { data, .. }) => data,
|
||||
Redacted(_, data) => data,
|
||||
Lfs(_, data) => data,
|
||||
}
|
||||
.clone()
|
||||
}
|
||||
@ -73,22 +76,36 @@ impl FileEntry {
|
||||
}
|
||||
|
||||
/// Get this entry's data. Checks data integrity but allows hash mismatches
|
||||
/// if the content is redacted.
|
||||
/// if the content is redacted or contains an LFS pointer.
|
||||
pub fn data(&self) -> Result<Bytes, FileError> {
|
||||
use FileError::*;
|
||||
self.data_checked().or_else(|e| match e {
|
||||
Corrupt(_) => Err(e),
|
||||
Redacted(..) => Ok(e.data()),
|
||||
Lfs(..) => Ok(e.data()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Get this entry's data after verifying the hgid hash.
|
||||
///
|
||||
/// This method will return an error if the computed hash doesn't match
|
||||
/// the provided hash, regardless of the reason. Such mismatches are
|
||||
/// sometimes expected (e.g., for redacted files or LFS pointers), so most
|
||||
/// application logic should call `FileEntry::data` instead, which allows
|
||||
/// these exceptions. `FileEntry::data_checked` should only be used when
|
||||
/// strict filenode validation is required.
|
||||
pub fn data_checked(&self) -> Result<Bytes, FileError> {
|
||||
// TODO(T48685378): Handle redacted content in a less hacky way.
|
||||
if self.data.len() == REDACTED_TOMBSTONE.len() && self.data == REDACTED_TOMBSTONE {
|
||||
return Err(FileError::Redacted(self.key.clone(), self.data.clone()));
|
||||
}
|
||||
|
||||
// We can't check the hash of an LFS blob since it is computed using the
|
||||
// full file content, but the file entry only contains the LFS pointer.
|
||||
if self.metadata.is_lfs() {
|
||||
return Err(FileError::Lfs(self.key.clone(), self.data.clone()));
|
||||
}
|
||||
|
||||
let computed = HgId::from_content(&self.data, self.parents);
|
||||
if computed != self.key.hgid {
|
||||
let err = InvalidHgId {
|
||||
|
Loading…
Reference in New Issue
Block a user