Deduplicate uploads of raw file blobs

Summary:
When exactly the same file is uploaded more than once, upload each unique blob only once; a short standalone sketch of the deduplication pattern follows the diff below.

This is already done in the function that also uploads filenodes, but not here.

Reviewed By: liubov-dmitrieva

Differential Revision: D29941483

fbshipit-source-id: ef8509223a11816c1b6f1e7f376d05b96f074340
Author: Yan Soares Couto, 2021-07-28 04:23:32 -07:00 (committed by Facebook GitHub Bot)
parent 7652fc3d4a
commit 963e703210


@@ -530,14 +530,14 @@ pub trait EdenApiPyExt: EdenApi {
         let store = as_legacystore(py, store)?;
         let downcast_error = "incorrect upload token, failed to downcast 'token.data.id' to 'AnyId::AnyFileContentId::ContentId' type";
-        let (content_ids, data): (Vec<_>, Vec<_>) = keys
+        let (content_ids, mut data): (Vec<_>, Vec<_>) = keys
             .into_iter()
             .map(|key| {
                 let content = store.get_file_content(&key).map_pyerr(py)?;
                 match content {
                     Some(v) => {
                         let content_id = calc_contentid(&v);
-                        Ok((content_id, (AnyFileContentId::ContentId(content_id), v)))
+                        Ok((content_id, (content_id, v)))
                     }
                     None => Err(format_err!(
                         "failed to fetch file content for the key '{}'",
@@ -551,6 +551,15 @@ pub trait EdenApiPyExt: EdenApi {
             .into_iter()
             .unzip();
+        // Deduplicate upload data
+        let mut uniques = BTreeSet::new();
+        data.retain(|(content_id, _)| uniques.insert(*content_id));
+        let data = data
+            .into_iter()
+            .map(|(content_id, data)| (AnyFileContentId::ContentId(content_id), data))
+            .collect();
         let (responses, stats) = py
             .allow_threads(|| {
                 block_unless_interrupted(async move {
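
For readers skimming the pattern: `BTreeSet::insert` returns `false` when the value is already present, so `Vec::retain` keeps only the first tuple for each content id. Below is a minimal, self-contained Rust sketch of the same retain-based deduplication; `content_id_of` and the byte-slice blobs are hypothetical stand-ins for the real `calc_contentid` and store contents, not the actual EdenAPI code.

use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeSet;
use std::hash::{Hash, Hasher};

// Hypothetical stand-in for the real `calc_contentid`: any function mapping
// identical blobs to identical, ordered ids exercises the same pattern.
fn content_id_of(blob: &[u8]) -> u64 {
    let mut hasher = DefaultHasher::new();
    blob.hash(&mut hasher);
    hasher.finish()
}

fn main() {
    // Pair each blob with its content id, mirroring the `(content_id, data)`
    // tuples built in the diff above.
    let blobs = vec![&b"foo"[..], &b"bar"[..], &b"foo"[..]];
    let mut data: Vec<(u64, &[u8])> = blobs
        .into_iter()
        .map(|blob| (content_id_of(blob), blob))
        .collect();

    // Deduplicate upload data: `insert` returns false for an id that is
    // already in the set, so `retain` drops every repeated blob.
    let mut uniques = BTreeSet::new();
    data.retain(|(content_id, _)| uniques.insert(*content_id));

    assert_eq!(data.len(), 2); // the second "foo" blob will not be uploaded
}

Note that `retain` keeps the first occurrence of each id and preserves the relative order of the remaining elements, with the set insertions costing O(n log n) overall.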