From 963e703210c6b6dfe4b3ee42ab21fe5cc95ada30 Mon Sep 17 00:00:00 2001
From: Yan Soares Couto
Date: Wed, 28 Jul 2021 04:23:32 -0700
Subject: [PATCH] Deduplicate uploads of raw file blobs

Summary:
When uploading exactly the same file more than once, let's only upload each
blob once. This is already done in the function that also uploads filenodes,
but not here.

Reviewed By: liubov-dmitrieva

Differential Revision: D29941483

fbshipit-source-id: ef8509223a11816c1b6f1e7f376d05b96f074340
---
 .../bindings/modules/pyedenapi/src/pyext.rs | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/eden/scm/edenscmnative/bindings/modules/pyedenapi/src/pyext.rs b/eden/scm/edenscmnative/bindings/modules/pyedenapi/src/pyext.rs
index 3ede8f618f..af3a573ee8 100644
--- a/eden/scm/edenscmnative/bindings/modules/pyedenapi/src/pyext.rs
+++ b/eden/scm/edenscmnative/bindings/modules/pyedenapi/src/pyext.rs
@@ -530,14 +530,14 @@ pub trait EdenApiPyExt: EdenApi {
         let store = as_legacystore(py, store)?;
         let downcast_error = "incorrect upload token, failed to downcast 'token.data.id' to 'AnyId::AnyFileContentId::ContentId' type";
 
-        let (content_ids, data): (Vec<_>, Vec<_>) = keys
+        let (content_ids, mut data): (Vec<_>, Vec<_>) = keys
             .into_iter()
             .map(|key| {
                 let content = store.get_file_content(&key).map_pyerr(py)?;
                 match content {
                     Some(v) => {
                         let content_id = calc_contentid(&v);
-                        Ok((content_id, (AnyFileContentId::ContentId(content_id), v)))
+                        Ok((content_id, (content_id, v)))
                     }
                     None => Err(format_err!(
                         "failed to fetch file content for the key '{}'",
@@ -551,6 +551,15 @@
             .into_iter()
             .unzip();
 
+        // Deduplicate upload data
+        let mut uniques = BTreeSet::new();
+        data.retain(|(content_id, _)| uniques.insert(*content_id));
+        let data = data
+            .into_iter()
+            .map(|(content_id, data)| (AnyFileContentId::ContentId(content_id), data))
+            .collect();
+
+
         let (responses, stats) = py
             .allow_threads(|| {
                 block_unless_interrupted(async move {
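
Note on the change: retain() keeps only the first pair whose content id is newly
inserted into the BTreeSet, so repeated keys that resolve to the same blob are
uploaded once while the original upload order is preserved. Below is a minimal
standalone sketch of the same BTreeSet-plus-retain pattern, with u64 and Vec<u8>
as stand-ins for the real ContentId and blob types (the patched code derives the
id with calc_contentid and wraps it in AnyFileContentId afterwards):

use std::collections::BTreeSet;

// Order-preserving deduplication of (content_id, blob) pairs: keep only the
// first pair seen for each content id. u64 stands in for the real ContentId
// type and Vec<u8> for the blob bytes.
fn dedup_blobs(mut data: Vec<(u64, Vec<u8>)>) -> Vec<(u64, Vec<u8>)> {
    let mut uniques = BTreeSet::new();
    // BTreeSet::insert returns false for an id that is already present, so
    // retain drops every repeated blob while keeping the original order.
    data.retain(|(content_id, _)| uniques.insert(*content_id));
    data
}

fn main() {
    let blobs = vec![
        (1u64, b"foo".to_vec()),
        (2u64, b"bar".to_vec()),
        (1u64, b"foo".to_vec()), // duplicate content id, dropped below
    ];
    let unique = dedup_blobs(blobs);
    assert_eq!(unique.len(), 2);
    println!("{} unique blobs to upload", unique.len());
}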