implement batch blob fetching API in backingstore crate

Summary:
NOTE: The diffs in this stack work together; they are split up for easier reviewing. Check D21723465 if you want to read the whole stack in one place.

This diff implements `getBlobBatch` in the `backingstore` crate, which can process multiple blob import requests at once. This will help EdenFS process blob import requests efficiently. See the following diffs in the stack for usage.

To process a list of import requests, the function first checks whether each file exists locally. When a blob is already in the local store, the function calls the provided `resolve` function to send the blob back. Then, if the import permits remote fetching, it sends **ONE** request to the remote store (in our case, EdenAPI) for all of the missing files.

We use the callback-style `resolve` parameter because we want to wake waiting threads as soon as a blob is ready, so no time is wasted on unnecessary waiting.
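A minimal usage sketch of the new Rust API (the `fetch_all` helper and its logging are illustrative only, not part of this diff): the closure runs once per request index as soon as that blob is resolved, whether from the local store or from the single batched remote fetch.

```rust
use anyhow::Result;
use backingstore::BackingStore;
use types::Key;

// Hypothetical helper (not part of this diff): drives the batch API and logs
// each result. Assumes `store` and `keys` were constructed elsewhere.
fn fetch_all(store: &BackingStore, keys: Vec<Result<Key>>) {
    // `local_only = false` lets the store send one batched remote request
    // for anything that is missing from the on-disk stores.
    store.get_blob_batch(keys, false, |index, result| match result {
        Ok(Some(blob)) => eprintln!("request {}: got {} bytes", index, blob.len()),
        Ok(None) => eprintln!("request {}: blob not available", index),
        Err(e) => eprintln!("request {}: error: {}", index, e),
    });
}
```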

Reviewed By: xavierd

Differential Revision: D21697580

fbshipit-source-id: b550accf6f6163cf6f2e9be6b628e46f44076c37
Zeyi (Rice) Fan 2020-06-05 15:38:25 -07:00 committed by Facebook GitHub Bot
parent 18e012bb19
commit cfa5945749
7 changed files with 154 additions and 16 deletions


@@ -7,7 +7,7 @@
* This file is generated with cbindgen. Please run `./tools/cbindgen.sh` to
* update this file.
*
* @generated SignedSource<<0c58deb4a184da0c06e390219f491aa6>>
* @generated SignedSource<<73be5ea0efc629a3d97af92a1eb36494>>
*
*/
@@ -101,6 +101,12 @@ operator folly::ByteRange() const {
}
};
struct RustRequest {
const uint8_t *path;
uintptr_t length;
const uint8_t *node;
};
struct RustTreeEntry {
RustCBytes hash;
RustCBytes name;
@@ -128,6 +134,13 @@ RustCFallibleBase rust_backingstore_get_blob(RustBackingStore *store,
uintptr_t node_len,
bool local);
void rust_backingstore_get_blob_batch(RustBackingStore *store,
const RustRequest *requests,
uintptr_t size,
bool local,
void *data,
void (*resolve)(void*, uintptr_t, RustCFallibleBase));
RustCFallibleBase rust_backingstore_get_tree(RustBackingStore *store,
const uint8_t *node,
uintptr_t node_len);


@@ -7,6 +7,7 @@
use crate::remotestore::FakeRemoteStore;
use crate::treecontentstore::TreeContentStore;
use crate::utils::key_from_path_node_slice;
use anyhow::Result;
use configparser::config::ConfigSet;
use configparser::hg::ConfigSetHgExt;
@@ -16,7 +17,7 @@ use manifest::{List, Manifest};
use manifest_tree::TreeManifest;
use revisionstore::{
ContentStore, ContentStoreBuilder, EdenApiHgIdRemoteStore, HgIdDataStore, LocalStore,
MemcacheStore, StoreKey,
MemcacheStore, RemoteDataStore, StoreKey,
};
use std::path::Path;
use std::sync::Arc;
@@ -73,18 +74,7 @@ impl BackingStore {
})
}
/// Reads file from blobstores. When `local_only` is true, this function will only read blobs
/// from on disk stores.
pub fn get_blob(&self, path: &[u8], node: &[u8], local_only: bool) -> Result<Option<Vec<u8>>> {
let path = RepoPath::from_utf8(path)?.to_owned();
let node = Node::from_slice(node)?;
let key = Key::new(path, node);
// check if the blob is present on disk
if local_only && !self.blobstore.contains(&StoreKey::from(&key))? {
return Ok(None);
}
fn get_blob_impl(&self, key: Key) -> Result<Option<Vec<u8>>> {
// Return None for LFS blobs
// TODO: LFS support
if let Ok(Some(metadata)) = self.blobstore.get_meta(&key) {
@@ -99,6 +89,68 @@ impl BackingStore {
.map(|blob| blob.as_ref().to_vec()))
}
/// Reads file from blobstores. When `local_only` is true, this function will only read blobs
/// from on disk stores.
pub fn get_blob(&self, path: &[u8], node: &[u8], local_only: bool) -> Result<Option<Vec<u8>>> {
let key = key_from_path_node_slice(path, node)?;
// check if the blob is present on disk
if local_only && !self.blobstore.contains(&StoreKey::from(&key))? {
return Ok(None);
}
self.get_blob_impl(key)
}
/// Fetch file contents in batch. Whenever a blob is fetched, the supplied `resolve` function is
/// called with the file content or an error message, and the index of the blob in the request
/// array. When `local_only` is enabled, this function will only check local disk for the file
/// content.
pub fn get_blob_batch<F>(&self, keys: Vec<Result<Key>>, local_only: bool, resolve: F)
where
F: Fn(usize, Result<Option<Vec<u8>>>) -> (),
{
// logic:
// 1. convert all paths & nodes into `StoreKey`s
// 2. try to resolve blobs that are already local
// 3. prefetch anything that is not local, then resolve the remaining blobs
let requests = keys
.into_iter()
.enumerate()
.filter_map(|(index, key)| match key {
Ok(key) => Some((index, key)),
Err(e) => {
// return early when the key is invalid
resolve(index, Err(e));
None
}
});
let mut missing = Vec::new();
let mut missing_requests = Vec::new();
for (index, key) in requests {
let store_key = StoreKey::from(&key);
// Assume a blob does not exist if the `.contains` call fails
if self.blobstore.contains(&store_key).unwrap_or(false) {
resolve(index, self.get_blob_impl(key))
} else if !local_only {
missing.push(store_key);
missing_requests.push((index, key));
}
}
// If this is a local-only read, there is nothing else we can do.
if local_only {
return;
}
let _ = self.blobstore.prefetch(&missing);
for (index, key) in missing_requests {
resolve(index, self.get_blob_impl(key))
}
}
pub fn get_tree(&self, node: &[u8]) -> Result<List> {
let node = Node::from_slice(node)?;
let manifest = TreeManifest::durable(self.treestore.clone(), node);


@@ -17,5 +17,6 @@ mod backingstore;
mod raw;
mod remotestore;
mod treecontentstore;
mod utils;
pub use crate::backingstore::BackingStore;


@@ -8,12 +8,14 @@
//! Provides the c-bindings for `crate::backingstore`.
use anyhow::{ensure, Error, Result};
use libc::{c_char, size_t};
use libc::{c_char, c_void, size_t};
use std::convert::TryInto;
use std::vec::Vec;
use std::{slice, str};
use types::Key;
use crate::backingstore::BackingStore;
use crate::raw::{CBytes, CFallible, Tree};
use crate::raw::{CBytes, CFallible, Request, Tree};
fn stringpiece_to_slice<'a, T, U>(ptr: *const T, length: size_t) -> Result<&'a [U]> {
ensure!(!ptr.is_null(), "string ptr is null");
@@ -82,6 +84,29 @@ pub extern "C" fn rust_backingstore_get_blob(
backingstore_get_blob(store, name, name_len, node, node_len, local).into()
}
#[no_mangle]
pub extern "C" fn rust_backingstore_get_blob_batch(
store: *mut BackingStore,
requests: *const Request,
size: usize,
local: bool,
data: *mut c_void,
resolve: unsafe extern "C" fn(*mut c_void, usize, CFallible<CBytes>),
) {
assert!(!store.is_null());
let store = unsafe { &*store };
let requests: &[Request] = unsafe { slice::from_raw_parts(requests, size) };
let keys: Vec<Result<Key>> = requests.iter().map(|req| req.try_into_key()).collect();
store.get_blob_batch(keys, local, |idx, result| {
let result = result
.and_then(|opt| opt.ok_or_else(|| Error::msg("no blob found")))
.map(CBytes::from_vec)
.map(|result| Box::into_raw(Box::new(result)));
unsafe { resolve(data, idx, result.into()) };
});
}
fn backingstore_get_tree(
store: *mut BackingStore,
node: *const u8,


@@ -14,9 +14,11 @@ mod backingstore;
mod cbytes;
mod cfallible;
mod init;
mod request;
mod tests;
mod tree;
pub use cbytes::CBytes;
pub use cfallible::CFallible;
pub use request::Request;
pub use tree::Tree;


@@ -0,0 +1,30 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use anyhow::Result;
use std::slice;
use types::Key;
use crate::utils::key_from_path_node_slice;
// Number of bytes of a node.
const NODE_LENGTH: usize = 20;
#[repr(C)]
pub struct Request {
path: *const u8,
length: usize,
node: *const u8,
}
impl Request {
pub fn try_into_key(&self) -> Result<Key> {
let path: &[u8] = unsafe { slice::from_raw_parts(self.path, self.length) };
let node: &[u8] = unsafe { slice::from_raw_parts(self.node, NODE_LENGTH) };
key_from_path_node_slice(path, node)
}
}


@@ -0,0 +1,15 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use anyhow::Result;
use types::{Key, Node, RepoPath};
pub fn key_from_path_node_slice(path: &[u8], node: &[u8]) -> Result<Key> {
let path = RepoPath::from_utf8(path)?.to_owned();
let node = Node::from_slice(node)?;
Ok(Key::new(path, node))
}