trees: port SCS aux data request method to edenapi

Summary: Introduces fetching of child entry IDs and child file metadata for a specified tree manifest ID. The aux data lookup is currently gated on `with_file_metadata`, which is not quite right: instead, `with_children` from the wire type should be exposed in the API request type, and `with_*_metadata` should be hidden or used only for data other than the child entry `Key`s.

Reviewed By: kulshrax

Differential Revision: D23886678

fbshipit-source-id: 0cba72cea7be47ae3348a406d407a19b60976c0c
This commit is contained in:
Meyer Jacobs 2020-10-14 11:10:59 -07:00 committed by Facebook GitHub Bot
parent bd0b2a04a4
commit 120fbd3280
7 changed files with 230 additions and 18 deletions

View File

@ -12,6 +12,7 @@ cmdlib = { path = "../cmdlib" }
context = { path = "../server/context" }
edenapi_types = { path = "../../scm/lib/edenapi/types" }
gotham_ext = { path = "../gotham_ext" }
manifest = { path = "../manifest" }
mercurial_types = { path = "../mercurial/types" }
mononoke_api = { path = "../mononoke_api" }
mononoke_types = { path = "../mononoke_types" }

View File

@ -6,19 +6,20 @@
*/
use anyhow::{Context, Error};
use futures::{stream, Stream, StreamExt};
use futures::{stream, Future, Stream, StreamExt, TryStreamExt};
use gotham::state::{FromState, State};
use gotham_derive::{StateData, StaticResponseExtender};
use serde::Deserialize;
use edenapi_types::{
wire::{ToApi, ToWire, WireTreeRequest},
TreeEntry, TreeRequest,
FileMetadata, TreeEntry, TreeRequest,
};
use gotham_ext::{error::HttpError, response::TryIntoResponse};
use mercurial_types::{HgManifestId, HgNodeHash};
use mononoke_api::hg::{HgDataContext, HgDataId, HgRepoContext};
use types::Key;
use manifest::Entry;
use mercurial_types::{FileType, HgFileNodeId, HgManifestId, HgNodeHash};
use mononoke_api::hg::{HgDataContext, HgDataId, HgRepoContext, HgTreeContext};
use types::{Key, RepoPathBuf};
use crate::context::ServerContext;
use crate::errors::ErrorKind;
@ -29,6 +30,7 @@ use super::{EdenApiMethod, HandlerInfo};
/// XXX: This number was chosen arbitrarily.
const MAX_CONCURRENT_TREE_FETCHES_PER_REQUEST: usize = 10;
const MAX_CONCURRENT_METADATA_FETCHES_PER_TREE_FETCH: usize = 100;
#[derive(Debug, Deserialize, StateData, StaticResponseExtender)]
pub struct TreeParams {
@ -64,10 +66,11 @@ fn fetch_all_trees(
repo: HgRepoContext,
request: TreeRequest,
) -> impl Stream<Item = Result<TreeEntry, Error>> {
let fetch_metadata = request.with_file_metadata.is_some();
let fetches = request
.keys
.into_iter()
.map(move |key| fetch_tree(repo.clone(), key));
.map(move |key| fetch_tree(repo.clone(), key, fetch_metadata));
stream::iter(fetches).buffer_unordered(MAX_CONCURRENT_TREE_FETCHES_PER_REQUEST)
}
@ -75,11 +78,15 @@ fn fetch_all_trees(
/// Fetch requested tree for a single key.
/// Note that this function consumes the repo context in order
/// to construct a tree context for the requested blob.
async fn fetch_tree(repo: HgRepoContext, key: Key) -> Result<TreeEntry, Error> {
async fn fetch_tree(
repo: HgRepoContext,
key: Key,
fetch_metadata: bool,
) -> Result<TreeEntry, Error> {
let id = HgManifestId::from_node_hash(HgNodeHash::from(key.hgid));
let ctx = id
.context(repo)
.context(repo.clone())
.await
.with_context(|| ErrorKind::TreeFetchFailed(key.clone()))?
.with_context(|| ErrorKind::KeyDoesNotExist(key.clone()))?;
@ -90,5 +97,66 @@ async fn fetch_tree(repo: HgRepoContext, key: Key) -> Result<TreeEntry, Error> {
.with_context(|| ErrorKind::TreeFetchFailed(key.clone()))?;
let parents = ctx.hg_parents().into();
Ok(TreeEntry::new(key, data, parents, metadata))
let mut entry = TreeEntry::new(key, data, parents, metadata);
if fetch_metadata {
let children = fetch_child_metadata_entries(&repo, &ctx)
.await?
.buffer_unordered(MAX_CONCURRENT_METADATA_FETCHES_PER_TREE_FETCH)
.try_collect()
.await?;
entry.with_children(Some(children));
}
Ok(entry)
}
/// Build a stream of futures, one per child of the given tree manifest,
/// each resolving to a `TreeEntry` carrying that child's key (and, for
/// files, its aux metadata).
///
/// Files (`Entry::Leaf`) trigger a per-file metadata lookup via
/// `fetch_child_file_metadata`; directories (`Entry::Tree`) become
/// key-only entries.
async fn fetch_child_metadata_entries<'a>(
    repo: &'a HgRepoContext,
    ctx: &'a HgTreeContext,
) -> Result<impl Stream<Item = impl Future<Output = Result<TreeEntry, Error>> + 'a> + 'a, Error> {
    // Eagerly collect the children: the iterator returned by `.entries()`
    // is not `Send`, so it cannot be held across an `.await` point.
    let entries = ctx.entries()?.collect::<Vec<_>>();

    Ok(stream::iter(entries).map({
        move |(name, entry)| async move {
            let name = RepoPathBuf::from_string(name.to_string())?;
            Ok(match entry {
                Entry::Leaf((file_type, child_id)) => {
                    let child_key = Key::new(name, child_id.into_nodehash().into());
                    // `child_key` is not used after this call, so it can be
                    // moved instead of cloned (the original `.clone()` was
                    // redundant).
                    fetch_child_file_metadata(repo, file_type, child_key).await?
                }
                Entry::Tree(child_id) => TreeEntry::new_directory_entry(Key::new(
                    name,
                    child_id.into_nodehash().into(),
                )),
            })
        }
    }))
}
/// Fetch the aux metadata for a single child file of a tree, producing a
/// `TreeEntry` that carries only the child's key and its `FileMetadata`.
///
/// The `file_type` comes from the parent tree manifest, as it is supplied
/// by the caller rather than derived from the filenode.
async fn fetch_child_file_metadata(
    repo: &HgRepoContext,
    file_type: FileType,
    child_key: Key,
) -> Result<TreeEntry, Error> {
    // Resolve the filenode; a missing file is reported as a fetch failure
    // for this specific key.
    let file = repo
        .file(HgFileNodeId::new(child_key.hgid.into()))
        .await?
        .ok_or_else(|| ErrorKind::FileFetchFailed(child_key.clone()))?;
    let fsnode = file.fetch_fsnode_data(file_type).await?;

    // Translate the fsnode fields into the wire-format metadata struct;
    // fields not covered here keep their defaults.
    let metadata = FileMetadata {
        file_type: Some((*fsnode.file_type()).into()),
        size: Some(fsnode.size()),
        content_sha1: Some((*fsnode.content_sha1()).into()),
        content_sha256: Some((*fsnode.content_sha256()).into()),
        content_id: Some((*fsnode.content_id()).into()),
        ..Default::default()
    };
    Ok(TreeEntry::new_file_entry(child_key, metadata))
}

View File

@ -15,9 +15,9 @@ use futures::{
};
use getbundle_response::SessionLfsParams;
use mercurial_types::{
envelope::HgFileEnvelope, HgFileHistoryEntry, HgFileNodeId, HgNodeHash, HgParents,
envelope::HgFileEnvelope, FileType, HgFileHistoryEntry, HgFileNodeId, HgNodeHash, HgParents,
};
use mononoke_types::MPath;
use mononoke_types::{fsnode::FsnodeFile, MPath};
use remotefilelog::create_getpack_v2_blob;
use revisionstore_types::Metadata;
@ -93,6 +93,37 @@ impl HgFileContext {
.compat()
.map_err(MononokeError::from)
}
/// Fetches the metadata that would be present in this file's corresponding
/// fsnode, returning it as a `FsnodeFile`, but without actually fetching
/// the fsnode itself.
///
/// Instead, this method reads the `ContentId` from the file envelope, uses
/// it to look up the size, SHA-1, and SHA-256 from the filestore, and
/// combines those with the `FileType`, which the caller must provide
/// (it is available in the parent tree manifest, not in the filenode).
pub async fn fetch_fsnode_data(
&self,
file_type: FileType,
) -> Result<FsnodeFile, MononokeError> {
let content_id = self.envelope.content_id();
let fetch_key = filestore::FetchKey::Canonical(content_id);
let blobstore = self.repo.blob_repo().blobstore();
// `.compat()` bridges the old futures-0.1 filestore API to async/await.
let metadata = filestore::get_metadata(blobstore, self.repo.ctx().clone(), &fetch_key)
.compat()
.await?
.ok_or_else(|| {
MononokeError::NotAvailable(format!(
"metadata not found for content id {}",
content_id
))
})?;
// Assemble the fsnode-shaped result from the separately-fetched pieces.
Ok(FsnodeFile::new(
content_id,
file_type,
metadata.total_size,
metadata.sha1,
metadata.sha256,
))
}
}
#[async_trait]

View File

@ -9,10 +9,12 @@ use async_trait::async_trait;
use bytes::Bytes;
use futures::compat::Future01CompatExt;
use manifest::{Entry, Manifest};
use mercurial_types::{
fetch_manifest_envelope, fetch_manifest_envelope_opt, HgBlobEnvelope, HgManifestEnvelope,
HgManifestId, HgNodeHash, HgParents,
fetch_manifest_envelope, fetch_manifest_envelope_opt, HgBlobEnvelope, HgFileNodeId,
HgManifestEnvelope, HgManifestId, HgNodeHash, HgParents,
};
use mononoke_types::{file_change::FileType, path::MPathElement};
use revisionstore_types::Metadata;
use crate::errors::MononokeError;
@ -64,6 +66,14 @@ impl HgTreeContext {
let blobstore = self.repo.blob_repo().blobstore().boxed();
mercurial_types::blobs::BlobManifest::parse(blobstore, self.envelope)
}
/// Returns an iterator over this tree's direct children: each item is the
/// child's path element paired with either a sub-tree manifest id
/// (`Entry::Tree`) or a `(FileType, HgFileNodeId)` pair for files
/// (`Entry::Leaf`).
///
/// NOTE(review): `self.clone()` appears to be required because
/// `into_blob_manifest` consumes `self`; confirm the clone is cheap before
/// calling this in a hot path.
pub fn entries(
&self,
) -> anyhow::Result<
impl Iterator<Item = (MPathElement, Entry<HgManifestId, (FileType, HgFileNodeId)>)>,
> {
Ok(self.clone().into_blob_manifest()?.list())
}
}
#[async_trait]

View File

@ -99,7 +99,7 @@ Check trees in response.
$ edenapi_read_res tree cat res.cbor --debug -p '' -h $ROOT_MFID_1
Reading from file: "res.cbor"
TreeEntry { key: Key { path: RepoPathBuf(""), hgid: HgId("15024c4dc4a27b572d623db342ae6a08d7f7adec") }, data: Some(b"test.txt\0186cafa3319c24956783383dc44c5cbc68c5a0ca\n"), parents: Some(None), file_metadata: None, directory_metadata: None, children: None }
TreeEntry { key: Key { path: RepoPathBuf(""), hgid: HgId("15024c4dc4a27b572d623db342ae6a08d7f7adec") }, data: Some(b"test.txt\0186cafa3319c24956783383dc44c5cbc68c5a0ca\n"), parents: Some(None), file_metadata: None, directory_metadata: None, children: Some([TreeEntry { key: Key { path: RepoPathBuf("test.txt"), hgid: HgId("186cafa3319c24956783383dc44c5cbc68c5a0ca") }, data: None, parents: None, file_metadata: Some(FileMetadata { revisionstore_flags: None, content_id: Some(ContentId("888dcf533a354c23e4bf67e1ada984d96bb1089b0c3c03f4c2cb773709e7aa42")), file_type: Some(Regular), size: Some(13), content_sha1: Some(Sha1("4fe2b8dd12cd9cd6a413ea960cd8c09c25f19527")), content_sha256: Some(Sha256("a1fff0ffefb9eace7230c24e50731f0a91c62f9cefdfe77121c2f607125dffae")) }), directory_metadata: None, children: None }]) }
$ edenapi_read_res tree cat res.cbor -p '' -h $ROOT_MFID_2
Reading from file: "res.cbor"

View File

@ -5,6 +5,8 @@
* GNU General Public License version 2.
*/
use std::fmt;
#[cfg(any(test, feature = "for-tests"))]
use quickcheck::Arbitrary;
use serde_derive::{Deserialize, Serialize};
@ -55,15 +57,75 @@ pub struct FileMetadataRequest {
pub with_content_sha256: bool,
}
#[derive(Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Sha1(pub [u8; 20]);

impl fmt::Display for Sha1 {
    /// Renders as `Sha1("<40 lowercase hex digits>")`.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "Sha1(\"")?;
        for d in &self.0 {
            write!(fmt, "{:02x}", d)?;
        }
        write!(fmt, "\")")
    }
}

impl fmt::Debug for Sha1 {
    /// Debug output is intentionally identical to `Display`; delegating
    /// avoids maintaining two copies of the same formatter.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, fmt)
    }
}
#[derive(Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Sha256(pub [u8; 32]);

impl fmt::Display for Sha256 {
    /// Renders as `Sha256("<64 lowercase hex digits>")`.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "Sha256(\"")?;
        for d in &self.0 {
            write!(fmt, "{:02x}", d)?;
        }
        write!(fmt, "\")")
    }
}

impl fmt::Debug for Sha256 {
    /// Debug output is intentionally identical to `Display`; delegating
    /// avoids maintaining two copies of the same formatter.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, fmt)
    }
}
#[derive(Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ContentId(pub [u8; 32]);

impl fmt::Display for ContentId {
    /// Renders as `ContentId("<64 lowercase hex digits>")`.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "ContentId(\"")?;
        for d in &self.0 {
            write!(fmt, "{:02x}", d)?;
        }
        write!(fmt, "\")")
    }
}

impl fmt::Debug for ContentId {
    /// Debug output is intentionally identical to `Display`; delegating
    /// avoids maintaining two copies of the same formatter.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, fmt)
    }
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileType {
Regular,
@ -71,9 +133,29 @@ pub enum FileType {
Symlink,
}
#[derive(Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct FsnodeId(pub [u8; 32]);

impl fmt::Display for FsnodeId {
    /// Renders as `FsnodeId("<64 lowercase hex digits>")`.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "FsnodeId(\"")?;
        for d in &self.0 {
            write!(fmt, "{:02x}", d)?;
        }
        write!(fmt, "\")")
    }
}

impl fmt::Debug for FsnodeId {
    /// Debug output is intentionally identical to `Display`; delegating
    /// avoids maintaining two copies of the same formatter.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, fmt)
    }
}
#[cfg(any(test, feature = "for-tests"))]
impl Arbitrary for DirectoryMetadata {
fn arbitrary<G: quickcheck::Gen>(g: &mut G) -> Self {

View File

@ -76,6 +76,26 @@ impl TreeEntry {
}
}
pub fn new_file_entry(key: Key, metadata: FileMetadata) -> Self {
Self {
key,
file_metadata: Some(metadata),
..Default::default()
}
}
pub fn new_directory_entry(key: Key) -> Self {
Self {
key,
..Default::default()
}
}
/// Attach (or clear, with `None`) the child entries for this tree node.
/// Returns `&mut Self` so the call can be chained.
///
/// Note: no explicit lifetime is needed here — lifetime elision produces
/// the identical signature (clippy `needless_lifetimes`).
pub fn with_children(&mut self, children: Option<Vec<TreeEntry>>) -> &mut Self {
    self.children = children;
    self
}
pub fn key(&self) -> &Key {
&self.key
}