Implement filter arguments support in Mononoke Git server
Summary: This diff implements the capability to parse and accept filter arguments from the Git client. A follow-up diff will add actual support for filtering on the server.

Reviewed By: markbt

Differential Revision: D57052433

fbshipit-source-id: 7bedaa398d938f33f18570fe96e36ac3c83df0ed
parent b457df85e1
commit 7952fc3222
@@ -23,6 +23,7 @@ git_symbolic_refs = { version = "0.1.0", path = "../../git_symbolic_refs" }
 git_types = { version = "0.1.0", path = "../git_types" }
 gix-date = "0.7"
 gix-hash = "0.11"
+gix-object = "0.33"
 metaconfig_types = { version = "0.1.0", path = "../../metaconfig/types" }
 mononoke_types = { version = "0.1.0", path = "../../mononoke_types" }
 packetline = { version = "0.1.0", path = "../packetline" }
@@ -13,6 +13,7 @@ rust_library(
         "fbsource//third-party/rust:futures",
         "fbsource//third-party/rust:gix-date",
         "fbsource//third-party/rust:gix-hash",
+        "fbsource//third-party/rust:gix-object",
         "fbsource//third-party/rust:rustc-hash",
         "fbsource//third-party/rust:tokio",
         "//common/rust/shed/buffered_weighted:buffered_weighted",
@@ -340,12 +340,20 @@ pub struct FetchRequest {
     pub deepen_not: Option<ObjectId>,
     /// Request that various objects from the packfile be omitted using
     /// one of several filtering techniques
-    pub filter: Option<String>,
+    pub filter: Option<FetchFilter>,
     /// The concurrency setting to be used for generating the packfile items for the
     /// fetch request
     pub concurrency: PackfileConcurrency,
 }
 
+/// Struct representing the filtering options that can be used during fetch / clone
+#[derive(Debug, Clone)]
+pub struct FetchFilter {
+    pub max_blob_size: u64,
+    pub max_tree_depth: u64,
+    pub allowed_object_types: Vec<gix_object::Kind>,
+}
+
 /// Struct representing the packfile item response generated for the
 /// given range of commits
 pub struct PackItemStreamResponse<'a> {
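FetchFilter only carries the parsed limits at this point; actual enforcement is deferred to the follow-up diff mentioned in the summary. As a rough sketch of how a packfile generator might eventually consult these fields (a hypothetical `allows` helper, with a local enum standing in for gix_object::Kind):

// Hypothetical sketch, not part of this diff: consulting FetchFilter when
// deciding whether an object belongs in the generated packfile.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind { Blob, Tree, Commit, Tag } // stand-in for gix_object::Kind

struct FetchFilter {
    max_blob_size: u64,
    max_tree_depth: u64,
    allowed_object_types: Vec<Kind>,
}

impl FetchFilter {
    /// Returns true if an object of the given kind/size/depth passes the filter.
    fn allows(&self, kind: Kind, size: u64, tree_depth: u64) -> bool {
        if !self.allowed_object_types.contains(&kind) {
            return false;
        }
        match kind {
            Kind::Blob => size <= self.max_blob_size,
            Kind::Tree => tree_depth <= self.max_tree_depth,
            _ => true,
        }
    }
}

fn main() {
    // Limits corresponding to a "blob:limit=5m" filter spec.
    let filter = FetchFilter {
        max_blob_size: 5 * 1024 * 1024,
        max_tree_depth: u64::MAX,
        allowed_object_types: vec![Kind::Blob, Kind::Tree, Kind::Commit, Kind::Tag],
    };
    assert!(filter.allows(Kind::Blob, 1024, 0));
    assert!(!filter.allows(Kind::Blob, 10 * 1024 * 1024, 0));
}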
@@ -33,6 +33,7 @@ futures_stats = { version = "0.1.0", git = "https://github.com/facebookexperimen
 git_symbolic_refs = { version = "0.1.0", path = "../git_symbolic_refs" }
 gix-date = "0.7"
 gix-hash = "0.11"
+gix-object = "0.33"
 gix-packetline = { version = "0.17.2", features = ["blocking-io"] }
 gix-transport = "0.41"
 gotham = "0.7.1"
@@ -16,6 +16,7 @@ rust_binary(
         "fbsource//third-party/rust:futures",
         "fbsource//third-party/rust:gix-date",
         "fbsource//third-party/rust:gix-hash",
+        "fbsource//third-party/rust:gix-object",
         "fbsource//third-party/rust:gix-packetline",
         "fbsource//third-party/rust:gix-transport",
         "fbsource//third-party/rust:gotham",
@@ -5,11 +5,15 @@
  * GNU General Public License version 2.
  */
 
+use anyhow::bail;
 use anyhow::Context;
+use anyhow::Result;
 use gix_hash::ObjectId;
+use gix_object::Kind;
 use gix_packetline::PacketLineRef;
 use gix_packetline::StreamingPeekableIter;
 use gix_transport::bstr::ByteSlice;
+use protocol::types::FetchFilter;
 use protocol::types::FetchRequest;
 use protocol::types::PackfileConcurrency;
@@ -81,7 +85,7 @@ pub struct FetchArgs {
     pub deepen_not: Option<ObjectId>,
     /// Request that various objects from the packfile be omitted using
     /// one of several filtering techniques
-    pub filter: Option<String>,
+    pub filter: Option<FilterArgs>,
     /// Indicates to the server that the client wants to retrieve a particular set of
     /// refs by providing the full name of the ref on the server
     pub want_refs: Vec<String>,
@@ -97,7 +101,115 @@ pub struct FetchArgs {
     pub wait_for_done: bool,
 }
 
-fn parse_oid(data: &[u8], oid_type: &[u8]) -> anyhow::Result<ObjectId> {
+/// Argument for filtering objects during clone/fetch
+#[derive(Clone, Debug)]
+pub struct FilterArgs {
+    max_blob_size: u64,
+    max_tree_depth: u64,
+    allowed_object_types: Vec<Kind>,
+}
+
+impl Default for FilterArgs {
+    fn default() -> Self {
+        Self {
+            max_blob_size: u64::MAX,
+            max_tree_depth: u64::MAX,
+            allowed_object_types: vec![Kind::Blob, Kind::Tree, Kind::Commit, Kind::Tag],
+        }
+    }
+}
+
+impl FilterArgs {
+    const COMBINE_PREFIX: &'static str = "combine:";
+    const OBJECT_TYPE_PREFIX: &'static str = "object:type=";
+    const BLOB_PREFIX: &'static str = "blob:";
+    const TREE_PREFIX: &'static str = "tree:";
+    const NO_BLOBS: &'static str = "none";
+    const SIZE_LIMIT: &'static str = "limit=";
+    const FILTER_SPLITTER: &'static str = "+";
+
+    fn parse_size(size: &str) -> Result<u64> {
+        const KB_SUFFIX: &str = "k";
+        const MB_SUFFIX: &str = "m";
+        const GB_SUFFIX: &str = "g";
+        let mut multiplier = 1;
+        let size = if let Some(size_num) = size.strip_suffix(KB_SUFFIX) {
+            multiplier = 1024;
+            size_num
+        } else if let Some(size_num) = size.strip_suffix(MB_SUFFIX) {
+            multiplier = 1024 * 1024;
+            size_num
+        } else if let Some(size_num) = size.strip_suffix(GB_SUFFIX) {
+            multiplier = 1024 * 1024 * 1024;
+            size_num
+        } else {
+            size
+        };
+        size.parse::<u64>()
+            .map(|size| size * multiplier)
+            .with_context(|| format!("Invalid blob size {:?}", size))
+    }
+
+    fn parse_from_spec(data: String) -> Result<Self> {
+        let filter_set = if let Some(combined_filters) = data.strip_prefix(Self::COMBINE_PREFIX) {
+            // There are multiple filters combined together
+            combined_filters
+                .split(Self::FILTER_SPLITTER)
+                .map(String::from)
+                .collect()
+        } else {
+            // There is only one filter
+            vec![data]
+        };
+        let mut filter_args = FilterArgs::default();
+        let mut allowed_type = None;
+        for filter in filter_set {
+            if let Some(object_type) = filter.strip_prefix(Self::OBJECT_TYPE_PREFIX) {
+                let object_kind = Kind::from_bytes(object_type.as_bytes())
+                    .with_context(|| format!("Invalid object type {:?}", object_type))?;
+                // Git has this weird behavior where, if you specify multiple allowed object
+                // types, it just honors the first one it comes across. And no, there is no
+                // mention of it in the docs. Found this out through code reading and trial-and-error :)
+                if allowed_type.is_none() {
+                    allowed_type = Some(object_kind);
+                }
+            }
+            if let Some(blob_size) = filter.strip_prefix(Self::BLOB_PREFIX) {
+                if blob_size == Self::NO_BLOBS {
+                    filter_args.max_blob_size = 0;
+                } else if let Some(blob_limit) = blob_size.strip_prefix(Self::SIZE_LIMIT) {
+                    filter_args.max_blob_size = Self::parse_size(blob_limit)?;
+                } else {
+                    bail!(
+                        "Invalid blob size {:?} in filter spec {}",
+                        blob_size,
+                        filter
+                    );
+                }
+            }
+            if let Some(tree_depth) = filter.strip_prefix(Self::TREE_PREFIX) {
+                let max_depth = tree_depth
+                    .parse::<u64>()
+                    .with_context(|| format!("Invalid tree depth {:?}", tree_depth))?;
+                filter_args.max_tree_depth = max_depth;
+            }
+        }
+        if let Some(allowed_type) = allowed_type {
+            filter_args.allowed_object_types = vec![allowed_type];
+        }
+        Ok(filter_args)
+    }
+
+    fn into_fetch_filter(self) -> FetchFilter {
+        FetchFilter {
+            max_blob_size: self.max_blob_size,
+            max_tree_depth: self.max_tree_depth,
+            allowed_object_types: self.allowed_object_types,
+        }
+    }
+}
+
+fn parse_oid(data: &[u8], oid_type: &[u8]) -> Result<ObjectId> {
     ObjectId::from_hex(data).with_context(|| {
         format!(
             "Invalid {:?}object id {:?} received during fetch request",
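The size grammar accepted by parse_size above mirrors Git's blob:limit=<n>[kmg] filter syntax: a bare number is bytes, while the k, m, and g suffixes scale by 1024, 1024^2, and 1024^3. A dependency-free restatement of just that arithmetic (hypothetical standalone function, Option in place of anyhow::Result):

// Sketch of the suffix arithmetic used by parse_size, kept dependency-free.
fn parse_size(size: &str) -> Option<u64> {
    let (num, multiplier) = if let Some(n) = size.strip_suffix('k') {
        (n, 1024u64)
    } else if let Some(n) = size.strip_suffix('m') {
        (n, 1024 * 1024)
    } else if let Some(n) = size.strip_suffix('g') {
        (n, 1024 * 1024 * 1024)
    } else {
        (size, 1) // no suffix: the number is already in bytes
    };
    num.parse::<u64>().ok().map(|s| s * multiplier)
}

fn main() {
    assert_eq!(parse_size("49999"), Some(49999));
    assert_eq!(parse_size("5k"), Some(5 * 1024));
    assert_eq!(parse_size("5m"), Some(5 * 1024 * 1024));
    assert_eq!(parse_size("2g"), Some(2 * 1024 * 1024 * 1024));
    assert_eq!(parse_size("bogus"), None);
}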
@@ -110,11 +222,10 @@ fn bytes_to_str<'a, 'b, 'c>(
     bytes: &'a [u8],
     bytes_type: &'b str,
     arg_type: &'c str,
-) -> anyhow::Result<&'a str> {
+) -> Result<&'a str> {
     std::str::from_utf8(bytes).with_context(|| {
         format!(
-            "Invalid {} bytes {:?} received for {:?} during fetch command args parsing",
-            bytes_type, arg_type, bytes
+            "Invalid {bytes_type} bytes {bytes:?} received for {arg_type} during fetch command args parsing",
         )
     })
 }
@@ -133,13 +244,13 @@ impl FetchArgs {
         self.filter.is_some()
     }
 
-    fn validate(&self) -> anyhow::Result<()> {
+    fn validate(&self) -> Result<()> {
         if self.deepen.is_some() && self.deepen_since.is_some() {
-            anyhow::bail!(
+            bail!(
                 "deepen and deepen-since arguments cannot be provided at the same time for fetch command"
             )
         } else if self.deepen.is_some() && self.deepen_not.is_some() {
-            anyhow::bail!(
+            bail!(
                 "deepen and deepen-not arguments cannot be provided at the same time for fetch command"
             )
         } else {
@@ -147,7 +258,7 @@ impl FetchArgs {
         }
     }
 
-    pub fn parse_from_packetline(args: &[u8]) -> anyhow::Result<Self> {
+    pub fn parse_from_packetline(args: &[u8]) -> Result<Self> {
         let mut tokens = StreamingPeekableIter::new(args, &[PacketLineRef::Flush], true);
         let mut fetch_args = Self::default();
         while let Some(token) = tokens.read_line() {
@@ -179,7 +290,7 @@ impl FetchArgs {
                 fetch_args.deepen_not = Some(parse_oid(oid_depth, DEEPEN_NOT_PREFIX)?);
             } else if let Some(filter) = data.strip_prefix(FILTER_PREFIX) {
                 let filter_spec = bytes_to_str(filter, "filter_spec", "filter")?.to_owned();
-                fetch_args.filter = Some(filter_spec);
+                fetch_args.filter = Some(FilterArgs::parse_from_spec(filter_spec)?);
             } else if let Some(want_ref) = data.strip_prefix(WANT_REF_PREFIX) {
                 let want_ref = bytes_to_str(want_ref, "want_ref", "want-ref")?.to_owned();
                 fetch_args.want_refs.push(want_ref);
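For context on what this branch consumes: in protocol v2 each fetch argument arrives as its own pkt-line, whose 4-hex-digit length prefix counts itself plus the payload. A minimal illustration of that framing for a filter argument (the spec value here is just an example):

// Illustrative only: pkt-line framing for a client-sent filter argument.
// The length prefix is 4 hex digits and includes its own 4 bytes.
fn main() {
    let payload = "filter blob:none\n";
    let pkt_line = format!("{:04x}{}", payload.len() + 4, payload);
    assert_eq!(pkt_line, "0015filter blob:none\n");
    print!("{}", pkt_line);
}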
@@ -201,14 +312,14 @@ impl FetchArgs {
                     WAIT_FOR_DONE => fetch_args.wait_for_done = true,
                     SIDEBAND_ALL => fetch_args.sideband_all = true,
                     DEEPEN_RELATIVE => fetch_args.deepen_relative = true,
-                    arg => anyhow::bail!(
+                    arg => bail!(
                         "Unexpected arg {} in fetch command args",
                         String::from_utf8_lossy(arg)
                     ),
                 };
             }
         } else {
-            anyhow::bail!(
+            bail!(
                 "Unexpected token {:?} in packetline during fetch command args parsing",
                 token
             );
@@ -231,7 +342,7 @@ impl FetchArgs {
             deepen_since: self.deepen_since,
             deepen_not: self.deepen_not,
             deepen_relative: self.deepen_relative,
-            filter: self.filter,
+            filter: self.filter.map(FilterArgs::into_fetch_filter),
             concurrency,
         }
     }
@@ -241,13 +352,14 @@ impl FetchArgs {
 mod tests {
     use std::io::Write;
 
+    use anyhow::Result;
     use gix_packetline::encode::flush_to_write;
     use gix_packetline::Writer;
 
     use super::*;
 
     #[test]
-    fn test_fetch_command_args_parsing() -> anyhow::Result<()> {
+    fn test_fetch_command_args_parsing() -> Result<()> {
         let inner_writer = Vec::new();
         let mut packetline_writer = Writer::new(inner_writer);
         packetline_writer.write_all(b"thin-pack\n")?;
@@ -266,6 +378,7 @@ mod tests {
         packetline_writer.write_all(b"want 1000000000000000000000000000000000000001\n")?;
         packetline_writer.write_all(b"have 2000000000000000000000000000000000000002\n")?;
         packetline_writer.write_all(b"shallow 1000000000000000000000000000000000000001\n")?;
+        packetline_writer.write_all(b"filter combine:blob:none+tree:5+object:type=blob+object:type=tree+object:type=commit\n")?;
         packetline_writer.write_all(b"done\n")?;
         packetline_writer.flush()?;
         let mut inner_writer = packetline_writer.into_inner();
@@ -283,11 +396,12 @@ mod tests {
         assert_eq!(parsed_args.shallow.len(), 2);
         assert_eq!(parsed_args.haves.len(), 3);
         assert_eq!(parsed_args.wants.len(), 2);
+        assert!(parsed_args.filter.is_some());
         Ok(())
     }
 
     #[test]
-    fn test_fetch_command_args_validation() -> anyhow::Result<()> {
+    fn test_fetch_command_args_validation() -> Result<()> {
         let inner_writer = Vec::new();
         let mut packetline_writer = Writer::new(inner_writer);
         packetline_writer.write_all(b"deepen 1\n")?;
@@ -309,7 +423,7 @@ mod tests {
     }
 
     #[test]
-    fn test_fetch_command_args_time_parsing() -> anyhow::Result<()> {
+    fn test_fetch_command_args_time_parsing() -> Result<()> {
         let inner_writer = Vec::new();
         let mut packetline_writer = Writer::new(inner_writer);
         packetline_writer.write_all(b"deepen-since 1979-02-26 18:30:00\n")?;
@@ -327,4 +441,45 @@ mod tests {
         assert!(FetchArgs::parse_from_packetline(&inner_writer).is_ok());
         Ok(())
     }
+
+    #[test]
+    fn test_filter_args_parsing() -> Result<()> {
+        let raw_input =
+            "combine:blob:none+tree:5+object:type=blob+object:type=tree+object:type=commit"
+                .to_string();
+        let filter_args = FilterArgs::parse_from_spec(raw_input)?;
+
+        assert_eq!(
+            filter_args.allowed_object_types,
+            vec![Kind::Blob] // Since blob was the first one in the spec, the rest are ignored
+        );
+        assert_eq!(filter_args.max_tree_depth, 5);
+        assert_eq!(filter_args.max_blob_size, 0);
+
+        let raw_input = "tree:5".to_string();
+        let filter_args = FilterArgs::parse_from_spec(raw_input)?;
+        assert_eq!(filter_args.max_tree_depth, 5);
+        assert_eq!(filter_args.max_blob_size, u64::MAX);
+
+        let raw_input = "object:type=commit".to_string();
+        let filter_args = FilterArgs::parse_from_spec(raw_input)?;
+        assert_eq!(filter_args.max_tree_depth, u64::MAX);
+        assert_eq!(filter_args.max_blob_size, u64::MAX);
+        assert_eq!(filter_args.allowed_object_types, vec![Kind::Commit]);
+
+        let raw_input = "blob:limit=5m".to_string();
+        let filter_args = FilterArgs::parse_from_spec(raw_input)?;
+        assert_eq!(filter_args.max_tree_depth, u64::MAX);
+        assert_eq!(filter_args.max_blob_size, 5 * 1024 * 1024);
+
+        let raw_input = "blob:limit=49999".to_string();
+        let filter_args = FilterArgs::parse_from_spec(raw_input)?;
+        assert_eq!(filter_args.max_blob_size, 49999);
+        assert_eq!(filter_args.max_tree_depth, u64::MAX);
+        assert_eq!(
+            filter_args.allowed_object_types,
+            vec![Kind::Blob, Kind::Tree, Kind::Commit, Kind::Tag]
+        );
+        Ok(())
+    }
 }
@@ -22,9 +22,7 @@ use crate::model::ServiceType;
 
 const CORE_CAPABILITIES: &[&str] = &[
     "ls-refs=unborn",
-    "fetch=shallow",
-    "wait-for-done",
-    "filter",
+    "fetch=shallow wait-for-done filter",
     "ref-in-want",
     "object-format=sha1",
 ];
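Folding the sub-capabilities into a single fetch= entry matches protocol v2's capability-advertisement format, where an entry is either a bare name or name=<space-separated values>; advertising filter this way is what stops the client's "filtering not recognized" warning. A small sketch of how such an entry decomposes (illustrative parsing, not the server's code):

// Illustrative: decomposing a v2 capability entry of the form "name=v1 v2 ...".
fn main() {
    let entry = "fetch=shallow wait-for-done filter";
    let (name, values) = match entry.split_once('=') {
        Some((n, v)) => (n, v.split(' ').collect::<Vec<_>>()),
        None => (entry, Vec::new()),
    };
    assert_eq!(name, "fetch");
    assert_eq!(values, vec!["shallow", "wait-for-done", "filter"]);
}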
@@ -50,4 +50,8 @@
 # Clone the Git repo from Mononoke
 $ git_client clone $MONONOKE_GIT_SERVICE_BASE_URL/$REPONAME.git --filter=blob:limit=5k --filter=tree:3 --filter=object:type=blob --filter=object:type=tree --filter=object:type=commit
 Cloning into 'repo'...
-warning: filtering not recognized by server, ignoring
+
+# Verify that we get the same Git repo back that we started with
+$ cd $REPONAME
+$ git rev-list --objects --all | git cat-file --batch-check='%(objectname) %(objecttype) %(rest)' | sort > $TESTTMP/new_object_list
+$ diff -w $TESTTMP/new_object_list $TESTTMP/object_list
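The clone above passes five separate --filter flags; the Git client folds multiple filters into a single combine: spec joined with +, which is exactly the shape FilterArgs::parse_from_spec unpacks. A simplified sketch of that folding (real Git also percent-encodes reserved characters inside combined specs):

// Simplified sketch of how multiple --filter flags become one filter argument.
fn main() {
    let flags = ["blob:limit=5k", "tree:3", "object:type=blob"];
    let spec = if flags.len() > 1 {
        format!("combine:{}", flags.join("+"))
    } else {
        flags[0].to_string()
    };
    assert_eq!(spec, "combine:blob:limit=5k+tree:3+object:type=blob");
}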