Command for fetching git objects in repo

Summary:
The git objects that get uploaded for a repo during repo import are not directly mapped to the commit. Hence, fetching those objects differs somewhat from raw blobstore access. Additionally, the objects can be zlib-encoded, and the blob objects are stored using the filestore chunking abstraction. A dedicated newadmin command that helps fetch these objects will be useful for debugging and support purposes.

I plan to make use of this command during the git-protocol implementation.

Reviewed By: gustavoavena

Differential Revision: D52843357

fbshipit-source-id: 6d0a0d774396523227e8ea313f02ee720b6c3d92
This commit is contained in:
Rajiv Sharma 2024-01-18 06:23:34 -08:00 committed by Facebook GitHub Bot
parent 8f9bbd2843
commit 02d5ba54b4
7 changed files with 154 additions and 8 deletions

View File

@ -38,6 +38,7 @@ pub use crate::delta_manifest::GitDeltaManifestEntry;
pub use crate::derive_delta_manifest::RootGitDeltaManifestId;
pub use crate::errors::GitError;
pub use crate::store::fetch_delta_instructions;
pub use crate::store::fetch_git_object;
pub use crate::store::fetch_git_object_bytes;
pub use crate::store::fetch_non_blob_git_object;
pub use crate::store::fetch_non_blob_git_object_bytes;

View File

@ -22,10 +22,7 @@
$ git init -q
$ echo "this is file1" > file1
$ git add file1
$ git commit -am "Add file1"
[master (root-commit) 8ce3eae] Add file1
1 file changed, 1 insertion(+)
create mode 100644 file1
$ git commit -qam "Add file1"
$ git tag -a -m"new tag" first_tag
$ cd "$TESTTMP"
$ git clone "$GIT_REPO_ORIGIN"
@ -50,6 +47,36 @@
blob-repo0000.git_object.8ce3eae44760b500bf3f2c3922a95dcd3c908e9e
blob-repo0000.git_object.cb2ef838eb24e4667fee3a8b89c930234ae6e4bb
# Validate that we are able to view the git objects stored in mononoke store
$ mononoke_newadmin git-objects -R repo fetch --id 8ce3eae44760b500bf3f2c3922a95dcd3c908e9e
The object is a Git Commit
Commit {
tree: Sha1(cb2ef838eb24e4667fee3a8b89c930234ae6e4bb),
parents: [],
author: Signature {
name: "mononoke",
email: "mononoke@mononoke",
time: Time {
seconds: 946684800,
offset: 0,
sign: Plus,
},
},
committer: Signature {
name: "mononoke",
email: "mononoke@mononoke",
time: Time {
seconds: 946684800,
offset: 0,
sign: Plus,
},
},
encoding: None,
message: "Add file1\n",
extra_headers: [],
}
# Validate if creating the commit also uploaded the packfile items for the imported git objects
$ ls $TESTTMP/blobstore/blobs | grep "git_packfile_base_item"
blob-repo0000.git_packfile_base_item.433eb172726bc7b6d60e8d68efb0f0ef4e67a667
@ -70,10 +97,7 @@
$ cd "$GIT_REPO"
$ echo "this is file2" > file2
$ git add file2
$ git commit -am "Add file2"
[master e8615d6] Add file2
1 file changed, 1 insertion(+)
create mode 100644 file2
$ git commit -qam "Add file2"
# Test missing-for-commit flag (against partially imported repo history)
$ cd "$TESTTMP"

View File

@ -63,6 +63,7 @@ futures_stats = { version = "0.1.0", git = "https://github.com/facebookexperimen
git_symbolic_refs = { version = "0.1.0", path = "../../git_symbolic_refs" }
git_types = { version = "0.1.0", path = "../../git/git_types" }
gix-hash = "0.11"
gix-object = "0.33"
itertools = "0.11.0"
manifest = { version = "0.1.0", path = "../../manifest" }
megarepo_api = { version = "0.1.0", path = "../../megarepo_api" }

View File

@ -46,6 +46,7 @@ rust_binary(
"fbsource//third-party/rust:flate2",
"fbsource//third-party/rust:futures",
"fbsource//third-party/rust:gix-hash",
"fbsource//third-party/rust:gix-object",
"fbsource//third-party/rust:itertools",
"fbsource//third-party/rust:prettytable-rs",
"fbsource//third-party/rust:regex",

View File

@ -40,6 +40,7 @@ mononoke_app::subcommands! {
mod fetch;
mod filestore;
mod git_bundle;
mod git_objects;
mod git_symref;
mod hg_sync;
mod list_repos;

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
mod fetch;
use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use clap::Subcommand;
use mononoke_app::args::RepoArgs;
use mononoke_app::MononokeApp;
use repo_blobstore::RepoBlobstore;
use repo_identity::RepoIdentity;
use self::fetch::FetchArgs;
/// Perform git objects related operations.
#[derive(Parser)]
pub struct CommandArgs {
// Repo selection arguments, flattened into this command's own argument
// list; `run` passes them to `open_repo`.
#[clap(flatten)]
repo: RepoArgs,
// The operation to perform; `run` matches on this to dispatch.
#[clap(subcommand)]
subcommand: GitObjectsSubcommand,
}
// Facet container listing the parts of a repo this command works with:
// its identity and its blobstore. `run` obtains an instance via `open_repo`.
#[facet::container]
pub struct Repo {
#[facet]
repo_identity: RepoIdentity,
// Handed to the git object fetch helpers by the `fetch` subcommand.
#[facet]
repo_blobstore: RepoBlobstore,
}
// Operations available under this command. `run` matches on the selected
// variant and forwards the variant's parsed arguments to its handler.
#[derive(Subcommand)]
pub enum GitObjectsSubcommand {
/// Fetch Git objects
Fetch(FetchArgs),
}
pub async fn run(app: MononokeApp, args: CommandArgs) -> Result<()> {
let ctx = app.new_basic_context();
let repo: Repo = app
.open_repo(&args.repo)
.await
.context("Failed to open repo")?;
match args.subcommand {
GitObjectsSubcommand::Fetch(fetch_args) => fetch::fetch(&repo, &ctx, fetch_args).await?,
}
Ok(())
}

View File

@ -0,0 +1,62 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use anyhow::Context;
use anyhow::Result;
use clap::Args;
use context::CoreContext;
use git_types::fetch_git_object;
use git_types::fetch_non_blob_git_object;
use gix_object::Object::Blob;
use gix_object::Object::Commit;
use gix_object::Object::Tag;
use gix_object::Object::Tree;
use mononoke_types::hash::GitSha1;
use mononoke_types::hash::RichGitSha1;
use super::Repo;
// Command-line arguments of the `fetch` subcommand.
// `ty` and `size` each declare the other as required, so they are supplied
// together or not at all; `fetch` only uses the pair when both are present.
#[derive(Args)]
pub struct FetchArgs {
/// The Git SHA1 object id (in hex form) of the object that is to be fetched
#[clap(long)]
id: GitSha1,
/// The type of the git object to be fetched. Required if the object can be git blob
#[clap(long, requires = "size")]
ty: Option<String>,
/// The size of the git object to be fetched. Required if the object can be git blob
#[clap(long, requires = "ty")]
size: Option<u64>,
}
/// Fetch one git object from the repo's blobstore and print it to stdout.
///
/// When both the type and the size were supplied, they are combined with the
/// id into a `RichGitSha1` and the object is looked up with
/// `fetch_git_object`. Otherwise the id alone is converted into an object id
/// and looked up with `fetch_non_blob_git_object`.
///
/// The printed output is a one-line description of the object kind, a blank
/// line, and the pretty-printed debug form of the object. For blobs, the debug
/// form of the lossily UTF-8 decoded data is printed.
///
/// # Errors
///
/// Fails with the context "Invalid object id ..." when the id cannot be
/// converted into an object id, and propagates any error from the fetch calls.
pub async fn fetch(repo: &Repo, ctx: &CoreContext, mut fetch_args: FetchArgs) -> Result<()> {
    let object_id = fetch_args.id;
    // Some(..) only when *both* the type and the size were given.
    let type_and_size = fetch_args.ty.take().zip(fetch_args.size.take());

    let object = if let Some((object_type, object_size)) = type_and_size {
        // `from_sha1` is given the type as a leaked (`'static`) string.
        let rich_sha = RichGitSha1::from_sha1(object_id, object_type.leak(), object_size);
        fetch_git_object(ctx, repo.repo_blobstore.clone(), &rich_sha).await?
    } else {
        let oid = object_id
            .to_object_id()
            .with_context(|| format!("Invalid object id {}", object_id))?;
        fetch_non_blob_git_object(ctx, &repo.repo_blobstore, oid.as_ref()).await?
    };

    let report = match object {
        Tree(tree) => format!("The object is a Git Tree\n\n{:#?}", tree),
        Blob(blob) => format!(
            "The object is a Git Blob\n\n{:#?}",
            String::from_utf8_lossy(&blob.data)
        ),
        Commit(commit) => format!("The object is a Git Commit\n\n{:#?}", commit),
        Tag(tag) => format!("The object is a Git Tag\n\n{:#?}", tag),
    };
    println!("{}", report);
    Ok(())
}