tools/admin: add redaction subcommand

Summary:
Port the `redaction` subcommand to the new admin tool.

There are a couple of changes vs the old tool:

* Old-style SQL redactions are not supported.  We can continue to use the
  old admin tool until they are removed.

* We use fsnodes, rather than hg manifests, to find the content ids.  This
  avoids extra lookups between hg and bonsai hashes, and means we no longer
  depend on Mercurial for redaction.

Reviewed By: yancouto

Differential Revision: D34790119

fbshipit-source-id: 575581675abf42b6048f61e4a449fa3326ba838b
This commit is contained in:
Mark Juggurnauth-Thomas 2022-03-21 10:58:18 -07:00 committed by Facebook GitHub Bot
parent 7650d113db
commit 42bc9fb8d4
9 changed files with 505 additions and 25 deletions

View File

@ -22,7 +22,7 @@ use metaconfig_parser::{RepoConfigs, StorageConfigs};
use metaconfig_types::{BlobConfig, BlobstoreId, Redaction, RepoConfig};
use mononoke_types::RepositoryId;
use prefixblob::PrefixBlobstore;
use redactedblobstore::{RedactedBlobstore, RedactedBlobstoreConfig};
use redactedblobstore::{RedactedBlobstore, RedactedBlobstoreConfig, RedactionConfigBlobstore};
use repo_factory::{RepoFactory, RepoFactoryBuilder};
use scuba_ext::MononokeScubaSampleBuilder;
use slog::Logger;
@ -296,6 +296,29 @@ impl MononokeApp {
Ok(blobstore)
}
/// Returns the redaction config blobstore that the repo factory builds from
/// the `blobstore` entry of the common redaction config.
pub async fn redaction_config_blobstore(&self) -> Result<Arc<RedactionConfigBlobstore>> {
    let blobstore_config = &self.repo_configs.common.redaction_config.blobstore;
    self.repo_factory
        .redaction_config_blobstore_from_config(blobstore_config)
        .await
}
/// Returns the redaction config blobstore that the repo factory builds from
/// the `darkstorm_blobstore` entry of the common redaction config.
///
/// Fails with "Configuration must have darkstorm blobstore" when that entry
/// is not set.
pub async fn redaction_config_blobstore_for_darkstorm(
    &self,
) -> Result<Arc<RedactionConfigBlobstore>> {
    let redaction_config = &self.repo_configs.common.redaction_config;
    let darkstorm_config = redaction_config.darkstorm_blobstore.as_ref();
    let blobstore_config =
        darkstorm_config.ok_or_else(|| anyhow!("Configuration must have darkstorm blobstore"))?;
    self.repo_factory
        .redaction_config_blobstore_from_config(blobstore_config)
        .await
}
fn redaction_scuba_builder(&self) -> Result<MononokeScubaSampleBuilder> {
let params = &self.repo_configs.common.censored_scuba_params;
let mut builder =

View File

@ -17,6 +17,7 @@ use crate::typed_hash::{
};
/// A serialized blob in memory.
#[derive(Clone)]
pub struct Blob<Id> {
id: Id,
data: Bytes,

View File

@ -19,12 +19,18 @@ setup hg server repo
$ cd repo-hg
$ touch a && hg ci -A -q -m 'add a'
$ hg log -T '{short(node)}\n'
ac82d8b1f7c4
create master bookmark
$ hg bookmark master_bookmark -r tip
create another commit that has other content we can redact
$ echo c > c
$ hg ci -A -q -m 'add c'
$ hg bookmark other_bookmark -r tip
$ hg log -T '{short(node)} {bookmarks}\n'
7389ca641397 other_bookmark
ac82d8b1f7c4 master_bookmark
$ cd $TESTTMP
setup repo-pull and repo-push
@ -75,26 +81,53 @@ start mononoke
$ hgmn push -q -r . --to master_bookmark
$ hg log -T '{node}\n'
14961831bd3af3a6331fef7e63367d61cb6c9f6b
ac82d8b1f7c418c61a493ed229ffaa981bda8e90
$ cd "$TESTTMP/repo-pull"
$ hgmn pull -q
$ hgmn up -q 14961831bd3a
Censor the redacted blob (file 'b' in commit '14961831bd3af3a6331fef7e63367d61cb6c9f6b')
$ MONONOKE_EXEC_STAGE=admin mononoke_admin redaction create-key-list 14961831bd3af3a6331fef7e63367d61cb6c9f6b b --force | head -n 1 | sed 's/Redaction saved as: //g' > rs_1
* using repo "repo" repoid RepositoryId(0) (glob)
* changeset resolved as: * (glob)
Redact file 'c' in commit '7389ca6413976090442f3003d4329990bc688ef7'
$ mononoke_newadmin redaction create-key-list -R repo -i 7389ca6413976090442f3003d4329990bc688ef7 c --main-bookmark master_bookmark --output-file rs_0
Checking redacted content doesn't exist in 'master_bookmark' bookmark
No files would be redacted in the main bookmark (master_bookmark)
Redaction saved as: db4bf834eb70b32345de6a2ad146811a6d0591e24cc507b81e30070d01bf2798
To finish the redaction process, you need to commit this id to scm/mononoke/redaction/redaction_sets.cconf in configerator
Attempt to redact file 'b' in commit '14961831bd3af3a6331fef7e63367d61cb6c9f6b'
This initially fails because it is still reachable in 'master'
$ mononoke_newadmin redaction create-key-list -R repo -i 14961831bd3af3a6331fef7e63367d61cb6c9f6b b --main-bookmark master_bookmark
Checking redacted content doesn't exist in 'master_bookmark' bookmark
Redacted content in main bookmark: b content.blake2.21c519fe0eb401bc97888f270902935f858d0c5361211f892fd26ed9ce127ff9
Error: Refusing to create key list because 1 files would be redacted in the main bookmark (master_bookmark)
[1]
Try again with --force
$ mononoke_newadmin redaction create-key-list -R repo -i 14961831bd3af3a6331fef7e63367d61cb6c9f6b b --main-bookmark master_bookmark --force --output-file rs_1
Checking redacted content doesn't exist in 'master_bookmark' bookmark
Redacted content in main bookmark: b content.blake2.21c519fe0eb401bc97888f270902935f858d0c5361211f892fd26ed9ce127ff9
Creating key list despite 1 files being redacted in the main bookmark (master_bookmark) (--force)
Redaction saved as: bd2b6b03fa8e5d9a9a68cf1cebc60b648d95b72781b9ada1debc57e4bba722f6
To finish the redaction process, you need to commit this id to scm/mononoke/redaction/redaction_sets.cconf in configerator
$ cat > "$REDACTION_CONF/redaction_sets" <<EOF
> {
> "all_redactions": [
> {"reason": "T0", "id": "$(cat rs_0)", "enforce": false},
> {"reason": "T1", "id": "$(cat rs_1)", "enforce": true}
> ]
> }
> EOF
$ rm rs_1
$ rm rs_0 rs_1
The files should now be marked as redacted
$ mononoke_newadmin redaction list -R repo -i 14961831bd3af3a6331fef7e63367d61cb6c9f6b
Searching for redacted paths in c58e5684f660c327e9fd4cc0aba5e010bd444b0e0ee23fe4aa0cace2f44c0b46
Found 1 redacted paths
T1 : b
$ mononoke_newadmin redaction list -R repo -i 7389ca6413976090442f3003d4329990bc688ef7
Searching for redacted paths in 39101456281e9b3d34041ded0c91b1712418c9eb59fbfc2bd06e873f3df9a6a4
Found 1 redacted paths
T0 : c (log only)
# We could not restart mononoke here, but then we'd have to wait 60s for it to
# update the redaction config automatically
@ -109,12 +142,6 @@ Restart mononoke
$ hgmn pull -q
$ hgmn up -q 14961831bd3a
$ tglogpnr
@ 14961831bd3a public 'add b' default/master_bookmark
o ac82d8b1f7c4 public 'add a' master_bookmark
Should give us the tombstone file since it is redacted
$ cat b
This version of the file is redacted and you are not allowed to access it. Update or rebase to a newer commit.
@ -147,12 +174,6 @@ Restart mononoke and disable redaction verification
$ hgmn pull -q
$ hgmn up -q 14961831bd3a
$ tglogpnr
@ 14961831bd3a public 'add b' default/master_bookmark
o ac82d8b1f7c4 public 'add a' master_bookmark
Even if file b is redacted, we will get its content
$ cat b
b

View File

@ -33,6 +33,7 @@ facet = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust
fbinit = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
fbthrift = { version = "0.0.1+unstable", git = "https://github.com/facebook/fbthrift.git", branch = "main" }
filestore = { version = "0.1.0", path = "../../filestore" }
fsnodes = { version = "0.1.0", path = "../../derived_data/fsnodes" }
futures = { version = "0.3.13", features = ["async-await", "compat"] }
git_types = { version = "0.1.0", path = "../../git/git_types" }
manifest = { version = "0.1.0", path = "../../manifest" }
@ -45,6 +46,7 @@ phases = { version = "0.1.0", path = "../../phases" }
regex = "1.5.4"
repo_blobstore = { version = "0.1.0", path = "../../blobrepo/repo_blobstore" }
repo_cross_repo = { version = "0.1.0", path = "../../repo_attributes/repo_cross_repo" }
repo_derived_data = { version = "0.1.0", path = "../../repo_attributes/repo_derived_data" }
repo_identity = { version = "0.1.0", path = "../../repo_attributes/repo_identity" }
serde_json = { version = "1.0.64", features = ["float_roundtrip", "unbounded_depth"] }
skiplist = { version = "0.1.0", path = "../../reachabilityindex/skiplist" }

View File

@ -13,6 +13,7 @@ mononoke_app::subcommands! {
mod fetch;
mod list_repos;
mod mutable_renames;
mod redaction;
mod repo_info;
mod skiplist;
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
mod create_key_list;
mod list;
use anyhow::Result;
use clap::{Parser, Subcommand};
use mononoke_app::MononokeApp;
use create_key_list::{RedactionCreateKeyListArgs, RedactionCreateKeyListFromIdsArgs};
use list::RedactionListArgs;
// The doc comment below is picked up by clap's derive as the help text for
// the `redaction` command, so it must describe redaction (it previously said
// "Manage repository bookmarks", copied from another subcommand).
/// Manage content redaction
#[derive(Parser)]
pub struct CommandArgs {
    // Which redaction operation to perform; dispatched in `run`.
    #[clap(subcommand)]
    subcommand: RedactionSubcommand,
}
// Operations available under the `redaction` command. Each variant carries
// the parsed arguments for that operation and is dispatched by `run`.
#[derive(Subcommand)]
pub enum RedactionSubcommand {
/// Create a key list using files in a changeset.
// Handled by `create_key_list::create_key_list_from_commit_files`.
CreateKeyList(RedactionCreateKeyListArgs),
/// Create a key list using content ids.
// Handled by `create_key_list::create_key_list_from_blobstore_keys`.
CreateKeyListFromIds(RedactionCreateKeyListFromIdsArgs),
/// List the redacted files in a commit.
// Handled by `list::list`.
List(RedactionListArgs),
}
pub async fn run(app: MononokeApp, args: CommandArgs) -> Result<()> {
let ctx = app.new_context();
match args.subcommand {
RedactionSubcommand::CreateKeyList(create_args) => {
create_key_list::create_key_list_from_commit_files(&ctx, &app, create_args).await?
}
RedactionSubcommand::CreateKeyListFromIds(create_args) => {
create_key_list::create_key_list_from_blobstore_keys(&ctx, &app, create_args).await?
}
RedactionSubcommand::List(list_args) => list::list(&ctx, &app, list_args).await?,
}
Ok(())
}

View File

@ -0,0 +1,258 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use anyhow::{anyhow, bail, Context, Result};
use blobstore::Storable;
use bookmarks::{BookmarkName, BookmarksRef};
use clap::{ArgGroup, Args};
use context::CoreContext;
use fsnodes::RootFsnodeId;
use futures::future::try_join;
use futures::stream::TryStreamExt;
use manifest::{Entry, ManifestOps};
use mononoke_app::args::{RepoArgs, RepoBlobstoreArgs};
use mononoke_app::MononokeApp;
use mononoke_types::{BlobstoreValue, ChangesetId, MPath, MononokeId, RedactionKeyList};
use repo_blobstore::RepoBlobstoreArc;
use repo_derived_data::RepoDerivedDataRef;
use super::list::paths_for_content_keys;
use crate::commit_id::parse_commit_id;
use crate::repo::AdminRepo;
// Arguments for creating a redaction key list from files in a commit
// (consumed by `create_key_list_from_commit_files`).
//
// The arg group below is `required`, so at least one of the positional FILE
// arguments or `--input-file` must be supplied.
// NOTE(review): the group name "files-input=file" looks like a typo for
// "files-input-file". Also, the group does not set `multiple(true)`, so
// presumably clap rejects passing both FILE arguments and `--input-file`,
// even though the handler merges both sources -- confirm which is intended.
#[derive(Args)]
#[clap(group(ArgGroup::new("files-input=file").args(&["files", "input-file"]).required(true)))]
pub struct RedactionCreateKeyListArgs {
// Selects the repository to open.
#[clap(flatten)]
repo_args: RepoArgs,
// Commit in which the listed files are looked up; resolved with
// `parse_commit_id`.
#[clap(long, short = 'i')]
commit_id: String,
/// Fail if any of the content to be redacted is reachable from this main
/// bookmark unless --force is set.
#[clap(long, default_value = "master")]
main_bookmark: BookmarkName,
/// Force content redaction even if content is reachable from the main
/// bookmark.
#[clap(long)]
force: bool,
/// Name of a file with a list of filenames to redact.
// Read line by line; each line is parsed as one path.
#[clap(long, parse(from_os_str))]
input_file: Option<PathBuf>,
/// Name of a file to write the new key to.
#[clap(long, parse(from_os_str))]
output_file: Option<PathBuf>,
/// Files to redact
#[clap(value_name = "FILE")]
files: Vec<String>,
}
// Arguments for creating a redaction key list directly from blobstore keys
// (consumed by `create_key_list_from_blobstore_keys`).
#[derive(Args)]
pub struct RedactionCreateKeyListFromIdsArgs {
// NOTE(review): `create_key_list_from_blobstore_keys` in this file never
// reads this field -- confirm whether it is needed beyond CLI parsing.
#[clap(flatten)]
repo_blobstore_args: RepoBlobstoreArgs,
/// Blobstore keys to redact
// Stored verbatim as the keys of the new `RedactionKeyList`.
#[clap(value_name = "KEY")]
keys: Vec<String>,
/// Name of a file to write the new key to.
#[clap(long, parse(from_os_str))]
output_file: Option<PathBuf>,
}
async fn create_key_list(
ctx: &CoreContext,
app: &MononokeApp,
keys: Vec<String>,
output_file: Option<&Path>,
) -> Result<()> {
let redaction_blobstore = app.redaction_config_blobstore().await?;
let darkstorm_blobstore = app.redaction_config_blobstore_for_darkstorm().await?;
let blob = RedactionKeyList { keys }.into_blob();
let (id1, id2) = try_join(
blob.clone().store(ctx, &redaction_blobstore),
blob.store(ctx, &darkstorm_blobstore),
)
.await?;
if id1 != id2 {
bail!(
"Id mismatch on darkstorm and non-darkstorm blobstores: {} vs {}",
id1,
id2
);
}
println!("Redaction saved as: {}", id1);
println!(concat!(
"To finish the redaction process, you need to commit this id to ",
"scm/mononoke/redaction/redaction_sets.cconf in configerator"
));
if let Some(output_file) = output_file {
let mut output = File::create(output_file).with_context(|| {
format!(
"Failed to open output file '{}'",
output_file.to_string_lossy()
)
})?;
output
.write_all(id1.to_string().as_bytes())
.with_context(|| {
format!(
"Failed to write to output file '{}'",
output_file.to_string_lossy()
)
})?;
}
Ok(())
}
/// Looks up `paths` in the fsnode manifest of `cs_id` and returns the set of
/// blobstore keys of the file contents found there.
///
/// Only leaf (file) entries are considered. Every requested path that has no
/// file entry is reported on stderr, and the function then fails with the
/// number of missing files.
async fn content_keys_for_paths(
    ctx: &CoreContext,
    repo: &AdminRepo,
    cs_id: ChangesetId,
    paths: Vec<MPath>,
) -> Result<HashSet<String>> {
    let root_fsnode_id = repo
        .repo_derived_data()
        .derive::<RootFsnodeId>(ctx, cs_id)
        .await?;

    let keys_by_path: HashMap<_, _> = root_fsnode_id
        .fsnode_id()
        .find_entries(ctx.clone(), repo.repo_blobstore_arc(), paths.clone())
        .try_filter_map(|(maybe_path, entry)| async move {
            Ok(match (maybe_path, entry) {
                (Some(file_path), Entry::Leaf(file)) => {
                    Some((file_path, file.content_id().blobstore_key()))
                }
                _ => None,
            })
        })
        .try_collect()
        .await?;

    // Report every requested path that did not resolve to a file.
    let missing: Vec<&MPath> = paths
        .iter()
        .filter(|requested| !keys_by_path.contains_key(*requested))
        .collect();
    for requested in &missing {
        eprintln!("Missing file: {}", requested);
    }
    if !missing.is_empty() {
        bail!("Failed to find {} files in this commit", missing.len());
    }

    Ok(keys_by_path.into_values().collect())
}
pub async fn create_key_list_from_commit_files(
ctx: &CoreContext,
app: &MononokeApp,
create_args: RedactionCreateKeyListArgs,
) -> Result<()> {
let mut files = create_args
.files
.iter()
.map(MPath::new)
.collect::<Result<Vec<_>>>()?;
if let Some(input_file) = create_args.input_file {
let input_file =
BufReader::new(File::open(input_file).context("Failed to open input file")?);
for line in input_file.lines() {
files.push(MPath::new(line?)?);
}
}
if files.is_empty() {
bail!("No files to redact");
}
let repo: AdminRepo = app
.open_repo(&create_args.repo_args)
.await
.context("Failed to open repo")?;
let cs_id = parse_commit_id(ctx, &repo, &create_args.commit_id).await?;
let keys = content_keys_for_paths(ctx, &repo, cs_id, files).await?;
println!(
"Checking redacted content doesn't exist in '{}' bookmark",
create_args.main_bookmark
);
let main_cs_id = repo
.bookmarks()
.get(ctx.clone(), &create_args.main_bookmark)
.await?
.ok_or_else(|| {
anyhow!(
"Main bookmark '{}' does not exist",
create_args.main_bookmark
)
})?;
let main_redacted = paths_for_content_keys(ctx, &repo, main_cs_id, &keys).await?;
if main_redacted.is_empty() {
println!(
"No files would be redacted in the main bookmark ({})",
create_args.main_bookmark
);
} else {
for (path, content_id) in main_redacted.iter() {
println!(
"Redacted content in main bookmark: {} {}",
path,
content_id.blobstore_key(),
);
}
if create_args.force {
println!(
"Creating key list despite {} files being redacted in the main bookmark ({}) (--force)",
main_redacted.len(),
create_args.main_bookmark
);
} else {
bail!(
"Refusing to create key list because {} files would be redacted in the main bookmark ({})",
main_redacted.len(),
create_args.main_bookmark
);
}
}
create_key_list(
ctx,
app,
keys.into_iter().collect(),
create_args.output_file.as_deref(),
)
.await
}
pub async fn create_key_list_from_blobstore_keys(
ctx: &CoreContext,
app: &MononokeApp,
create_args: RedactionCreateKeyListFromIdsArgs,
) -> Result<()> {
create_key_list(
ctx,
app,
create_args.keys,
create_args.output_file.as_deref(),
)
.await
}

View File

@ -0,0 +1,117 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use std::collections::HashSet;
use anyhow::{Context, Result};
use blobstore::Loadable;
use clap::Args;
use context::CoreContext;
use fsnodes::RootFsnodeId;
use futures::stream::TryStreamExt;
use manifest::ManifestOps;
use metaconfig_types::RepoConfigRef;
use mononoke_app::args::RepoArgs;
use mononoke_app::MononokeApp;
use mononoke_types::{ChangesetId, ContentId, MPath, MononokeId};
use repo_blobstore::{RepoBlobstoreArc, RepoBlobstoreRef};
use repo_derived_data::RepoDerivedDataRef;
use crate::commit_id::parse_commit_id;
use crate::repo::AdminRepo;
// Arguments for listing the redacted files in a commit (consumed by `list`).
#[derive(Args)]
pub struct RedactionListArgs {
// Selects the repository to open.
#[clap(flatten)]
repo_args: RepoArgs,
// Commit to search for redacted paths; resolved with `parse_commit_id`.
#[clap(long, short = 'i')]
commit_id: String,
}
/// Scans every file in the fsnode manifest of `cs_id` and returns the
/// `(path, content id)` pairs whose content blobstore key is in `keys`.
///
/// A progress line is printed after every 100,000 files processed, showing
/// the total file count taken from the root fsnode's summary.
pub(super) async fn paths_for_content_keys(
    ctx: &CoreContext,
    repo: &AdminRepo,
    cs_id: ChangesetId,
    keys: &HashSet<String>,
) -> Result<Vec<(MPath, ContentId)>> {
    let root_fsnode_id = repo
        .repo_derived_data()
        .derive::<RootFsnodeId>(ctx, cs_id)
        .await?;

    // Total number of files under the root, used only for progress output.
    let total_files = root_fsnode_id
        .fsnode_id()
        .load(ctx, repo.repo_blobstore())
        .await?
        .summary()
        .descendant_files_count;

    let mut found_paths = Vec::new();
    let mut seen = 0;
    let mut leaves = root_fsnode_id
        .fsnode_id()
        .list_leaf_entries(ctx.clone(), repo.repo_blobstore_arc());
    while let Some((path, file)) = leaves.try_next().await? {
        seen += 1;
        if seen % 100_000 == 0 {
            match found_paths.len() {
                0 => println!("Processed files: {}/{}", seen, total_files),
                found => println!(
                    "Processed files: {}/{} ({} found so far)",
                    seen, total_files, found
                ),
            }
        }
        let content_id = file.content_id();
        if keys.contains(&content_id.blobstore_key()) {
            found_paths.push((path, content_id.clone()));
        }
    }
    Ok(found_paths)
}
pub async fn list(
ctx: &CoreContext,
app: &MononokeApp,
list_args: RedactionListArgs,
) -> Result<()> {
let repo: AdminRepo = app
.open_repo(&list_args.repo_args)
.await
.context("Failed to open repo")?;
let cs_id = parse_commit_id(ctx, &repo, &list_args.commit_id).await?;
// We don't have a way to get the keys for the redacted blobs out of the
// repo blobstore, so we must ask the factory to load them again. Until
// SqlRedactedBlobs are removed, we need to know the metadata database
// config for this.
let db_config = &repo.repo_config().storage_config.metadata;
let redacted_blobs = app
.repo_factory()
.redacted_blobs(ctx.clone(), db_config)
.await?;
let redacted_map = redacted_blobs.redacted();
let keys = redacted_map.keys().cloned().collect();
println!("Searching for redacted paths in {}", cs_id);
let mut redacted_paths = paths_for_content_keys(ctx, &repo, cs_id, &keys).await?;
println!("Found {} redacted paths", redacted_paths.len());
redacted_paths.sort_by(|a, b| a.0.cmp(&b.0));
for (path, content_id) in redacted_paths {
if let Some(meta) = redacted_map.get(&content_id.blobstore_key()) {
let log_only = if meta.log_only { " (log only)" } else { "" };
println!("{:20}: {}{}", meta.task, path, log_only);
}
}
Ok(())
}

View File

@ -13,10 +13,12 @@ use bookmarks::{self, BookmarkUpdateLog, Bookmarks};
use changeset_fetcher::ChangesetFetcher;
use changesets::Changesets;
use ephemeral_blobstore::RepoEphemeralStore;
use metaconfig_types::RepoConfig;
use mutable_renames::MutableRenames;
use phases::Phases;
use repo_blobstore::RepoBlobstore;
use repo_cross_repo::RepoCrossRepo;
use repo_derived_data::RepoDerivedData;
use repo_identity::RepoIdentity;
/// Repository object for admin commands.
@ -26,6 +28,9 @@ pub struct AdminRepo {
#[facet]
pub repo_identity: RepoIdentity,
#[facet]
pub repo_config: RepoConfig,
#[facet]
pub bonsai_hg_mapping: dyn BonsaiHgMapping,
@ -59,6 +64,9 @@ pub struct AdminRepo {
#[facet]
pub phases: dyn Phases,
#[facet]
pub repo_derived_data: RepoDerivedData,
#[facet]
pub repo_cross_repo: RepoCrossRepo,