newstore: introduce new StoreFile type with strongly-typed redaction, LFS Pointers, and copy-header support

Summary:
Introduce a new `StoreFile` type for the `revisionstore` crate. It distinguishes three kinds of entry — `File`, `LfsPointer`, and `RedactedFile` (tracked internally by the `StoreFileKind` enum) — which represent the different cases the `Entry` type's `content` might actually represent. The `File` variant also handles stripping the copy header, if present, and stores all three of `copied_from`, the raw unstripped `content`, and the stripped `content`, though only the last of these can be accessed through the public API right now. Ideally, we'll move copy information into the history API and never actually need to make it public here.

Conversions are provided from `Entry` and EdenApi's `FileEntry` type (which also supports redaction and LFS pointers, but as errors instead of first-class values).

Modify the output type used in the Python bindings to use this new `StoreFile` type.

Currently, the `LfsPointer` variant is unused. This will be used later when we add first-class LFS support.

Reviewed By: kulshrax

Differential Revision: D26862710

fbshipit-source-id: 8326921f3ee43bf2e253847d5735c61f5a50bfa6
This commit is contained in:
Meyer Jacobs 2021-03-24 12:51:48 -07:00 committed by Facebook GitHub Bot
parent dbc5d45e72
commit b7e4757faa
6 changed files with 189 additions and 9 deletions

View File

@ -36,7 +36,7 @@ use revisionstore::{
indexedlogdatastore::Entry,
newstore::{
BoxedReadStore, BoxedWriteStore, Fallback, FallbackCache, FilterMapStore, KeyStream,
LegacyDatastore,
LegacyDatastore, StoreFile,
},
repack, util, ContentStore, ContentStoreBuilder, CorruptionPolicy, DataPack, DataPackStore,
DataPackVersion, Delta, EdenApiFileStore, EdenApiTreeStore, ExtStoredPolicy, HgIdDataStore,
@ -1149,7 +1149,7 @@ fn make_newfilestore<'a>(
edenapi_filestore: Option<Arc<EdenApiFileStore>>,
suffix: Option<String>,
correlator: Option<String>,
) -> Result<(BoxedReadStore<Key, Entry>, Arc<ContentStore>)> {
) -> Result<(BoxedReadStore<Key, StoreFile>, Arc<ContentStore>)> {
// Construct ContentStore
let mut builder = ContentStoreBuilder::new(&config).correlator(correlator);
@ -1265,7 +1265,7 @@ fn make_newfilestore<'a>(
}
py_class!(pub class newfilestore |py| {
data store: BoxedReadStore<Key, Entry>;
data store: BoxedReadStore<Key, StoreFile>;
data contentstore: Arc<ContentStore>;
def __new__(_cls,
@ -1308,7 +1308,7 @@ py_class!(pub class newfilestore |py| {
});
impl ExtractInnerRef for newfilestore {
type Inner = BoxedReadStore<Key, Entry>;
type Inner = BoxedReadStore<Key, StoreFile>;
fn extract_inner_ref<'a>(&'a self, py: Python<'a>) -> &'a Self::Inner {
self.store(py)

View File

@ -130,6 +130,10 @@ impl FileEntry {
/// Returns a shared reference to the `metadata` field stored on this entry.
pub fn metadata(&self) -> &Metadata {
&self.metadata
}
/// Returns a shared reference to the `parents` field stored on this entry.
pub fn parents(&self) -> &Parents {
&self.parents
}
}
#[cfg(any(test, feature = "for-tests"))]

View File

@ -5,10 +5,11 @@
* GNU General Public License version 2.
*/
use std::convert::From;
use std::convert::{From, Into, TryFrom, TryInto};
use std::fmt;
use std::sync::Arc;
use anyhow::Error;
use futures::{channel::mpsc::channel, SinkExt, StreamExt, TryStreamExt};
use tracing::error;
@ -46,7 +47,11 @@ where
// Write Value Type (must support conversion from fallback)
VW: Send + Sync + Clone + From<VF> + 'static,
// Output Value Type (must support conversion from preferred & fallback)
VO: Send + Sync + Clone + From<VF> + From<VP> + 'static,
VO: Send + Sync + Clone + TryFrom<VF> + TryFrom<VP> + 'static,
// TODO(meyer): For now, we just require the conversion errors to be convertible to anyhow::Error
// We can probably loosen this later. In particular, we want to associate the key, at least.
<VO as TryFrom<VF>>::Error: Into<Error>,
<VO as TryFrom<VP>>::Error: Into<Error>,
{
fn fetch_stream(self: Arc<Self>, keys: KeyStream<K>) -> FetchStream<K, VO> {
// TODO(meyer): Write a custom Stream implementation to try to avoid use of channels
@ -62,7 +67,7 @@ where
use FetchError::*;
match res {
// Convert preferred values into output values
Ok(v) => Some(Ok(v.into())),
Ok(v) => Some(v.try_into().map_err(FetchError::from)),
// TODO(meyer): Looks like we aren't up to date with futures crate, missing "feed" method, which is probably better here.
// I think this might serialize the fallback stream as-written.
Err(NotFound(k)) => match sender.send(k.clone()).await {
@ -90,7 +95,7 @@ where
error!({ error = %e }, "error writing fallback value to channel");
}
// Convert fallback values to output values
Ok(v.into())
v.try_into().map_err(FetchError::from)
}
});
@ -110,7 +115,7 @@ where
// Convert fallback values to output values
Box::pin(select_drop(
preferred_stream,
fallback_stream.map_ok(|v| v.into()),
fallback_stream.map(|r| r.and_then(|v| v.try_into().map_err(FetchError::from))),
))
}
}

View File

@ -20,6 +20,7 @@ pub use self::{
filter_map::FilterMapStore,
inmemory::{HashMapStore, KeyedValue},
legacy::LegacyDatastore,
types::StoreFile,
};
pub mod edenapi;
@ -27,6 +28,7 @@ pub mod fallback;
pub mod filter_map;
pub mod inmemory;
pub mod legacy;
pub mod types;
/// A pinned, boxed stream of keys to fetch.
pub type KeyStream<K> = BoxStream<'static, K>;

View File

@ -0,0 +1,168 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use std::convert::TryFrom;
use anyhow::Error;
use edenapi_types::{FileEntry as EdenApiFileEntry, FileError as EdenApiFileError};
use minibytes::Bytes;
use types::{Key, Parents};
use crate::{
datastore::{strip_metadata, Metadata},
indexedlogdatastore::Entry,
redacted::is_redacted,
};
/// A strongly-typed file entry type. Like EdenApi's FileEntry type, but intended to support
/// Mercurial's local use cases rather than communication with EdenApi. Unlike EdenApi's FileEntry,
/// `RedactedFile` and `LfsPointer` are expressed as enum variants, rather than as errors when attempting
/// to read the file blob.
///
/// Values are produced by the `TryFrom<Entry>` and `TryFrom<EdenApiFileEntry>` conversions in this
/// module. Every field is private.
#[derive(Clone, Debug)]
pub struct StoreFile {
/// Key identifying this file. Both conversions in this module always set it to `Some`.
key: Option<Key>,
/// Parents of this file. Set when converting from an EdenApi `FileEntry`; left as `None` when
/// converting from an `Entry`.
parents: Option<Parents>,
/// Metadata cloned from the source entry.
entry_metadata: Option<Metadata>,
/// The meaning of the raw_content field depends on the StoreFileKind:
/// for `File` it is the content before `strip_metadata` is applied; for `RedactedFile` and
/// `LfsPointer` it is the content exactly as obtained from the source entry.
raw_content: Option<Bytes>,
/// Which kind of file-like entity this value represents.
kind: StoreFileKind,
}
/// The different kinds of "file-like" entities you might come across in various file-oriented APIs
#[derive(Clone, Debug)]
enum StoreFileKind {
// TODO(meyer): Do we need a separate "LfsFile" variant?
/// A file. May be LFS or non-LFS, but its contents are immediately available without
/// access to another store, unlike an LfsPointer.
File {
/// The first value returned by `strip_metadata` for the raw content, i.e. the content
/// with any copy header removed.
stripped_content: Option<Bytes>,
/// The second value returned by `strip_metadata` (presumably the key this file was
/// copied from, when a copy header was present).
copied_from: Option<Key>,
},
// TODO(meyer): Parse out the LfsPointersEntry?
/// An LFS Pointer. Contains the content-based hashes used to look up an LFS File.
///
/// The pointer is not parsed here; its bytes are kept in `StoreFile::raw_content`.
LfsPointer,
/// A redacted file. The contents of a redacted file are no longer accessible, and instead are
/// replaced with a special "tombstone" string.
RedactedFile,
}
impl TryFrom<Entry> for StoreFile {
    type Error = Error;

    /// Builds a `StoreFile` from an indexedlog `Entry`.
    ///
    /// The entry is classified in this order: redacted content first, then entries whose
    /// metadata marks them as LFS, and finally ordinary files, whose content is run through
    /// `strip_metadata`. `parents` is always `None` for this conversion.
    ///
    /// # Errors
    ///
    /// Fails if the entry's content cannot be read, or if `strip_metadata` rejects the
    /// content of an ordinary file.
    fn try_from(mut v: Entry) -> Result<Self, Self::Error> {
        let raw_content = v.content()?;
        let key = v.key().clone();
        let entry_metadata = v.metadata().clone();

        // Decide the kind up front so the struct is assembled in exactly one place.
        let kind = if is_redacted(&raw_content) {
            StoreFileKind::RedactedFile
        } else if entry_metadata.is_lfs() {
            // TODO(meyer): Delete when ExtStoredPolicy is removed.
            StoreFileKind::LfsPointer
        } else {
            let (stripped_content, copied_from) = strip_metadata(&raw_content)?;
            StoreFileKind::File {
                stripped_content: Some(stripped_content),
                copied_from,
            }
        };

        Ok(StoreFile {
            key: Some(key),
            parents: None,
            entry_metadata: Some(entry_metadata),
            raw_content: Some(raw_content),
            kind,
        })
    }
}
impl TryFrom<EdenApiFileEntry> for StoreFile {
    type Error = Error;

    /// Builds a `StoreFile` from an EdenApi `FileEntry`.
    ///
    /// `data_checked` reports redacted and LFS entries as errors; those two cases are turned
    /// into the `RedactedFile` and `LfsPointer` kinds here, keeping the key and bytes carried
    /// by the error. Successfully checked data is treated as an ordinary file and run through
    /// `strip_metadata`. Parents and metadata are cloned from the entry in every case.
    ///
    /// # Errors
    ///
    /// Fails if `data_checked` reports the entry as corrupt, or if `strip_metadata` rejects
    /// the checked content.
    fn try_from(v: EdenApiFileEntry) -> Result<Self, Self::Error> {
        // TODO(meyer): Optimize this to remove unnecessary clones.
        use EdenApiFileError::*;

        let (key, raw_content, kind): (Key, Bytes, StoreFileKind) = match v.data_checked() {
            Ok(checked) => {
                let checked: Bytes = checked.into();
                let (stripped_content, copied_from) = strip_metadata(&checked)?;
                (
                    v.key().clone(),
                    checked,
                    StoreFileKind::File {
                        stripped_content: Some(stripped_content),
                        copied_from,
                    },
                )
            }
            Err(Redacted(key, tombstone)) => (key, tombstone.into(), StoreFileKind::RedactedFile),
            Err(Lfs(key, pointer)) => (key, pointer.into(), StoreFileKind::LfsPointer),
            Err(corrupt @ Corrupt(_)) => return Err(Error::from(corrupt)),
        };

        Ok(StoreFile {
            key: Some(key),
            parents: Some(v.parents().clone()),
            entry_metadata: Some(v.metadata().clone()),
            raw_content: Some(raw_content),
            kind,
        })
    }
}
impl StoreFile {
pub fn key(&self) -> Option<&Key> {
self.key.as_ref()
}
/// The "logical" file content, as it will be written to a checkout, stripped of copy headers.
///
/// Currently, this method will return the copy-header-stripped file contents for files,
/// the redaction tombstone for "redacted" files, and None for LFS Pointers.
pub fn content(&self) -> Option<&Bytes> {
use StoreFileKind::*;
match self.kind {
File {
stripped_content: ref c,
..
} => c.as_ref(),
RedactedFile => self.raw_content.as_ref(),
// LFS pointers return None, they have no available content to be placed in the filesystem.
_ => None,
}
}
pub fn entry_metadata(&self) -> Option<&Metadata> {
self.entry_metadata.as_ref()
}
}

View File

@ -7,6 +7,7 @@
use minibytes::Bytes;
/// TODO(T48685378): Handle redacted content in a less hacky way.
pub static REDACTED_CONTENT: &[u8] = b"PoUOK1GkdH6Xtx5j9WKYew3dZXspyfkahcNkhV6MJ4rhyNICTvX0nxmbCImFoT0oHAF9ivWGaC6ByswQZUgf1nlyxcDcahHknJS15Vl9Lvc4NokYhMg0mV1rapq1a4bhNoUI9EWTBiAkYmkadkO3YQXV0TAjyhUQWxxLVskjOwiiFPdL1l1pdYYCLTE3CpgOoxQV3EPVxGUPh1FGfk7F9Myv22qN1sUPSNN4h3IFfm2NNPRFgWPDsqAcaQ7BUSKa\n";
static REDACTED_MESSAGE: &[u8] = b"This version of the file is redacted and you are not allowed to access it. Update or rebase to a newer commit.\n";