mononoke: speed up file hooks

Summary:
Previously the file hook would look up a file by name from the root of the repo on
every hook run. So if we add or modify a lot of files in the same directory,
then the manifest for this directory will be parsed over and over again.

There is no need to do that, since we can just use the file node id of the file.
This diff does exactly that - now HookFile will use HgFileNodeId instead of
(HgChangesetId + MPath) to look up a file's content.

Reviewed By: krallin

Differential Revision: D15349196

fbshipit-source-id: 503109fc87ccf2659d481eeca165044baa440463
This commit is contained in:
Stanislau Hlebik 2019-05-29 10:20:42 -07:00 committed by Facebook Github Bot
parent 2f7a107fa7
commit 0d383299c9
4 changed files with 233 additions and 157 deletions

View File

@ -16,9 +16,9 @@ use failure_ext::Error;
use futures::{finished, Future, Stream};
use futures_ext::{BoxFuture, FutureExt};
use hooks::{ChangedFileType, ChangesetStore, FileContentStore};
use mercurial_types::manifest_utils;
use mercurial_types::manifest_utils::{self, EntryStatus};
use mercurial_types::{
manifest::get_empty_manifest, Changeset, HgChangesetId, HgFileNodeId, MPath,
manifest::get_empty_manifest, Changeset, HgChangesetId, HgFileNodeId, MPath, Type,
};
use mononoke_types::{FileContents, FileType};
@ -66,32 +66,19 @@ impl FileContentStore for BlobRepoFileContentStore {
.boxify()
}
fn get_file_type(
fn get_file_content_by_id(
&self,
ctx: CoreContext,
changesetid: HgChangesetId,
path: MPath,
) -> BoxFuture<Option<FileType>, Error> {
find_file_in_repo(ctx.clone(), self.repo.clone(), changesetid, path)
.map(move |opt| opt.map(|(file_type, _)| file_type))
hash: HgFileNodeId,
) -> BoxFuture<Bytes, Error> {
self.repo
.get_file_content(ctx, hash)
.map(|FileContents::Bytes(bytes)| bytes)
.boxify()
}
fn get_file_size(
&self,
ctx: CoreContext,
changesetid: HgChangesetId,
path: MPath,
) -> BoxFuture<Option<u64>, Error> {
find_file_in_repo(ctx.clone(), self.repo.clone(), changesetid, path)
.and_then({
cloned!(self.repo);
move |opt| match opt {
Some((_, hash)) => repo.get_file_size(ctx, hash).map(Some).left_future(),
None => finished(None).right_future(),
}
})
.boxify()
fn get_file_size(&self, ctx: CoreContext, hash: HgFileNodeId) -> BoxFuture<u64, Error> {
self.repo.get_file_size(ctx, hash).boxify()
}
}
@ -114,7 +101,7 @@ impl ChangesetStore for BlobRepoChangesetStore {
&self,
ctx: CoreContext,
changesetid: HgChangesetId,
) -> BoxFuture<Vec<(String, ChangedFileType)>, Error> {
) -> BoxFuture<Vec<(String, ChangedFileType, Option<(HgFileNodeId, FileType)>)>, Error> {
cloned!(self.repo);
self.repo
.get_changeset_by_changesetid(ctx.clone(), changesetid)
@ -145,8 +132,30 @@ impl ChangesetStore for BlobRepoChangesetStore {
let path = changed_entry
.get_full_path()
.expect("File should have a path");
let ty = ChangedFileType::from(changed_entry.status);
(String::from_utf8_lossy(&path.to_vec()).into_owned(), ty)
let entry = match &changed_entry.status {
EntryStatus::Added(entry) => Some(entry),
EntryStatus::Deleted(_entry) => None,
EntryStatus::Modified { to_entry, .. } => Some(to_entry),
};
let hash_and_type = entry.map(|entry| {
let file_type = match entry.get_type() {
Type::File(file_type) => file_type,
Type::Tree => {
panic!("unexpected tree returned");
}
};
let filenode = HgFileNodeId::new(entry.get_hash().into_nodehash());
(filenode, file_type)
});
let change_ty = ChangedFileType::from(changed_entry.status);
(
String::from_utf8_lossy(&path.to_vec()).into_owned(),
change_ty,
hash_and_type,
)
})
.collect()
})

View File

@ -22,6 +22,7 @@ use hooks::{InMemoryChangesetStore, InMemoryFileContentStore};
use hooks_content_stores::{BlobRepoChangesetStore, BlobRepoFileContentStore};
use maplit::{hashmap, hashset};
use mercurial_types::{HgChangesetId, MPath};
use mercurial_types_mocks::nodehash::{ONES_FNID, THREES_FNID, TWOS_FNID};
use metaconfig_types::{
BlobConfig, BookmarkOrRegex, BookmarkParams, Bundle2ReplayParams, HookConfig, HookParams,
HookType, MetadataDBConfig, RepoConfig, RepoReadOnly, StorageConfig,
@ -500,20 +501,21 @@ fn test_changeset_hook_context() {
async_unit::tokio_unit_test(|| {
let ctx = CoreContext::test_mock();
let files = vec![
"dir1/subdir1/subsubdir1/file_1".to_string(),
"dir1/subdir1/subsubdir2/file_1".to_string(),
"dir1/subdir1/subsubdir2/file_2".to_string(),
("dir1/subdir1/subsubdir1/file_1".to_string(), ONES_FNID),
("dir1/subdir1/subsubdir2/file_1".to_string(), TWOS_FNID),
("dir1/subdir1/subsubdir2/file_2".to_string(), THREES_FNID),
];
let content_store = Arc::new(InMemoryFileContentStore::new());
let cs_id = default_changeset_id();
let hook_files = files
.iter()
.map(|path| {
.map(|(path, entry_id)| {
HookFile::new(
path.clone(),
content_store.clone(),
cs_id,
ChangedFileType::Added,
Some((*entry_id, FileType::Regular)),
)
})
.collect();
@ -1147,6 +1149,11 @@ fn hook_manager_blobrepo() -> HookManager {
)
}
fn to_mpath(string: &str) -> MPath {
// Please... avert your eyes
MPath::new(string.to_string().as_bytes().to_vec()).unwrap()
}
fn hook_manager_inmem() -> HookManager {
let ctx = CoreContext::test_mock();
let repo = many_files_dirs::getrepo(None);
@ -1157,20 +1164,46 @@ fn hook_manager_inmem() -> HookManager {
.wait()
.unwrap();
let mut changeset_store = InMemoryChangesetStore::new();
changeset_store.insert(cs_id, &cs);
changeset_store.insert_changeset(cs_id, cs);
let files = vec![
(
"dir1/subdir1/subsubdir1/file_1".to_string(),
ChangedFileType::Added,
Some((ONES_FNID, FileType::Symlink)),
),
(
"dir1/subdir1/subsubdir2/file_1".to_string(),
ChangedFileType::Added,
Some((TWOS_FNID, FileType::Regular)),
),
(
"dir1/subdir1/subsubdir2/file_2".to_string(),
ChangedFileType::Added,
Some((THREES_FNID, FileType::Regular)),
),
];
changeset_store.insert_files(cs_id, files);
let mut content_store = InMemoryFileContentStore::new();
content_store.insert(
(cs_id.clone(), to_mpath("dir1/subdir1/subsubdir1/file_1")),
(FileType::Symlink, "elephants".into()),
cs_id.clone(),
to_mpath("dir1/subdir1/subsubdir1/file_1"),
ONES_FNID,
"elephants".into(),
);
content_store.insert(
(cs_id.clone(), to_mpath("dir1/subdir1/subsubdir2/file_1")),
(FileType::Regular, "hippopatami".into()),
cs_id.clone(),
to_mpath("dir1/subdir1/subsubdir2/file_1"),
TWOS_FNID,
"hippopatami".into(),
);
content_store.insert(
(cs_id.clone(), to_mpath("dir1/subdir1/subsubdir2/file_2")),
(FileType::Regular, "eels".into()),
cs_id,
to_mpath("dir1/subdir1/subsubdir2/file_2"),
THREES_FNID,
"eels".into(),
);
let logger = Logger::root(Discard {}.ignore_res(), o!());
HookManager::new(
ctx,
@ -1181,11 +1214,6 @@ fn hook_manager_inmem() -> HookManager {
)
}
fn to_mpath(string: &str) -> MPath {
// Please... avert your eyes
MPath::new(string.to_string().as_bytes().to_vec()).unwrap()
}
fn default_repo_config() -> RepoConfig {
RepoConfig {
storage_config: StorageConfig {

View File

@ -29,9 +29,11 @@ use cloned::cloned;
use context::CoreContext;
pub use errors::*;
use failure_ext::{err_msg, Error, FutureFailureErrorExt};
use futures::{failed, finished, Future, IntoFuture};
use futures::{finished, future, Future, IntoFuture};
use futures_ext::{try_boxfuture, BoxFuture, FutureExt};
use mercurial_types::{manifest_utils::EntryStatus, Changeset, HgChangesetId, HgParents, MPath};
use mercurial_types::{
manifest_utils::EntryStatus, Changeset, HgChangesetId, HgFileNodeId, HgParents, MPath,
};
use metaconfig_types::{BookmarkOrRegex, HookBypass, HookConfig, HookManagerParams};
use mononoke_types::FileType;
use regex::Regex;
@ -451,8 +453,14 @@ impl HookManager {
let author = str::from_utf8(changeset.user())?.into();
let files = changed_files
.into_iter()
.map(|(path, ty)| {
HookFile::new(path, content_store.clone(), changeset_id.clone(), ty)
.map(|(path, ty, hash_and_type)| {
HookFile::new(
path,
content_store.clone(),
changeset_id.clone(),
ty,
hash_and_type,
)
})
.collect();
let comments = str::from_utf8(changeset.comments())?.into();
@ -581,6 +589,7 @@ pub struct HookFile {
content_store: Arc<FileContentStore>,
changeset_id: HgChangesetId,
ty: ChangedFileType,
hash_and_type: Option<(HgFileNodeId, FileType)>,
}
impl fmt::Debug for HookFile {
@ -620,12 +629,14 @@ impl HookFile {
content_store: Arc<FileContentStore>,
changeset_id: HgChangesetId,
ty: ChangedFileType,
hash_and_type: Option<(HgFileNodeId, FileType)>,
) -> HookFile {
HookFile {
path,
content_store,
changeset_id,
ty,
hash_and_type,
}
}
@ -641,34 +652,35 @@ impl HookFile {
pub fn len(&self, ctx: CoreContext) -> BoxFuture<u64, Error> {
let path = try_boxfuture!(MPath::new(self.path.as_bytes()));
cloned!(self.changeset_id);
self.content_store
.get_file_size(ctx, changeset_id, path.clone())
.and_then(move |opt| {
opt.ok_or(ErrorKind::MissingFile(changeset_id, path.into()).into())
})
.boxify()
match self.hash_and_type {
Some((entry_id, _)) => self.content_store.get_file_size(ctx, entry_id).boxify(),
None => {
future::err(ErrorKind::MissingFile(self.changeset_id, path.into()).into()).boxify()
}
}
}
pub fn file_content(&self, ctx: CoreContext) -> BoxFuture<Bytes, Error> {
let path = try_boxfuture!(MPath::new(self.path.as_bytes()));
cloned!(self.changeset_id);
self.content_store
.get_file_content(ctx, changeset_id, path.clone())
.and_then(move |opt| {
opt.ok_or(ErrorKind::MissingFile(changeset_id, path.into()).into())
})
.boxify()
match self.hash_and_type {
Some((entry_id, _)) => self
.content_store
.get_file_content_by_id(ctx, entry_id)
.boxify(),
None => {
future::err(ErrorKind::MissingFile(self.changeset_id, path.into()).into()).boxify()
}
}
}
pub fn file_type(&self, ctx: CoreContext) -> BoxFuture<FileType, Error> {
pub fn file_type(&self, _ctx: CoreContext) -> BoxFuture<FileType, Error> {
let path = try_boxfuture!(MPath::new(self.path.as_bytes()));
cloned!(self.changeset_id);
self.content_store
.get_file_type(ctx, changeset_id, path.clone())
.and_then(move |opt| {
opt.ok_or(ErrorKind::MissingFile(changeset_id, path.into()).into())
})
self.hash_and_type
.ok_or(ErrorKind::MissingFile(changeset_id, path.into()).into())
.into_future()
.map(|(_, file_type)| file_type)
.boxify()
}
@ -763,11 +775,13 @@ pub trait ChangesetStore: Send + Sync {
&self,
ctx: CoreContext,
changesetid: HgChangesetId,
) -> BoxFuture<Vec<(String, ChangedFileType)>, Error>;
) -> BoxFuture<Vec<(String, ChangedFileType, Option<(HgFileNodeId, FileType)>)>, Error>;
}
pub struct InMemoryChangesetStore {
map: HashMap<HgChangesetId, HgBlobChangeset>,
map_files:
HashMap<HgChangesetId, Vec<(String, ChangedFileType, Option<(HgFileNodeId, FileType)>)>>,
map_cs: HashMap<HgChangesetId, HgBlobChangeset>,
}
impl ChangesetStore for InMemoryChangesetStore {
@ -776,9 +790,9 @@ impl ChangesetStore for InMemoryChangesetStore {
_ctx: CoreContext,
changesetid: HgChangesetId,
) -> BoxFuture<HgBlobChangeset, Error> {
match self.map.get(&changesetid) {
Some(cs) => Box::new(finished(cs.clone())),
None => Box::new(failed(
match self.map_cs.get(&changesetid) {
Some(cs) => Box::new(future::ok(cs.clone())),
None => Box::new(future::err(
ErrorKind::NoSuchChangeset(changesetid.to_string()).into(),
)),
}
@ -788,16 +802,10 @@ impl ChangesetStore for InMemoryChangesetStore {
&self,
_ctx: CoreContext,
changesetid: HgChangesetId,
) -> BoxFuture<Vec<(String, ChangedFileType)>, Error> {
match self.map.get(&changesetid) {
Some(cs) => Box::new(finished(
cs.files()
.into_iter()
.map(|arr| String::from_utf8_lossy(&arr.to_vec()).into_owned())
.map(|path| (path, ChangedFileType::Added))
.collect(),
)),
None => Box::new(failed(
) -> BoxFuture<Vec<(String, ChangedFileType, Option<(HgFileNodeId, FileType)>)>, Error> {
match self.map_files.get(&changesetid) {
Some(files) => future::ok(files.clone()).boxify(),
None => Box::new(future::err(
ErrorKind::NoSuchChangeset(changesetid.to_string()).into(),
)),
}
@ -807,12 +815,21 @@ impl ChangesetStore for InMemoryChangesetStore {
impl InMemoryChangesetStore {
pub fn new() -> InMemoryChangesetStore {
InMemoryChangesetStore {
map: HashMap::new(),
map_cs: HashMap::new(),
map_files: HashMap::new(),
}
}
pub fn insert(&mut self, changeset_id: HgChangesetId, changeset: &HgBlobChangeset) {
self.map.insert(changeset_id.clone(), changeset.clone());
pub fn insert_files(
&mut self,
changeset_id: HgChangesetId,
files: Vec<(String, ChangedFileType, Option<(HgFileNodeId, FileType)>)>,
) {
self.map_files.insert(changeset_id.clone(), files);
}
pub fn insert_changeset(&mut self, changeset_id: HgChangesetId, cs: HgBlobChangeset) {
self.map_cs.insert(changeset_id.clone(), cs);
}
}
@ -824,76 +841,65 @@ pub trait FileContentStore: Send + Sync {
path: MPath,
) -> BoxFuture<Option<Bytes>, Error>;
fn get_file_type(
fn get_file_content_by_id(
&self,
ctx: CoreContext,
changesetid: HgChangesetId,
path: MPath,
) -> BoxFuture<Option<FileType>, Error>;
entry_id: HgFileNodeId,
) -> BoxFuture<Bytes, Error>;
fn get_file_size(
&self,
ctx: CoreContext,
changesetid: HgChangesetId,
path: MPath,
) -> BoxFuture<Option<u64>, Error>;
fn get_file_size(&self, ctx: CoreContext, entry_id: HgFileNodeId) -> BoxFuture<u64, Error>;
}
#[derive(Clone)]
pub struct InMemoryFileContentStore {
map: HashMap<(HgChangesetId, MPath), (FileType, Bytes)>,
entry_id_to_bytes: HashMap<HgFileNodeId, Bytes>,
path_to_bytes: HashMap<(HgChangesetId, MPath), Bytes>,
}
impl FileContentStore for InMemoryFileContentStore {
fn get_file_content(
&self,
_ctx: CoreContext,
changesetid: HgChangesetId,
cs_id: HgChangesetId,
path: MPath,
) -> BoxFuture<Option<Bytes>, Error> {
let opt = self
.map
.get(&(changesetid, path.clone()))
.map(|(_, bytes)| bytes.clone());
finished(opt).boxify()
future::ok(self.path_to_bytes.get(&(cs_id, path)).cloned()).boxify()
}
fn get_file_type(
fn get_file_content_by_id(
&self,
_ctx: CoreContext,
changesetid: HgChangesetId,
path: MPath,
) -> BoxFuture<Option<FileType>, Error> {
let opt = self
.map
.get(&(changesetid, path.clone()))
.map(|(file_type, _)| file_type.clone());
finished(opt).boxify()
hash: HgFileNodeId,
) -> BoxFuture<Bytes, Error> {
self.entry_id_to_bytes
.get(&hash)
.map(|bytes| bytes.clone())
.ok_or(err_msg("file not found"))
.into_future()
.boxify()
}
fn get_file_size(
&self,
_ctx: CoreContext,
changesetid: HgChangesetId,
path: MPath,
) -> BoxFuture<Option<u64>, Error> {
let opt = self
.map
.get(&(changesetid, path.clone()))
.map(|(_, bytes)| bytes.len() as u64);
finished(opt).boxify()
fn get_file_size(&self, _ctx: CoreContext, hash: HgFileNodeId) -> BoxFuture<u64, Error> {
self.entry_id_to_bytes
.get(&hash)
.map(|bytes| bytes.len() as u64)
.ok_or(err_msg("file not found"))
.into_future()
.boxify()
}
}
impl InMemoryFileContentStore {
pub fn new() -> InMemoryFileContentStore {
InMemoryFileContentStore {
map: HashMap::new(),
entry_id_to_bytes: HashMap::new(),
path_to_bytes: HashMap::new(),
}
}
pub fn insert(&mut self, key: (HgChangesetId, MPath), content: (FileType, Bytes)) {
self.map.insert(key, content);
pub fn insert(&mut self, cs_id: HgChangesetId, path: MPath, key: HgFileNodeId, content: Bytes) {
self.entry_id_to_bytes.insert(key, content.clone());
self.path_to_bytes.insert((cs_id, path), content);
}
}

View File

@ -444,15 +444,10 @@ mod test {
use bytes::Bytes;
use failure_ext::err_downcast;
use futures::Future;
use mercurial_types::HgChangesetId;
use mercurial_types::{HgChangesetId, MPath};
use std::str::FromStr;
use std::sync::Arc;
fn to_mpath(string: &str) -> MPath {
// Please... avert your eyes
MPath::new(string.to_string().as_bytes().to_vec()).unwrap()
}
#[test]
fn test_cs_hook_simple_rejected() {
async_unit::tokio_unit_test(|| {
@ -470,6 +465,25 @@ mod test {
});
}
#[test]
fn test_cs_hook_simple_fails_on_deleted_read() {
async_unit::tokio_unit_test(|| {
let ctx = CoreContext::test_mock();
let changeset = default_changeset();
let code = String::from(
"hook = function (ctx)\n\
for _, file in ipairs(ctx.files) do\n\
if file.path == \"deleted\" then\n\
file:file_content()\n\
end\n\
end\n\
return true\n\
end",
);
assert!(run_changeset_hook(ctx.clone(), code, changeset).is_err());
});
}
#[test]
fn test_cs_hook_reviewers() {
async_unit::tokio_unit_test(|| {
@ -1667,37 +1681,57 @@ end"#;
hook.run(ctx, context).wait()
}
use mercurial_types::HgFileNodeId;
use mercurial_types_mocks::nodehash::{
FIVES_FNID, FOURS_FNID, ONES_FNID, THREES_FNID, TWOS_FNID,
};
fn default_changeset() -> HookChangeset {
let added = vec!["file1".into(), "file2".into(), "file3".into()];
let deleted = vec!["deleted".into()];
let modified = vec!["modified".into()];
let added = vec![
("file1".into(), ONES_FNID),
("file2".into(), TWOS_FNID),
("file3".into(), THREES_FNID),
];
let deleted = vec![("deleted".into(), FOURS_FNID)];
let modified = vec![("modified".into(), FIVES_FNID)];
create_hook_changeset(added, deleted, modified)
}
fn to_mpath(string: &str) -> MPath {
// Please... avert your eyes
MPath::new(string.to_string().as_bytes().to_vec()).unwrap()
}
fn create_hook_changeset(
added: Vec<String>,
deleted: Vec<String>,
modified: Vec<String>,
added: Vec<(String, HgFileNodeId)>,
deleted: Vec<(String, HgFileNodeId)>,
modified: Vec<(String, HgFileNodeId)>,
) -> HookChangeset {
let mut content_store = InMemoryFileContentStore::new();
let cs_id = HgChangesetId::from_str("473b2e715e0df6b2316010908879a3c78e275dd9").unwrap();
for path in added.iter().chain(modified.iter()) {
for (path, entry_id) in added.iter().chain(modified.iter()) {
let content = path.clone() + "sausages";
let content_bytes: Bytes = content.into();
content_store.insert(
(cs_id.clone(), to_mpath(&path)),
(FileType::Regular, content_bytes.into()),
);
content_store.insert(cs_id, to_mpath(path), *entry_id, content_bytes.into());
}
let content_store = Arc::new(content_store);
let content_store2 = content_store.clone();
let create_hook_files = move |files: Vec<String>, ty: ChangedFileType| -> Vec<HookFile> {
files
.into_iter()
.map(|path| HookFile::new(path.clone(), content_store.clone(), cs_id, ty.clone()))
.collect()
};
let create_hook_files =
move |files: Vec<(String, HgFileNodeId)>, ty: ChangedFileType| -> Vec<HookFile> {
files
.into_iter()
.map(|(path, hash)| {
HookFile::new(
path.clone(),
content_store.clone(),
cs_id,
ty.clone(),
Some((hash, FileType::Regular)),
)
})
.collect()
};
let mut hook_files = vec![];
hook_files.extend(create_hook_files(added, ChangedFileType::Added));
@ -1718,30 +1752,28 @@ end"#;
fn default_hook_symlink_file() -> HookFile {
let mut content_store = InMemoryFileContentStore::new();
let cs_id = HgChangesetId::from_str("473b2e715e0df6b2316010908879a3c78e275dd9").unwrap();
content_store.insert(
(cs_id.clone(), to_mpath("/a/b/c.txt")),
(FileType::Symlink, "sausages".into()),
);
let path = "/a/b/c.txt";
content_store.insert(cs_id.clone(), to_mpath(path), ONES_FNID, "sausages".into());
HookFile::new(
"/a/b/c.txt".into(),
path.into(),
Arc::new(content_store),
cs_id,
ChangedFileType::Added,
Some((ONES_FNID, FileType::Symlink)),
)
}
fn default_hook_added_file() -> HookFile {
let mut content_store = InMemoryFileContentStore::new();
let cs_id = HgChangesetId::from_str("473b2e715e0df6b2316010908879a3c78e275dd9").unwrap();
content_store.insert(
(cs_id.clone(), to_mpath("/a/b/c.txt")),
(FileType::Regular, "sausages".into()),
);
let path = "/a/b/c.txt";
content_store.insert(cs_id.clone(), to_mpath(path), ONES_FNID, "sausages".into());
HookFile::new(
"/a/b/c.txt".into(),
path.into(),
Arc::new(content_store),
cs_id,
ChangedFileType::Added,
Some((ONES_FNID, FileType::Regular)),
)
}
@ -1753,6 +1785,7 @@ end"#;
Arc::new(content_store),
cs_id,
ChangedFileType::Deleted,
None,
)
}