hgcommits: implement strip_commits for testing

Summary:
About 64 tests depend on the revlog `strip` behavior. `strip` is not used in
production client repos. I tried to migrate those tests off `strip`, but that
seems like too much work for now. Instead, let's just implement `strip` in the
HgCommits layer so that those tests can keep running.

Reviewed By: DurhamG

Differential Revision: D22402195

fbshipit-source-id: f68d005e04690d8765d5268c698b6c96b981eb0a
This commit is contained in:
Jun Wu 2020-07-17 22:22:03 -07:00 committed by Facebook GitHub Bot
parent c6cac3456a
commit 7b7ae0bd09
6 changed files with 167 additions and 2 deletions

View File

@ -29,11 +29,19 @@ use hgcommits::HgCommits;
use hgcommits::MemHgCommits;
use hgcommits::ReadCommitText;
use hgcommits::RevlogCommits;
use hgcommits::StripCommits;
use std::cell::RefCell;
/// A combination of other traits: commit read/write + DAG algorithms.
///
/// `StripCommits` is part of the bound, so every implementor of this trait
/// must also provide `strip_commits`.
pub trait Commits:
ReadCommitText + AppendCommits + DagAlgorithm + IdConvert + PrefixLookup + ToIdSet + ToSet
ReadCommitText
+ StripCommits
+ AppendCommits
+ DagAlgorithm
+ IdConvert
+ PrefixLookup
+ ToIdSet
+ ToSet
{
}
@ -66,6 +74,15 @@ py_class!(pub class commits |py| {
Ok(PyNone)
}
/// Strip commits. ONLY used to keep LEGACY TESTS running.
/// Fails if called in a non-test environment.
/// New tests should avoid depending on `strip`.
def strip(&self, set: Names) -> PyResult<PyNone> {
    // The mutable borrow of the inner commits object lasts only for this call.
    self.inner(py)
        .borrow_mut()
        .strip_commits(set.0)
        .map_pyerr(py)?;
    Ok(PyNone)
}
/// Lookup the raw text of a commit by binary commit hash.
def getcommitrawtext(&self, node: PyBytes) -> PyResult<Option<PyBytes>> {
let vertex = node.data(py).to_vec().into();

View File

@ -5,9 +5,11 @@
* GNU General Public License version 2.
*/
use crate::strip;
use crate::AppendCommits;
use crate::HgCommit;
use crate::ReadCommitText;
use crate::StripCommits;
use anyhow::bail;
use anyhow::ensure;
use anyhow::Result;
@ -29,20 +31,25 @@ use std::collections::HashMap;
use std::collections::HashSet;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use zstore::Id20;
use zstore::Zstore;
/// Commits using the HG SHA1 hash function. Stored on disk.
///
/// Both on-disk stores are kept together with the paths they were opened
/// from; `strip_commits` uses those paths to build a replacement and reopen.
pub struct HgCommits {
/// `Zstore` opened from `commits_path`.
commits: Zstore,
/// Path that `commits` was opened from.
commits_path: PathBuf,
/// `Dag` opened from `dag_path`.
dag: Dag,
/// Path that `dag` was opened from.
dag_path: PathBuf,
}
impl HgCommits {
/// Open the dag at `dag_path` and the zstore at `commits_path`.
///
/// Both paths are copied into the returned value so that it can be reopened
/// later. Returns an error if either store fails to open.
pub fn new(dag_path: &Path, commits_path: &Path) -> Result<Self> {
    // Keep the original order: the dag is opened before the zstore.
    let dag = Dag::open(dag_path)?;
    let commits = Zstore::open(commits_path)?;
    Ok(Self {
        commits,
        commits_path: commits_path.to_path_buf(),
        dag,
        dag_path: dag_path.to_path_buf(),
    })
}
@ -127,6 +134,19 @@ impl ReadCommitText for HgCommits {
}
}
impl StripCommits for HgCommits {
    /// Rebuild the dag without `set` (see `strip::migrate_commits`), then swap
    /// the rebuilt files into place and reopen `self` from the original paths.
    ///
    /// The replacement dag is built in `<dag_path>/strip` and shares the
    /// existing commit store at `commits_path`.
    fn strip_commits(&mut self, set: Set) -> Result<()> {
        let dag_dir = self.dag_path.clone();
        let scratch_dir = dag_dir.join("strip");
        {
            // Scoped so the replacement is dropped before its files are moved.
            let mut replacement = Self::new(&scratch_dir, &self.commits_path)?;
            strip::migrate_commits(self, &mut replacement, set)?;
        }
        strip::racy_unsafe_move_files(&scratch_dir, &dag_dir)?;
        *self = Self::new(&dag_dir, &self.commits_path)?;
        Ok(())
    }
}
impl IdConvert for HgCommits {
fn vertex_id(&self, name: Vertex) -> Result<Id> {
self.dag.vertex_id(name)

View File

@ -39,7 +39,9 @@ pub struct HgCommit {
mod hgsha1commits;
mod memhgcommits;
mod revlog;
mod strip;
pub use hgsha1commits::HgCommits;
pub use memhgcommits::MemHgCommits;
pub use revlog::RevlogCommits;
pub use strip::StripCommits;

View File

@ -5,9 +5,11 @@
* GNU General Public License version 2.
*/
use crate::strip;
use crate::AppendCommits;
use crate::HgCommit;
use crate::ReadCommitText;
use crate::StripCommits;
use anyhow::bail;
use anyhow::Result;
use dag::ops::DagAddHeads;
@ -87,6 +89,15 @@ impl ReadCommitText for MemHgCommits {
}
}
impl StripCommits for MemHgCommits {
    /// Build a fresh in-memory store, migrate into it via
    /// `strip::migrate_commits`, and replace `self` with the result.
    ///
    /// `self` is left untouched if the migration fails.
    fn strip_commits(&mut self, set: Set) -> Result<()> {
        let mut survivors = Self::new()?;
        strip::migrate_commits(&*self, &mut survivors, set)?;
        *self = survivors;
        Ok(())
    }
}
impl IdConvert for MemHgCommits {
fn vertex_id(&self, name: Vertex) -> Result<Id> {
self.dag.vertex_id(name)

View File

@ -5,9 +5,11 @@
* GNU General Public License version 2.
*/
use crate::strip;
use crate::AppendCommits;
use crate::HgCommit;
use crate::ReadCommitText;
use crate::StripCommits;
use anyhow::Result;
use dag::ops::DagAlgorithm;
use dag::ops::IdConvert;
@ -21,11 +23,14 @@ use dag::Set;
use dag::Vertex;
use minibytes::Bytes;
use revlogindex::RevlogIndex;
use std::fs;
use std::path::Path;
use std::path::PathBuf;
/// HG commits stored on disk using the revlog format.
pub struct RevlogCommits {
/// Revlog index opened from `00changelog.i` and `00changelog.nodemap` under `dir`.
revlog: RevlogIndex,
/// Directory the revlog files were opened from; `strip_commits` uses it to
/// build a replacement and reopen.
dir: PathBuf,
}
impl RevlogCommits {
@ -33,7 +38,10 @@ impl RevlogCommits {
let index_path = dir.join("00changelog.i");
let nodemap_path = dir.join("00changelog.nodemap");
let revlog = RevlogIndex::new(&index_path, &nodemap_path)?;
Ok(Self { revlog })
Ok(Self {
revlog,
dir: dir.to_path_buf(),
})
}
}
@ -65,6 +73,20 @@ impl ReadCommitText for RevlogCommits {
}
}
impl StripCommits for RevlogCommits {
    /// Rebuild the revlog without `set` (see `strip::migrate_commits`), then
    /// swap the rebuilt files into place and reopen `self` from `dir`.
    ///
    /// The replacement revlog is built in `<dir>/strip`.
    ///
    /// # Errors
    ///
    /// Fails if the scratch directory cannot be created, if the migration
    /// fails, or if moving or reopening the files fails.
    fn strip_commits(&mut self, set: Set) -> Result<()> {
        let old_dir = &self.dir;
        let new_dir = old_dir.join("strip");
        // The scratch directory may already exist from an earlier strip.
        // `create_dir_all` accepts that case but still reports real failures
        // (e.g. permissions), which the previous `let _ = ...` discarded.
        fs::create_dir_all(&new_dir)?;
        let mut new = Self::new(&new_dir)?;
        strip::migrate_commits(self, &mut new, set)?;
        // Drop the replacement before its files are moved.
        drop(new);
        strip::racy_unsafe_move_files(&new_dir, old_dir)?;
        *self = Self::new(old_dir)?;
        Ok(())
    }
}
impl IdConvert for RevlogCommits {
fn vertex_id(&self, name: Vertex) -> Result<Id> {
self.revlog.vertex_id(name)

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use crate::AppendCommits;
use crate::HgCommit;
use crate::ReadCommitText;
use anyhow::bail;
use anyhow::ensure;
use anyhow::Result;
use dag::DagAlgorithm;
use dag::Set;
use dag::Vertex;
use std::fs;
use std::path::Path;
/// Removal of commits from a commit store.
pub trait StripCommits {
/// Strip commits. This exists for legacy tests only and is not expected to
/// be used in production. The call site must take care of locking;
/// otherwise it risks data races and data loss.
///
/// The implementations in this crate go through `migrate_commits`, which
/// drops `set` together with its descendants and fails unless the `TESTTMP`
/// environment variable is set.
fn strip_commits(&mut self, set: Set) -> Result<()>;
}
/// Copy every commit of `orig` into `new`, leaving out `strip_set` and its
/// descendants (`strip_set::`).
///
/// Commits are read from `orig` with their raw text and parent names, added
/// to `new` in `iter_rev` order, and then flushed with the heads of the
/// surviving set.
///
/// Fails if the `TESTTMP` environment variable is not set, or if a surviving
/// commit has no raw text in `orig`.
pub(crate) fn migrate_commits(
    orig: &(impl ReadCommitText + DagAlgorithm),
    new: &mut impl AppendCommits,
    strip_set: Set,
) -> Result<()> {
    // Guard: refuse to run outside the test harness.
    if std::env::var_os("TESTTMP").is_none() {
        bail!("strip only works in tests");
    }

    // Everything that survives the strip.
    let kept = orig.all()? - orig.descendants(strip_set)?;

    let mut heads: Vec<Vertex> = Vec::new();
    for head in orig.heads(kept.clone())?.iter_rev()? {
        heads.push(head?);
    }

    let mut commits: Vec<HgCommit> = Vec::new();
    for item in kept.iter_rev()? {
        let vertex = item?;
        let raw_text = if let Some(text) = orig.get_commit_raw_text(&vertex)? {
            text
        } else {
            bail!("commit {:?} not found", &vertex)
        };
        let parents = orig.parent_names(vertex.clone())?;
        commits.push(HgCommit {
            vertex,
            parents,
            raw_text,
        });
    }

    new.add_commits(&commits)?;
    new.flush(&heads)?;
    Ok(())
}
/// Move files and directories in `src_dir` to `dst_dir`.
///
/// An entry in `dst_dir` with the same name as one in `src_dir` is first moved
/// to `dst_dir/old.${epoch}`, where `epoch` is the smallest non-negative
/// integer for which that directory does not exist yet. An entry in `src_dir`
/// with no counterpart in `dst_dir` is simply moved into place.
///
/// Racy. Should be used in non-production setup.
///
/// # Errors
///
/// Fails if the `TESTTMP` environment variable is not set, if the backup
/// directory cannot be created, if `src_dir` cannot be listed, or if a rename
/// fails for any reason other than the destination entry being absent.
pub(crate) fn racy_unsafe_move_files(src_dir: &Path, dst_dir: &Path) -> Result<()> {
    ensure!(
        std::env::var_os("TESTTMP").is_some(),
        "racy_unsafe_move_files should only be used in tests"
    );

    // Pick the first unused `old.N` directory and create it.
    let backup_dir = {
        let mut epoch = 0;
        loop {
            let dir = dst_dir.join(format!("old.{}", epoch));
            if dir.exists() {
                epoch += 1;
            } else {
                fs::create_dir(&dir)?;
                break dir;
            }
        }
    };

    for entry in fs::read_dir(src_dir)? {
        let name = entry?.file_name();
        let src_path = src_dir.join(&name);
        let dst_path = dst_dir.join(&name);
        let backup_path = backup_dir.join(&name);

        // Back up the existing destination entry, if there is one. A missing
        // destination is not an error: the source may contain files that the
        // destination never had. Previously this aborted the move midway.
        match fs::rename(&dst_path, &backup_path) {
            Ok(()) => {}
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
            Err(err) => return Err(err.into()),
        }

        fs::rename(&src_path, &dst_path)?;
    }
    Ok(())
}