repo: add changelog into Rust repo

Summary: Similarly to D35294917 and D35256084, this makes the Rust repo object hold the changelog (`dag_commits`) object. Notice that unlike its Python version, caching is not done on disk.

Differential Revision: D35360056

fbshipit-source-id: 04e51dc75665dcc52aa28b09909f5a60b12493b4
This commit is contained in:
Saul Gutierrez 2022-04-08 09:37:32 -07:00 committed by Facebook GitHub Bot
parent fdd5e9780c
commit dfd39a2ba9
8 changed files with 90 additions and 86 deletions

View File

@ -2185,6 +2185,7 @@ class localrepository(object):
def invalidatechangelog(self):
"""Invalidates the changelog. Discard pending changes."""
self._rsrepo.invalidatechangelog()
if "changelog" in self._filecache:
del self._filecache["changelog"]
if "changelog" in self.__dict__:
@ -3303,15 +3304,7 @@ def _openchangelog(repo):
if repo.ui.configbool("experimental", "use-rust-changelog") and not any(
set(repo.storerequirements) & set(pythonrequirements)
):
path = repo.svfs.join("")
storereqs = list(repo.storerequirements)
meta = repo.metalog()
# Just trying to get edenapi if it won't be available causes race conditions
if "lazychangelog" in repo.storerequirements:
edenapi = repo.edenapi
else:
edenapi = None
inner = bindings.repo.loadchangelog(path, storereqs, meta, edenapi)
inner = repo._rsrepo.changelog()
return changelog2.changelog(repo, inner, repo.ui.uiconfig())
if "emergencychangelog" in repo.storerequirements:

View File

@ -12,6 +12,7 @@ dag = { path = "../../../../lib/dag" }
futures = { version = "0.3" }
hgcommits = { path = "../../../../lib/hgcommits" }
minibytes = { path = "../../../../lib/minibytes" }
parking_lot = "0.11.2"
pyedenapi = { path = "../pyedenapi" }
pymetalog = { path = "../pymetalog" }
renderdag = { path = "../../../../lib/renderdag" }

View File

@ -5,7 +5,6 @@
* GNU General Public License version 2.
*/
use std::cell::RefCell;
use std::sync::Arc;
use anyhow::format_err;
@ -40,6 +39,7 @@ use hgcommits::HybridCommits;
use hgcommits::MemHgCommits;
use hgcommits::RevlogCommits;
use minibytes::Bytes;
use parking_lot::RwLock;
use pyedenapi::PyClient;
use pymetalog::metalog as PyMetaLog;
use storemodel::ReadRootTreeIds;
@ -50,7 +50,7 @@ use crate::Names;
use crate::Spans;
py_class!(pub class commits |py| {
data inner: RefCell<Box<dyn DagCommits + Send + 'static>>;
data inner: Arc<RwLock<Box<dyn DagCommits + Send + 'static>>>;
/// Add a list of commits (node, [parent], text) in-memory.
def addcommits(&self, commits: Vec<(PyBytes, Vec<PyBytes>, PyBytes)>) -> PyResult<PyNone> {
@ -60,7 +60,7 @@ py_class!(pub class commits |py| {
let raw_text = raw_text.data(py).to_vec().into();
HgCommit { vertex, parents, raw_text }
}).collect();
let mut inner = self.inner(py).borrow_mut();
let mut inner = self.inner(py).write();
block_on(inner.add_commits(&commits)).map_pyerr(py)?;
Ok(PyNone)
}
@ -73,7 +73,7 @@ py_class!(pub class commits |py| {
let parents = parents.into_iter().map(|p| p.data(py).to_vec().into()).collect();
GraphNode { vertex, parents }
}).collect();
let mut inner = self.inner(py).borrow_mut();
let mut inner = self.inner(py).write();
block_on(inner.add_graph_nodes(&graph_nodes)).map_pyerr(py)?;
Ok(PyNone)
}
@ -82,7 +82,7 @@ py_class!(pub class commits |py| {
/// `masterheads` is a hint about what parts belong to the "master" group.
def flush(&self, masterheads: Vec<PyBytes>) -> PyResult<PyNone> {
let heads = masterheads.into_iter().map(|h| h.data(py).to_vec().into()).collect::<Vec<_>>();
let mut inner = self.inner(py).borrow_mut();
let mut inner = self.inner(py).write();
block_on(inner.flush(&heads)).map_pyerr(py)?;
Ok(PyNone)
}
@ -91,7 +91,7 @@ py_class!(pub class commits |py| {
/// For the revlog backend, this also write the commit graph to disk.
/// For the lazy commit hash backend, this also writes the commit hashes.
def flushcommitdata(&self) -> PyResult<PyNone> {
let mut inner = self.inner(py).borrow_mut();
let mut inner = self.inner(py).write();
block_on(inner.flush_commit_data()).map_pyerr(py)?;
Ok(PyNone)
}
@ -99,7 +99,7 @@ py_class!(pub class commits |py| {
/// Import clone data (inside PyCell) and flush.
def importclonedata(&self, data: PyCell) -> PyResult<PyNone> {
let data: Box<CloneData<Vertex>> = data.take(py).ok_or_else(|| format_err!("Data is not CloneData")).map_pyerr(py)?;
let mut inner = self.inner(py).borrow_mut();
let mut inner = self.inner(py).write();
block_on(inner.import_clone_data(*data)).map_pyerr(py)?;
Ok(PyNone)
}
@ -110,7 +110,7 @@ py_class!(pub class commits |py| {
let data: Box<CloneData<Vertex>> = data.take(py).ok_or_else(|| format_err!("Data is not CloneData")).map_pyerr(py)?;
let commits = data.flat_segments.vertex_count();
let segments = data.flat_segments.segment_count();
let mut inner = self.inner(py).borrow_mut();
let mut inner = self.inner(py).write();
block_on(inner.import_pull_data(*data, &heads.0)).map_pyerr(py)?;
Ok((commits, segments))
}
@ -119,7 +119,7 @@ py_class!(pub class commits |py| {
/// Fails if called in a non-test environment.
/// New tests should avoid depending on `strip`.
def strip(&self, set: Names) -> PyResult<PyNone> {
let mut inner = self.inner(py).borrow_mut();
let mut inner = self.inner(py).write();
block_on(inner.strip_commits(set.0)).map_pyerr(py)?;
Ok(PyNone)
}
@ -127,7 +127,7 @@ py_class!(pub class commits |py| {
/// Lookup the raw text of a commit by binary commit hash.
def getcommitrawtext(&self, node: PyBytes) -> PyResult<Option<PyBytes>> {
let vertex = node.data(py).to_vec().into();
let inner = self.inner(py).borrow();
let inner = self.inner(py).read();
let optional_bytes = block_on(inner.get_commit_raw_text(&vertex)).map_pyerr(py)?;
Ok(optional_bytes.map(|bytes| PyBytes::new(py, bytes.as_ref())))
}
@ -136,7 +136,7 @@ py_class!(pub class commits |py| {
def getcommitrawtextlist(&self, nodes: Vec<BytesLike<Vertex>>) -> PyResult<Vec<BytesLike<Bytes>>>
{
let vertexes: Vec<Vertex> = nodes.into_iter().map(|b| b.0).collect();
let inner = self.inner(py).borrow();
let inner = self.inner(py).read();
let texts = block_on(inner.get_commit_raw_text_list(&vertexes)).map_pyerr(py)?;
Ok(texts.into_iter().map(BytesLike).collect())
}
@ -146,37 +146,37 @@ py_class!(pub class commits |py| {
// Attempt to use IdMap bound to `set` if possible for performance.
let id_map = match set.0.hints().id_map() {
Some(map) => map,
None => self.inner(py).borrow().id_map_snapshot().map_pyerr(py)?,
None => self.inner(py).read().id_map_snapshot().map_pyerr(py)?,
};
Ok(Spans(block_on(id_map.to_id_set(&set.0)).map_pyerr(py)?))
}
/// Convert IdSet to Set. For compatibility with legacy code only.
def tonodes(&self, set: Spans) -> PyResult<Names> {
let inner = self.inner(py).borrow();
let inner = self.inner(py).read();
Ok(Names(inner.to_set(&set.0).map_pyerr(py)?))
}
/// Obtain the read-only dagalgo object that supports various DAG algorithms.
def dagalgo(&self) -> PyResult<dagalgo> {
dagalgo::from_arc_dag(py, self.inner(py).borrow().dag_snapshot().map_pyerr(py)?)
dagalgo::from_arc_dag(py, self.inner(py).read().dag_snapshot().map_pyerr(py)?)
}
/// Obtain the read-only object that can do hex prefix lookup and convert
/// between binary commit hashes and integer Ids.
def idmap(&self) -> PyResult<idmap::idmap> {
idmap::idmap::from_arc_idmap(py, self.inner(py).borrow().id_map_snapshot().map_pyerr(py)?)
idmap::idmap::from_arc_idmap(py, self.inner(py).read().id_map_snapshot().map_pyerr(py)?)
}
/// Name of the backend used for DAG algorithms.
def algorithmbackend(&self) -> PyResult<Str> {
let inner = self.inner(py).borrow();
let inner = self.inner(py).read();
Ok(inner.algorithm_backend().to_string().into())
}
/// Describe the backend.
def describebackend(&self) -> PyResult<Str> {
let inner = self.inner(py).borrow();
let inner = self.inner(py).read();
Ok(inner.describe_backend().into())
}
@ -184,7 +184,7 @@ py_class!(pub class commits |py| {
def explaininternals(&self, out: PyObject) -> PyResult<PyNone> {
// This function takes a 'out' parameter so it can work with pager
// and output progressively.
let inner = self.inner(py).borrow();
let inner = self.inner(py).read();
let mut out = cpython_ext::wrap_pyio(out);
inner.explain_internals(&mut out).map_pyerr(py)?;
Ok(PyNone)
@ -196,7 +196,7 @@ py_class!(pub class commits |py| {
/// Returns missing ids. A valid lazy graph should return an empty list.
/// See document in the dag crate for details.
def checkuniversalids(&self) -> PyResult<Vec<u64>> {
let inner = self.inner(py).borrow();
let inner = self.inner(py).read();
let ids = block_on(inner.check_universal_ids()).map_pyerr(py)?;
Ok(ids.into_iter().map(|i| i.0).collect())
}
@ -207,7 +207,7 @@ py_class!(pub class commits |py| {
/// Returns a list of human-readable messages indicating problems.
/// A valid graph should return an empty list.
def checksegments(&self) -> PyResult<Vec<String>> {
let inner = self.inner(py).borrow();
let inner = self.inner(py).read();
let problems = block_on(inner.check_segments()).map_pyerr(py)?;
Ok(problems)
}
@ -218,8 +218,8 @@ py_class!(pub class commits |py| {
/// Returns a list of human-readable messages indicating problems.
/// A valid graph should return an empty list.
def checkisomorphicgraph(&self, other: commits, heads: Names) -> PyResult<Vec<String>> {
let inner = self.inner(py).borrow();
let other = other.inner(py).borrow().dag_snapshot().map_pyerr(py)?;
let inner = self.inner(py).read();
let other = other.inner(py).read().dag_snapshot().map_pyerr(py)?;
let heads = heads.0;
let problems = block_on(inner.check_isomorphic_graph(&other, heads)).map_pyerr(py)?;
Ok(problems)
@ -232,7 +232,7 @@ py_class!(pub class commits |py| {
/// of truth).
def updatereferences(&self, metalog: PyMetaLog) -> PyResult<PyNone> {
let meta = metalog.metalog_rwlock(py);
let mut inner = self.inner(py).borrow_mut();
let mut inner = self.inner(py).write();
inner.update_references_to_match_metalog(&meta.read()).map_pyerr(py)?;
Ok(PyNone)
}
@ -350,21 +350,18 @@ py_class!(pub class commits |py| {
impl commits {
/// Create a `commits` Python object from a Rust struct.
pub fn from_commits(py: Python, commits: impl DagCommits + Send + 'static) -> PyResult<Self> {
Self::create_instance(py, RefCell::new(Box::new(commits)))
Self::create_instance(py, Arc::new(RwLock::new(Box::new(commits))))
}
pub(crate) fn to_read_root_tree_nodes(
&self,
py: Python,
) -> Arc<dyn ReadRootTreeIds + Send + Sync> {
let inner = self.inner(py).borrow();
let inner = self.inner(py).read();
inner.to_dyn_read_root_tree_ids()
}
pub fn get_inner<'a>(
&'a self,
py: Python<'a>,
) -> &'a RefCell<Box<dyn DagCommits + Send + 'static>> {
self.inner(py)
pub fn get_inner(&self, py: Python) -> Arc<RwLock<Box<dyn DagCommits + Send + 'static>>> {
self.inner(py).clone()
}
}

View File

@ -45,7 +45,7 @@ fn clone(
let config = config.get_cfg(py);
let client = edenapi.extract_inner(py);
let commits = commits.get_inner(py);
let mut commits = commits.borrow_mut();
let mut commits = commits.write();
let meta = metalog.metalog_rwlock(py);
let mut meta = meta.write();
exchange::clone(&config, client, &mut meta, &mut commits).map_pyerr(py)?;

View File

@ -9,6 +9,7 @@ cpython_ext = { path = "../../../../lib/cpython-ext", default-features = false }
cpython = { version = "0.7", default-features = false }
repo = { path = "../../../../lib/repo" }
util = { path = "../../../../lib/util" }
parking_lot = "0.11.2"
pyconfigparser = { path = "../pyconfigparser" }
pydag = { path = "../pydag" }
pyedenapi = { path = "../pyedenapi" }

View File

@ -9,14 +9,11 @@
extern crate repo as rsrepo;
use std::cell::RefCell;
use cpython::*;
use cpython_ext::error::ResultPyErrExt;
use cpython_ext::ExtractInner;
use cpython_ext::PyNone;
use cpython_ext::PyPath;
use cpython_ext::PyPathBuf;
use parking_lot::RwLock;
use pyconfigparser::config;
use pydag::commits::commits as PyCommits;
use pyedenapi::PyClient as PyEdenApi;
@ -27,24 +24,11 @@ pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
let name = [package, "repo"].join(".");
let m = PyModule::new(py, &name)?;
m.add_class::<repo>(py)?;
m.add(
py,
"loadchangelog",
py_fn!(
py,
load_changelog(
dir: &PyPath,
storerequirements: Vec<String>,
metalog: PyMetaLog,
edenapi: Option<PyEdenApi>
)
),
)?;
Ok(m)
}
py_class!(pub class repo |py| {
data inner: RefCell<Repo>;
data inner: RwLock<Repo>;
@staticmethod
def initialize(path: PyPathBuf, config: &config) -> PyResult<PyNone> {
@ -58,40 +42,39 @@ py_class!(pub class repo |py| {
let config = config.get_cfg(py);
let abs_path = util::path::absolute(path.as_path()).map_pyerr(py)?;
let repo = Repo::load_with_config(abs_path, config).map_pyerr(py)?;
Self::create_instance(py, RefCell::new(repo))
Self::create_instance(py, RwLock::new(repo))
}
def metalog(&self) -> PyResult<PyMetaLog> {
let mut repo_ref = self.inner(py).borrow_mut();
let mut repo_ref = self.inner(py).write();
let path = String::from(repo_ref.metalog_path().to_string_lossy());
let log_ref = repo_ref.metalog().map_pyerr(py)?;
PyMetaLog::create_instance(py, log_ref, path)
}
def invalidatemetalog(&self) -> PyResult<PyNone> {
let mut repo_ref = self.inner(py).borrow_mut();
let mut repo_ref = self.inner(py).write();
repo_ref.invalidate_metalog();
Ok(PyNone)
}
def edenapi(&self) -> PyResult<PyEdenApi> {
let mut repo_ref = self.inner(py).borrow_mut();
let mut repo_ref = self.inner(py).write();
let edenapi_ref = repo_ref.eden_api().map_pyerr(py)?;
PyEdenApi::create_instance(py, edenapi_ref)
}
});
fn load_changelog(
py: Python,
dir: &PyPath,
storerequirements: Vec<String>,
metalog: PyMetaLog,
edenapi: Option<PyEdenApi>,
) -> PyResult<PyCommits> {
let client = edenapi.map(|e| e.extract_inner(py));
let meta = metalog.metalog_rwlock(py);
let inner = py
.allow_threads(|| rsrepo::open_dag_commits(dir.as_path(), storerequirements, meta, client))
def changelog(&self) -> PyResult<PyCommits> {
let mut repo_ref = self.inner(py).write();
let changelog_ref = py
.allow_threads(|| repo_ref.dag_commits())
.map_pyerr(py)?;
PyCommits::create_instance(py, RefCell::new(inner))
PyCommits::create_instance(py, changelog_ref)
}
def invalidatechangelog(&self) -> PyResult<PyNone> {
let mut repo_ref = self.inner(py).write();
repo_ref.invalidate_dag_commits();
Ok(PyNone)
}
});

View File

@ -5,6 +5,7 @@
* GNU General Public License version 2.
*/
use std::collections::HashSet;
use std::fs;
use std::path::Path;
use std::path::PathBuf;
@ -20,7 +21,7 @@ use hgcommits::RevlogCommits;
use metalog::MetaLog;
use parking_lot::RwLock;
type OpenedCommits = dyn DagCommits + Send + 'static;
static REQUIREMENTS_PATH: &str = "requires";
static HG_COMMITS_PATH: &str = "hgcommits/v1";
static LAZY_HASH_PATH: &str = "lazyhashdir";
@ -39,10 +40,11 @@ static GIT_FILE: &str = "gitdir";
pub fn open_dag_commits(
store_path: &Path,
store_requirements: Vec<String>,
metalog: Arc<RwLock<MetaLog>>,
eden_api: Option<Arc<dyn EdenApi>>,
) -> Result<Box<OpenedCommits>, CommitError> {
eden_api: Arc<dyn EdenApi>,
) -> Result<Box<dyn DagCommits + Send + 'static>, CommitError> {
let store_requirements = get_store_requirements(store_path)
.map_err(|err| CommitError::FileReadError("requirements file", err))?;
if store_requirements.contains(&GIT_STORE_REQUIREMENT.to_string()) {
log_backend(GIT_BACKEND_LOG);
return open_git(store_path, metalog);
@ -57,6 +59,11 @@ pub fn open_dag_commits(
Ok(Box::new(RevlogCommits::new(store_path)?))
}
fn get_store_requirements(store_path: &Path) -> Result<HashSet<String>, std::io::Error> {
let store_requirements = fs::read_to_string(store_path.join(REQUIREMENTS_PATH))?;
Ok(store_requirements.split('\n').map(String::from).collect())
}
fn log_backend(backend: &str) {
tracing::info!(target: "changelog_info", changelog_backend=AsRef::<str>::as_ref(&backend));
}
@ -64,7 +71,7 @@ fn log_backend(backend: &str) {
fn open_git(
store_path: &Path,
metalog: Arc<RwLock<MetaLog>>,
) -> Result<Box<OpenedCommits>, CommitError> {
) -> Result<Box<dyn DagCommits + Send + 'static>, CommitError> {
let git_path =
calculate_git_path(store_path).map_err(|err| CommitError::FileReadError("gitdir", err))?;
let segments_path = calculate_segments_path(store_path);
@ -73,7 +80,7 @@ fn open_git(
Ok(Box::new(git_segmented_commits))
}
fn open_double(store_path: &Path) -> Result<Box<OpenedCommits>, CommitError> {
fn open_double(store_path: &Path) -> Result<Box<dyn DagCommits + Send + 'static>, CommitError> {
let segments_path = calculate_segments_path(store_path);
let hg_commits_path = store_path.join(HG_COMMITS_PATH);
let double_commits = DoubleWriteCommits::new(
@ -86,9 +93,8 @@ fn open_double(store_path: &Path) -> Result<Box<OpenedCommits>, CommitError> {
fn open_hybrid(
store_path: &Path,
eden_api: Option<Arc<dyn EdenApi>>,
) -> Result<Box<OpenedCommits>, CommitError> {
let eden_api = eden_api.ok_or(CommitError::OpenRequirements("eden_api"))?;
eden_api: Arc<dyn EdenApi>,
) -> Result<Box<dyn DagCommits + Send + 'static>, CommitError> {
let segments_path = calculate_segments_path(store_path);
let hg_commits_path = store_path.join(HG_COMMITS_PATH);
let lazy_hash_path = get_path_from_file(store_path, LAZY_HASH_PATH);

View File

@ -15,9 +15,11 @@ use configparser::config::ConfigSet;
use edenapi::Builder;
use edenapi::EdenApi;
use edenapi::EdenApiError;
use hgcommits::DagCommits;
use metalog::MetaLog;
use parking_lot::RwLock;
use crate::commits::open_dag_commits;
use crate::errors;
use crate::init;
@ -32,6 +34,7 @@ pub struct Repo {
repo_name: Option<String>,
metalog: Option<Arc<RwLock<MetaLog>>>,
eden_api: Option<Arc<dyn EdenApi>>,
dag_commits: Option<Arc<RwLock<Box<dyn DagCommits + Send + 'static>>>>,
}
/// Either an optional [`Repo`] which owns a [`ConfigSet`], or a [`ConfigSet`]
@ -158,6 +161,7 @@ impl Repo {
});
let metalog = None;
let eden_api = None;
let dag_commits = None;
Ok(Repo {
path,
@ -170,6 +174,7 @@ impl Repo {
repo_name,
metalog,
eden_api,
dag_commits,
})
}
@ -243,6 +248,24 @@ impl Repo {
}
}
}
pub fn dag_commits(&mut self) -> Result<Arc<RwLock<Box<dyn DagCommits + Send + 'static>>>> {
match &self.dag_commits {
Some(commits) => Ok(commits.clone()),
None => {
let metalog = self.metalog()?;
let eden_api = self.eden_api()?;
let commits = open_dag_commits(&self.store_path, metalog, eden_api)?;
let commits = Arc::new(RwLock::new(commits));
self.dag_commits = Some(commits.clone());
Ok(commits)
}
}
}
pub fn invalidate_dag_commits(&mut self) {
self.dag_commits = None;
}
}
fn find_hg_repo_root(current_path: &Path) -> Option<PathBuf> {