From dfd39a2ba967a978c1868509be89c20224e91be9 Mon Sep 17 00:00:00 2001 From: Saul Gutierrez Date: Fri, 8 Apr 2022 09:37:32 -0700 Subject: [PATCH] repo: add changelog into Rust repo Summary: Similarly to D35294917 and D35256084, this makes the Rust repo object hold the changelog (`dag_commits`) object. Notice that unlike its Python version, caching is not done on disk. Differential Revision: D35360056 fbshipit-source-id: 04e51dc75665dcc52aa28b09909f5a60b12493b4 --- eden/scm/edenscm/mercurial/localrepo.py | 11 +--- .../bindings/modules/pydag/Cargo.toml | 1 + .../bindings/modules/pydag/src/commits.rs | 57 +++++++++---------- .../bindings/modules/pyexchange/src/lib.rs | 2 +- .../bindings/modules/pyrepo/Cargo.toml | 1 + .../bindings/modules/pyrepo/src/lib.rs | 57 +++++++------------ eden/scm/lib/repo/src/commits.rs | 24 +++++--- eden/scm/lib/repo/src/repo.rs | 23 ++++++++ 8 files changed, 90 insertions(+), 86 deletions(-) diff --git a/eden/scm/edenscm/mercurial/localrepo.py b/eden/scm/edenscm/mercurial/localrepo.py index fae3597077..cdb3cce328 100644 --- a/eden/scm/edenscm/mercurial/localrepo.py +++ b/eden/scm/edenscm/mercurial/localrepo.py @@ -2185,6 +2185,7 @@ class localrepository(object): def invalidatechangelog(self): """Invalidates the changelog. Discard pending changes.""" + self._rsrepo.invalidatechangelog() if "changelog" in self._filecache: del self._filecache["changelog"] if "changelog" in self.__dict__: @@ -3303,15 +3304,7 @@ def _openchangelog(repo): if repo.ui.configbool("experimental", "use-rust-changelog") and not any( set(repo.storerequirements) & set(pythonrequirements) ): - path = repo.svfs.join("") - storereqs = list(repo.storerequirements) - meta = repo.metalog() - # Just trying to get edenapi if it won't be available causes race conditions - if "lazychangelog" in repo.storerequirements: - edenapi = repo.edenapi - else: - edenapi = None - inner = bindings.repo.loadchangelog(path, storereqs, meta, edenapi) + inner = repo._rsrepo.changelog() return changelog2.changelog(repo, inner, repo.ui.uiconfig()) if "emergencychangelog" in repo.storerequirements: diff --git a/eden/scm/edenscmnative/bindings/modules/pydag/Cargo.toml b/eden/scm/edenscmnative/bindings/modules/pydag/Cargo.toml index dfc87415a8..856fa0c2a1 100644 --- a/eden/scm/edenscmnative/bindings/modules/pydag/Cargo.toml +++ b/eden/scm/edenscmnative/bindings/modules/pydag/Cargo.toml @@ -12,6 +12,7 @@ dag = { path = "../../../../lib/dag" } futures = { version = "0.3" } hgcommits = { path = "../../../../lib/hgcommits" } minibytes = { path = "../../../../lib/minibytes" } +parking_lot = "0.11.2" pyedenapi = { path = "../pyedenapi" } pymetalog = { path = "../pymetalog" } renderdag = { path = "../../../../lib/renderdag" } diff --git a/eden/scm/edenscmnative/bindings/modules/pydag/src/commits.rs b/eden/scm/edenscmnative/bindings/modules/pydag/src/commits.rs index 93f45f107c..71ea5fbd35 100644 --- a/eden/scm/edenscmnative/bindings/modules/pydag/src/commits.rs +++ b/eden/scm/edenscmnative/bindings/modules/pydag/src/commits.rs @@ -5,7 +5,6 @@ * GNU General Public License version 2. */ -use std::cell::RefCell; use std::sync::Arc; use anyhow::format_err; @@ -40,6 +39,7 @@ use hgcommits::HybridCommits; use hgcommits::MemHgCommits; use hgcommits::RevlogCommits; use minibytes::Bytes; +use parking_lot::RwLock; use pyedenapi::PyClient; use pymetalog::metalog as PyMetaLog; use storemodel::ReadRootTreeIds; @@ -50,7 +50,7 @@ use crate::Names; use crate::Spans; py_class!(pub class commits |py| { - data inner: RefCell>; + data inner: Arc>>; /// Add a list of commits (node, [parent], text) in-memory. def addcommits(&self, commits: Vec<(PyBytes, Vec, PyBytes)>) -> PyResult { @@ -60,7 +60,7 @@ py_class!(pub class commits |py| { let raw_text = raw_text.data(py).to_vec().into(); HgCommit { vertex, parents, raw_text } }).collect(); - let mut inner = self.inner(py).borrow_mut(); + let mut inner = self.inner(py).write(); block_on(inner.add_commits(&commits)).map_pyerr(py)?; Ok(PyNone) } @@ -73,7 +73,7 @@ py_class!(pub class commits |py| { let parents = parents.into_iter().map(|p| p.data(py).to_vec().into()).collect(); GraphNode { vertex, parents } }).collect(); - let mut inner = self.inner(py).borrow_mut(); + let mut inner = self.inner(py).write(); block_on(inner.add_graph_nodes(&graph_nodes)).map_pyerr(py)?; Ok(PyNone) } @@ -82,7 +82,7 @@ py_class!(pub class commits |py| { /// `masterheads` is a hint about what parts belong to the "master" group. def flush(&self, masterheads: Vec) -> PyResult { let heads = masterheads.into_iter().map(|h| h.data(py).to_vec().into()).collect::>(); - let mut inner = self.inner(py).borrow_mut(); + let mut inner = self.inner(py).write(); block_on(inner.flush(&heads)).map_pyerr(py)?; Ok(PyNone) } @@ -91,7 +91,7 @@ py_class!(pub class commits |py| { /// For the revlog backend, this also write the commit graph to disk. /// For the lazy commit hash backend, this also writes the commit hashes. def flushcommitdata(&self) -> PyResult { - let mut inner = self.inner(py).borrow_mut(); + let mut inner = self.inner(py).write(); block_on(inner.flush_commit_data()).map_pyerr(py)?; Ok(PyNone) } @@ -99,7 +99,7 @@ py_class!(pub class commits |py| { /// Import clone data (inside PyCell) and flush. def importclonedata(&self, data: PyCell) -> PyResult { let data: Box> = data.take(py).ok_or_else(|| format_err!("Data is not CloneData")).map_pyerr(py)?; - let mut inner = self.inner(py).borrow_mut(); + let mut inner = self.inner(py).write(); block_on(inner.import_clone_data(*data)).map_pyerr(py)?; Ok(PyNone) } @@ -110,7 +110,7 @@ py_class!(pub class commits |py| { let data: Box> = data.take(py).ok_or_else(|| format_err!("Data is not CloneData")).map_pyerr(py)?; let commits = data.flat_segments.vertex_count(); let segments = data.flat_segments.segment_count(); - let mut inner = self.inner(py).borrow_mut(); + let mut inner = self.inner(py).write(); block_on(inner.import_pull_data(*data, &heads.0)).map_pyerr(py)?; Ok((commits, segments)) } @@ -119,7 +119,7 @@ py_class!(pub class commits |py| { /// Fails if called in a non-test environment. /// New tests should avoid depending on `strip`. def strip(&self, set: Names) -> PyResult { - let mut inner = self.inner(py).borrow_mut(); + let mut inner = self.inner(py).write(); block_on(inner.strip_commits(set.0)).map_pyerr(py)?; Ok(PyNone) } @@ -127,7 +127,7 @@ py_class!(pub class commits |py| { /// Lookup the raw text of a commit by binary commit hash. def getcommitrawtext(&self, node: PyBytes) -> PyResult> { let vertex = node.data(py).to_vec().into(); - let inner = self.inner(py).borrow(); + let inner = self.inner(py).read(); let optional_bytes = block_on(inner.get_commit_raw_text(&vertex)).map_pyerr(py)?; Ok(optional_bytes.map(|bytes| PyBytes::new(py, bytes.as_ref()))) } @@ -136,7 +136,7 @@ py_class!(pub class commits |py| { def getcommitrawtextlist(&self, nodes: Vec>) -> PyResult>> { let vertexes: Vec = nodes.into_iter().map(|b| b.0).collect(); - let inner = self.inner(py).borrow(); + let inner = self.inner(py).read(); let texts = block_on(inner.get_commit_raw_text_list(&vertexes)).map_pyerr(py)?; Ok(texts.into_iter().map(BytesLike).collect()) } @@ -146,37 +146,37 @@ py_class!(pub class commits |py| { // Attempt to use IdMap bound to `set` if possible for performance. let id_map = match set.0.hints().id_map() { Some(map) => map, - None => self.inner(py).borrow().id_map_snapshot().map_pyerr(py)?, + None => self.inner(py).read().id_map_snapshot().map_pyerr(py)?, }; Ok(Spans(block_on(id_map.to_id_set(&set.0)).map_pyerr(py)?)) } /// Convert IdSet to Set. For compatibility with legacy code only. def tonodes(&self, set: Spans) -> PyResult { - let inner = self.inner(py).borrow(); + let inner = self.inner(py).read(); Ok(Names(inner.to_set(&set.0).map_pyerr(py)?)) } /// Obtain the read-only dagalgo object that supports various DAG algorithms. def dagalgo(&self) -> PyResult { - dagalgo::from_arc_dag(py, self.inner(py).borrow().dag_snapshot().map_pyerr(py)?) + dagalgo::from_arc_dag(py, self.inner(py).read().dag_snapshot().map_pyerr(py)?) } /// Obtain the read-only object that can do hex prefix lookup and convert /// between binary commit hashes and integer Ids. def idmap(&self) -> PyResult { - idmap::idmap::from_arc_idmap(py, self.inner(py).borrow().id_map_snapshot().map_pyerr(py)?) + idmap::idmap::from_arc_idmap(py, self.inner(py).read().id_map_snapshot().map_pyerr(py)?) } /// Name of the backend used for DAG algorithms. def algorithmbackend(&self) -> PyResult { - let inner = self.inner(py).borrow(); + let inner = self.inner(py).read(); Ok(inner.algorithm_backend().to_string().into()) } /// Describe the backend. def describebackend(&self) -> PyResult { - let inner = self.inner(py).borrow(); + let inner = self.inner(py).read(); Ok(inner.describe_backend().into()) } @@ -184,7 +184,7 @@ py_class!(pub class commits |py| { def explaininternals(&self, out: PyObject) -> PyResult { // This function takes a 'out' parameter so it can work with pager // and output progressively. - let inner = self.inner(py).borrow(); + let inner = self.inner(py).read(); let mut out = cpython_ext::wrap_pyio(out); inner.explain_internals(&mut out).map_pyerr(py)?; Ok(PyNone) @@ -196,7 +196,7 @@ py_class!(pub class commits |py| { /// Returns missing ids. A valid lazy graph should return an empty list. /// See document in the dag crate for details. def checkuniversalids(&self) -> PyResult> { - let inner = self.inner(py).borrow(); + let inner = self.inner(py).read(); let ids = block_on(inner.check_universal_ids()).map_pyerr(py)?; Ok(ids.into_iter().map(|i| i.0).collect()) } @@ -207,7 +207,7 @@ py_class!(pub class commits |py| { /// Returns a list of human-readable messages indicating problems. /// A valid graph should return an empty list. def checksegments(&self) -> PyResult> { - let inner = self.inner(py).borrow(); + let inner = self.inner(py).read(); let problems = block_on(inner.check_segments()).map_pyerr(py)?; Ok(problems) } @@ -218,8 +218,8 @@ py_class!(pub class commits |py| { /// Returns a list of human-readable messages indicating problems. /// A valid graph should return an empty list. def checkisomorphicgraph(&self, other: commits, heads: Names) -> PyResult> { - let inner = self.inner(py).borrow(); - let other = other.inner(py).borrow().dag_snapshot().map_pyerr(py)?; + let inner = self.inner(py).read(); + let other = other.inner(py).read().dag_snapshot().map_pyerr(py)?; let heads = heads.0; let problems = block_on(inner.check_isomorphic_graph(&other, heads)).map_pyerr(py)?; Ok(problems) @@ -232,7 +232,7 @@ py_class!(pub class commits |py| { /// of truth). def updatereferences(&self, metalog: PyMetaLog) -> PyResult { let meta = metalog.metalog_rwlock(py); - let mut inner = self.inner(py).borrow_mut(); + let mut inner = self.inner(py).write(); inner.update_references_to_match_metalog(&meta.read()).map_pyerr(py)?; Ok(PyNone) } @@ -350,21 +350,18 @@ py_class!(pub class commits |py| { impl commits { /// Create a `commits` Python object from a Rust struct. pub fn from_commits(py: Python, commits: impl DagCommits + Send + 'static) -> PyResult { - Self::create_instance(py, RefCell::new(Box::new(commits))) + Self::create_instance(py, Arc::new(RwLock::new(Box::new(commits)))) } pub(crate) fn to_read_root_tree_nodes( &self, py: Python, ) -> Arc { - let inner = self.inner(py).borrow(); + let inner = self.inner(py).read(); inner.to_dyn_read_root_tree_ids() } - pub fn get_inner<'a>( - &'a self, - py: Python<'a>, - ) -> &'a RefCell> { - self.inner(py) + pub fn get_inner(&self, py: Python) -> Arc>> { + self.inner(py).clone() } } diff --git a/eden/scm/edenscmnative/bindings/modules/pyexchange/src/lib.rs b/eden/scm/edenscmnative/bindings/modules/pyexchange/src/lib.rs index f7b1847aed..b9cd93ced2 100644 --- a/eden/scm/edenscmnative/bindings/modules/pyexchange/src/lib.rs +++ b/eden/scm/edenscmnative/bindings/modules/pyexchange/src/lib.rs @@ -45,7 +45,7 @@ fn clone( let config = config.get_cfg(py); let client = edenapi.extract_inner(py); let commits = commits.get_inner(py); - let mut commits = commits.borrow_mut(); + let mut commits = commits.write(); let meta = metalog.metalog_rwlock(py); let mut meta = meta.write(); exchange::clone(&config, client, &mut meta, &mut commits).map_pyerr(py)?; diff --git a/eden/scm/edenscmnative/bindings/modules/pyrepo/Cargo.toml b/eden/scm/edenscmnative/bindings/modules/pyrepo/Cargo.toml index 95eb1bd98e..b31e15bb67 100644 --- a/eden/scm/edenscmnative/bindings/modules/pyrepo/Cargo.toml +++ b/eden/scm/edenscmnative/bindings/modules/pyrepo/Cargo.toml @@ -9,6 +9,7 @@ cpython_ext = { path = "../../../../lib/cpython-ext", default-features = false } cpython = { version = "0.7", default-features = false } repo = { path = "../../../../lib/repo" } util = { path = "../../../../lib/util" } +parking_lot = "0.11.2" pyconfigparser = { path = "../pyconfigparser" } pydag = { path = "../pydag" } pyedenapi = { path = "../pyedenapi" } diff --git a/eden/scm/edenscmnative/bindings/modules/pyrepo/src/lib.rs b/eden/scm/edenscmnative/bindings/modules/pyrepo/src/lib.rs index e65cf1ab29..984b209f14 100644 --- a/eden/scm/edenscmnative/bindings/modules/pyrepo/src/lib.rs +++ b/eden/scm/edenscmnative/bindings/modules/pyrepo/src/lib.rs @@ -9,14 +9,11 @@ extern crate repo as rsrepo; -use std::cell::RefCell; - use cpython::*; use cpython_ext::error::ResultPyErrExt; -use cpython_ext::ExtractInner; use cpython_ext::PyNone; -use cpython_ext::PyPath; use cpython_ext::PyPathBuf; +use parking_lot::RwLock; use pyconfigparser::config; use pydag::commits::commits as PyCommits; use pyedenapi::PyClient as PyEdenApi; @@ -27,24 +24,11 @@ pub fn init_module(py: Python, package: &str) -> PyResult { let name = [package, "repo"].join("."); let m = PyModule::new(py, &name)?; m.add_class::(py)?; - m.add( - py, - "loadchangelog", - py_fn!( - py, - load_changelog( - dir: &PyPath, - storerequirements: Vec, - metalog: PyMetaLog, - edenapi: Option - ) - ), - )?; Ok(m) } py_class!(pub class repo |py| { - data inner: RefCell; + data inner: RwLock; @staticmethod def initialize(path: PyPathBuf, config: &config) -> PyResult { @@ -58,40 +42,39 @@ py_class!(pub class repo |py| { let config = config.get_cfg(py); let abs_path = util::path::absolute(path.as_path()).map_pyerr(py)?; let repo = Repo::load_with_config(abs_path, config).map_pyerr(py)?; - Self::create_instance(py, RefCell::new(repo)) + Self::create_instance(py, RwLock::new(repo)) } def metalog(&self) -> PyResult { - let mut repo_ref = self.inner(py).borrow_mut(); + let mut repo_ref = self.inner(py).write(); let path = String::from(repo_ref.metalog_path().to_string_lossy()); let log_ref = repo_ref.metalog().map_pyerr(py)?; PyMetaLog::create_instance(py, log_ref, path) } def invalidatemetalog(&self) -> PyResult { - let mut repo_ref = self.inner(py).borrow_mut(); + let mut repo_ref = self.inner(py).write(); repo_ref.invalidate_metalog(); Ok(PyNone) } def edenapi(&self) -> PyResult { - let mut repo_ref = self.inner(py).borrow_mut(); + let mut repo_ref = self.inner(py).write(); let edenapi_ref = repo_ref.eden_api().map_pyerr(py)?; PyEdenApi::create_instance(py, edenapi_ref) } -}); -fn load_changelog( - py: Python, - dir: &PyPath, - storerequirements: Vec, - metalog: PyMetaLog, - edenapi: Option, -) -> PyResult { - let client = edenapi.map(|e| e.extract_inner(py)); - let meta = metalog.metalog_rwlock(py); - let inner = py - .allow_threads(|| rsrepo::open_dag_commits(dir.as_path(), storerequirements, meta, client)) - .map_pyerr(py)?; - PyCommits::create_instance(py, RefCell::new(inner)) -} + def changelog(&self) -> PyResult { + let mut repo_ref = self.inner(py).write(); + let changelog_ref = py + .allow_threads(|| repo_ref.dag_commits()) + .map_pyerr(py)?; + PyCommits::create_instance(py, changelog_ref) + } + + def invalidatechangelog(&self) -> PyResult { + let mut repo_ref = self.inner(py).write(); + repo_ref.invalidate_dag_commits(); + Ok(PyNone) + } +}); diff --git a/eden/scm/lib/repo/src/commits.rs b/eden/scm/lib/repo/src/commits.rs index d81b032fd1..62bd4e5e54 100644 --- a/eden/scm/lib/repo/src/commits.rs +++ b/eden/scm/lib/repo/src/commits.rs @@ -5,6 +5,7 @@ * GNU General Public License version 2. */ +use std::collections::HashSet; use std::fs; use std::path::Path; use std::path::PathBuf; @@ -20,7 +21,7 @@ use hgcommits::RevlogCommits; use metalog::MetaLog; use parking_lot::RwLock; -type OpenedCommits = dyn DagCommits + Send + 'static; +static REQUIREMENTS_PATH: &str = "requires"; static HG_COMMITS_PATH: &str = "hgcommits/v1"; static LAZY_HASH_PATH: &str = "lazyhashdir"; @@ -39,10 +40,11 @@ static GIT_FILE: &str = "gitdir"; pub fn open_dag_commits( store_path: &Path, - store_requirements: Vec, metalog: Arc>, - eden_api: Option>, -) -> Result, CommitError> { + eden_api: Arc, +) -> Result, CommitError> { + let store_requirements = get_store_requirements(store_path) + .map_err(|err| CommitError::FileReadError("requirements file", err))?; if store_requirements.contains(&GIT_STORE_REQUIREMENT.to_string()) { log_backend(GIT_BACKEND_LOG); return open_git(store_path, metalog); @@ -57,6 +59,11 @@ pub fn open_dag_commits( Ok(Box::new(RevlogCommits::new(store_path)?)) } +fn get_store_requirements(store_path: &Path) -> Result, std::io::Error> { + let store_requirements = fs::read_to_string(store_path.join(REQUIREMENTS_PATH))?; + Ok(store_requirements.split('\n').map(String::from).collect()) +} + fn log_backend(backend: &str) { tracing::info!(target: "changelog_info", changelog_backend=AsRef::::as_ref(&backend)); } @@ -64,7 +71,7 @@ fn log_backend(backend: &str) { fn open_git( store_path: &Path, metalog: Arc>, -) -> Result, CommitError> { +) -> Result, CommitError> { let git_path = calculate_git_path(store_path).map_err(|err| CommitError::FileReadError("gitdir", err))?; let segments_path = calculate_segments_path(store_path); @@ -73,7 +80,7 @@ fn open_git( Ok(Box::new(git_segmented_commits)) } -fn open_double(store_path: &Path) -> Result, CommitError> { +fn open_double(store_path: &Path) -> Result, CommitError> { let segments_path = calculate_segments_path(store_path); let hg_commits_path = store_path.join(HG_COMMITS_PATH); let double_commits = DoubleWriteCommits::new( @@ -86,9 +93,8 @@ fn open_double(store_path: &Path) -> Result, CommitError> { fn open_hybrid( store_path: &Path, - eden_api: Option>, -) -> Result, CommitError> { - let eden_api = eden_api.ok_or(CommitError::OpenRequirements("eden_api"))?; + eden_api: Arc, +) -> Result, CommitError> { let segments_path = calculate_segments_path(store_path); let hg_commits_path = store_path.join(HG_COMMITS_PATH); let lazy_hash_path = get_path_from_file(store_path, LAZY_HASH_PATH); diff --git a/eden/scm/lib/repo/src/repo.rs b/eden/scm/lib/repo/src/repo.rs index 1920321f70..ede0ca541a 100644 --- a/eden/scm/lib/repo/src/repo.rs +++ b/eden/scm/lib/repo/src/repo.rs @@ -15,9 +15,11 @@ use configparser::config::ConfigSet; use edenapi::Builder; use edenapi::EdenApi; use edenapi::EdenApiError; +use hgcommits::DagCommits; use metalog::MetaLog; use parking_lot::RwLock; +use crate::commits::open_dag_commits; use crate::errors; use crate::init; @@ -32,6 +34,7 @@ pub struct Repo { repo_name: Option, metalog: Option>>, eden_api: Option>, + dag_commits: Option>>>, } /// Either an optional [`Repo`] which owns a [`ConfigSet`], or a [`ConfigSet`] @@ -158,6 +161,7 @@ impl Repo { }); let metalog = None; let eden_api = None; + let dag_commits = None; Ok(Repo { path, @@ -170,6 +174,7 @@ impl Repo { repo_name, metalog, eden_api, + dag_commits, }) } @@ -243,6 +248,24 @@ impl Repo { } } } + + pub fn dag_commits(&mut self) -> Result>>> { + match &self.dag_commits { + Some(commits) => Ok(commits.clone()), + None => { + let metalog = self.metalog()?; + let eden_api = self.eden_api()?; + let commits = open_dag_commits(&self.store_path, metalog, eden_api)?; + let commits = Arc::new(RwLock::new(commits)); + self.dag_commits = Some(commits.clone()); + Ok(commits) + } + } + } + + pub fn invalidate_dag_commits(&mut self) { + self.dag_commits = None; + } } fn find_hg_repo_root(current_path: &Path) -> Option {