From d2fbb0164eb87fd9d6c4ff8bec93b09053925e01 Mon Sep 17 00:00:00 2001 From: Jun Wu Date: Tue, 17 Sep 2019 18:12:07 -0700 Subject: [PATCH] bindings: add revlogindex Summary: This module is intended to have native paths for some operations that need to scan the whole changelog. It allows us to experiment with some breaking changes, namely, head-based visibility without "filtered revs", head-based phases on the revlog format, before the more advanced structure takes over. This diff adds a revlog index reader that can answer simple queries like "length", "parents". Reviewed By: sfilipco Differential Revision: D17199837 fbshipit-source-id: 2574f64c980419fa966200fd52fa5ddf873baae4 --- edenscmnative/bindings/Cargo.toml | 1 + .../bindings/modules/pyrevlogindex/Cargo.toml | 8 ++ .../bindings/modules/pyrevlogindex/src/lib.rs | 125 ++++++++++++++++++ edenscmnative/bindings/src/lib.rs | 1 + lib/dag/src/spanset.rs | 2 +- 5 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 edenscmnative/bindings/modules/pyrevlogindex/Cargo.toml create mode 100644 edenscmnative/bindings/modules/pyrevlogindex/src/lib.rs diff --git a/edenscmnative/bindings/Cargo.toml b/edenscmnative/bindings/Cargo.toml index 6d630fb2da..7a07f19c48 100644 --- a/edenscmnative/bindings/Cargo.toml +++ b/edenscmnative/bindings/Cargo.toml @@ -28,6 +28,7 @@ pymutationstore = { path = "modules/pymutationstore" } pynodemap = { path = "modules/pynodemap" } pypathmatcher = { path = "modules/pypathmatcher" } pyrevisionstore = { path = "modules/pyrevisionstore" } +pyrevlogindex = { path = "modules/pyrevlogindex" } pystackdesc = { path = "modules/pystackdesc" } pytreestate = { path = "modules/pytreestate" } pyvlq = { path = "modules/pyvlq" } diff --git a/edenscmnative/bindings/modules/pyrevlogindex/Cargo.toml b/edenscmnative/bindings/modules/pyrevlogindex/Cargo.toml new file mode 100644 index 0000000000..1d912730e2 --- /dev/null +++ b/edenscmnative/bindings/modules/pyrevlogindex/Cargo.toml @@ -0,0 +1,8 @@ 
+[package] +name = "pyrevlogindex" +version = "0.1.0" +edition = "2018" + +[dependencies] +cpython = { version = "0.3", features = ["python27-sys"], default-features = false } +cpython-ext = { path = "../../../../lib/cpython-ext" } diff --git a/edenscmnative/bindings/modules/pyrevlogindex/src/lib.rs b/edenscmnative/bindings/modules/pyrevlogindex/src/lib.rs new file mode 100644 index 0000000000..49d012c98b --- /dev/null +++ b/edenscmnative/bindings/modules/pyrevlogindex/src/lib.rs @@ -0,0 +1,125 @@ +// Copyright 2019 Facebook, Inc. +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +#![allow(non_camel_case_types)] + +use cpython::*; +use cpython_ext::SimplePyBuf; +use std::cell::RefCell; + +// XXX: The revlogindex is a temporary solution before migrating to +// segmented changelog. It is here to experiment breaking changes with +// revlog, including: +// +// - Redefine "head()" to only return remotenames and tracked draft heads. +// - Get rid of "filtered revs" and "repo view" layer entirely. +// - Switch phases to be defined by heads (remotenames), instead of roots. + +pub fn init_module(py: Python, package: &str) -> PyResult { + let name = [package, "revlogindex"].join("."); + let m = PyModule::new(py, &name)?; + m.add_class::(py)?; + Ok(m) +} + +py_class!(class revlogindex |py| { + data changelogi: RevlogIndex; + + def __new__(_cls, changelogi: &PyObject) -> PyResult { + let changelogi = RevlogIndex { + data: SimplePyBuf::new(py, changelogi), + inserted: RefCell::new(Vec::new()), + }; + Self::create_instance(py, changelogi) + } + + /// Get parent revisions. + def parentrevs(&self, rev: u32) -> PyResult> { + let revlog = self.changelogi(py); + Ok(revlog.parents(rev)) + } + + /// Insert a new revision that hasn't been written to disk. + /// Used by revlog._addrevision. 
+ def insert(&self, parents: Vec) -> PyResult { + let revlog = self.changelogi(py); + revlog.insert(parents); + Ok(py.None()) + } + + def __len__(&self) -> PyResult { + let revlog = self.changelogi(py); + Ok(revlog.len()) + } +}); + +/// Minimal code to read the DAG (i.e. parents) stored in non-inlined revlog. +struct RevlogIndex { + // Content of revlog-name.i (ex. 00changelog.i). + data: SimplePyBuf, + + // Inserted entries that are not flushed to disk. + inserted: RefCell>>, +} + +/// Revlog entry. See "# index ng" in revlog.py. +#[allow(dead_code)] +#[repr(packed)] +#[derive(Copy, Clone)] +struct RevlogEntry { + offset_flags: u64, + compressed: i32, + len: i32, + base: i32, + link: i32, + p1: i32, + p2: i32, + node: [u8; 32], +} + +impl RevlogIndex { + /// Revisions in total. + fn len(&self) -> usize { + let inserted = self.inserted.borrow(); + self.data_len() + inserted.len() + } + + /// Revisions stored in the original revlog index. + fn data_len(&self) -> usize { + self.data.as_ref().len() + } + + /// Get parent revisions. + fn parents(&self, rev: u32) -> Vec { + let data_len = self.data_len(); + if rev >= data_len as u32 { + let inserted = self.inserted.borrow(); + return inserted[rev as usize - data_len].clone(); + } + + let data = self.data.as_ref(); + let p1 = i32::from_be(data[rev as usize].p1); + let p2 = i32::from_be(data[rev as usize].p2); + if p1 == -1 { + // p1 == -1 but p2 != -1 is illegal for changelog (but possible + // for filelog with copy information). + assert!(p2 == -1); + Vec::new() + } else if p2 == -1 { + assert!((p1 as u32) < rev); + vec![p1 as u32] + } else { + assert!((p1 as u32) < rev); + assert!((p2 as u32) < rev); + vec![p1 as u32, p2 as u32] + } + } + + /// Insert a new revision with given parents at the end. 
+ fn insert(&self, parents: Vec) { + let mut inserted = self.inserted.borrow_mut(); + inserted.push(parents); + } +} diff --git a/edenscmnative/bindings/src/lib.rs b/edenscmnative/bindings/src/lib.rs index d2c34494bc..4cec3bff83 100644 --- a/edenscmnative/bindings/src/lib.rs +++ b/edenscmnative/bindings/src/lib.rs @@ -37,6 +37,7 @@ py_module_initializer!(bindings, initbindings, PyInit_bindings, |py, m| { "revisionstore", pyrevisionstore::init_module(py, &name)?, )?; + m.add(py, "revlogindex", pyrevlogindex::init_module(py, &name)?)?; m.add(py, "stackdesc", pystackdesc::init_module(py, &name)?)?; m.add(py, "treestate", pytreestate::init_module(py, &name)?)?; m.add(py, "vlq", pyvlq::init_module(py, &name)?)?; diff --git a/lib/dag/src/spanset.rs b/lib/dag/src/spanset.rs index a54c154f43..2fa9ab3caf 100644 --- a/lib/dag/src/spanset.rs +++ b/lib/dag/src/spanset.rs @@ -15,7 +15,7 @@ use std::collections::BinaryHeap; use std::fmt::{self, Debug}; use std::ops::{Bound, RangeBounds, RangeInclusive}; -type Id = u64; +pub type Id = u64; /// Range `low..=high`. `low` must be <= `high`. #[derive(Copy, Clone, Debug, Eq)]