diff --git a/mercurial-types/src/lib.rs b/mercurial-types/src/lib.rs index 6502d7a9cd..5b964d23e3 100644 --- a/mercurial-types/src/lib.rs +++ b/mercurial-types/src/lib.rs @@ -4,6 +4,44 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. +//! Mercurial Types +//! +//! This crate contains useful definitions for types that occur in Mercurial, or more generally, +//! in a source control system that is based on Mercurial and extensions. +//! +//! The top-most level is the Repo, which is a container for changesets. +//! +//! A changeset represents a snapshot of a file tree at a specific moment in time. Changesets +//! can (and commonly do) have parent-child relationships with other changesets; if one changeset +//! is the child of another one, then it is interpreted as an incremental change in the history of +//! a single namespace. Changesets can have multiple parents (currently limited to 2), which +//! represents the merging of history. A changeset can have no parents, which represents the +//! creation of a new namespace. There's no requirement that all (or any) changesets within a +//! repo be connected at all via parent-child relationships. +//! +//! Each changeset has a tree of manifests, which represents its namespace. A manifest is +//! equivalent to a directory in a filesystem, mapping names to other objects. Those other +//! objects can be other manifests (subdirectories), files, or symlinks. Manifest objects can +//! be shared by multiple changesets - if the only difference between two changesets is a +//! single file, then all other files and directories will be the same and shared. +//! +//! Changesets, manifests and files are uniformly represented by a `Node`. A `Node` has +//! 0-2 parents and some content. A node's identity is computed by hashing over (p1, p2, content), +//! resulting in `NodeHash` (TODO: rename NodeHash -> NodeId?). 
This means manifests and files +//! have a notion of history independent of the changeset(s) they're embedded in. +//! +//! Nodes are stored as blobs in the blobstore, but with their content in a separate blob. This +//! is because it's very common for the same file content to appear either under different names +//! (copies) or multiple times within the same history (reverts), or both (rebase, amend, etc). +//! +//! Blobs are the underlying raw storage for all immutable objects in Mononoke. Their primary +//! storage key is a hash (TBD, stronger than SHA1) over their raw bit patterns, but they can +//! have other keys to allow direct access via multiple aliases. For example, file content may be +//! shared by multiple nodes, but can be accessed directly without having to go via a node. +//! +//! Delta and bdiff are used in revlogs and on the wireprotocol to represent inter-file +//! differences. These are for interfacing at the edges, but are not used within Mononoke's core +//! structures at all. #![deny(warnings)] #![feature(const_fn)] #![feature(never_type)] diff --git a/mercurial-types/src/manifest.rs b/mercurial-types/src/manifest.rs index f7687fb0cd..adf8db28f4 100644 --- a/mercurial-types/src/manifest.rs +++ b/mercurial-types/src/manifest.rs @@ -17,10 +17,30 @@ use nodehash::EntryId; use path::{MPath, RepoPath}; /// Interface for a manifest +/// +/// A `Manifest` represents the mapping between a list of names and `Entry`s - i.e., +/// functionally equivalent to a directory. +/// +/// The name "Manifest" comes from Mercurial, where a single object represents the entire repo +/// namespace ("flat manifest"). But modern Mercurial and Mononoke use a distinct Manifest for +/// each directory ("tree manifest"). As a result, operations on a manifest work one path element at +/// a time. 
+/// +/// TODO: (jsgf) T25575327 lookup should just take a single element pub trait Manifest: Send + 'static { + /// Look up a specific entry in the Manifest by name + /// + /// If the name exists, return it as Some(entry). If it doesn't exist, return None. + /// If it returns an error, it indicates something went wrong with the underlying + /// infrastructure. fn lookup(&self, path: &MPath) -> BoxFuture>, Error>; + + /// List all the entries in the Manifest. + /// + /// Entries are returned in canonical order. fn list(&self) -> BoxStream, Error>; + /// Return self as a type-erased boxed trait (still needed as a trait method? T25577105) fn boxed(self) -> Box where Self: Sync + Sized, @@ -73,6 +93,16 @@ impl Manifest for Box { } } +/// Type of an Entry +/// +/// File and Executable are identical - they both represent files containing arbitrary content. +/// The only difference is that the Executables are created with executable permission when +/// checked out. +/// +/// Symlink is also the same as File, but the content of the file is interpolated into a path +/// being traversed during lookup. +/// +/// Tree is a reference to another Manifest (directory-like) object. #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize)] pub enum Type { File, @@ -81,6 +111,19 @@ pub enum Type { Executable, } +impl Display for Type { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + let s = match self { + &Type::Symlink => "l", + &Type::Executable => "x", + &Type::Tree => "t", + &Type::File => "", + }; + write!(fmt, "{}", s) + } +} + +/// Concrete representation of various Entry Types. 
pub enum Content { File(Blob>), // TODO stream Executable(Blob>), // TODO stream @@ -88,16 +131,38 @@ pub enum Content { Tree(Box), } +/// An entry represents a single entry in a Manifest +/// +/// The Entry has at least a name, a type, and the identity of the object it refers to + pub trait Entry: Send + 'static { + /// Type of the object this entry refers to fn get_type(&self) -> Type; + + /// Get the parents (in the history graph) of the referred-to object fn get_parents(&self) -> BoxFuture; + + /// Get the raw content of the object as it exists in the blobstore, + /// without any interpretation. This is only really useful for doing a bit-level duplication. fn get_raw_content(&self) -> BoxFuture>, Error>; + + /// Get the interpreted content of the object. This will likely require IO fn get_content(&self) -> BoxFuture; + + /// Get the logical size of the entry. Some entries don't really have a meaningful size. fn get_size(&self) -> BoxFuture, Error>; + + /// Get the identity of the object this entry refers to. fn get_hash(&self) -> &EntryId; + + /// Get the full path of this entry (meaningless - see T25575327) fn get_path(&self) -> &RepoPath; + + /// Also meaningless (T25575327) fn get_mpath(&self) -> &MPath; + /// Return an Entry as a type-erased trait object. + /// (Do we still need this as a trait method? T25577105) fn boxed(self) -> Box where Self: Sync + Sized, @@ -106,6 +171,9 @@ pub trait Entry: Send + 'static { } } +/// Wrapper for boxing an instance of Entry +/// +/// TODO: (jsgf) T25577105 Are the Box variants of Manifest/Entry traits still needed? 
pub struct BoxEntry where Ent: Entry, @@ -192,15 +260,3 @@ impl Entry for Box { (**self).get_mpath() } } - -impl Display for Type { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - let s = match self { - &Type::Symlink => "l", - &Type::Executable => "x", - &Type::Tree => "t", - &Type::File => "", - }; - write!(fmt, "{}", s) - } -} diff --git a/mercurial-types/src/nodehash.rs b/mercurial-types/src/nodehash.rs index 8bc4ef6218..8ac6ed7b22 100644 --- a/mercurial-types/src/nodehash.rs +++ b/mercurial-types/src/nodehash.rs @@ -47,7 +47,6 @@ impl NodeHash { } } - struct StringVisitor; impl<'de> serde::de::Visitor<'de> for StringVisitor { @@ -216,6 +215,7 @@ impl Display for ManifestId { } } +/// TODO: (jsgf) T25576292 EntryId should be a (Type, NodeId) tuple #[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)] #[derive(HeapSizeOf)] pub struct EntryId(NodeHash);