treedirstate: extract serialization methods to separate module

The serialization and deserialization of the treedirstate tree file are
scattered across various functions in several files.  Gather these together in
a single module so that the file format is obvious.

Differential Revision: https://phab.mercurial-scm.org/D1510
Mark Thomas 2017-11-28 04:51:38 -08:00
parent 33855b35cc
commit 2328feb895
5 changed files with 192 additions and 149 deletions
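To make the new on-disk format concrete, the following is a minimal, self-contained sketch (not part of the change) of the `Serializable` trait that this commit gathers into the new `serialization` module, together with a round trip modelled on the `DirstateRoot` root-block layout shown in the diff below. The `Root` type, the `main` driver, and the use of `std::io::Result` in place of the crate's error-chain `Result` are illustrative assumptions; the `byteorder` calls are the same ones the real code uses.

// Sketch of the Serializable trait and a root-block-style round trip.
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use std::io::{Cursor, Read, Result, Write};

trait Serializable: Sized {
    /// Serialize the data to a `Write` stream.
    fn serialize(&self, w: &mut dyn Write) -> Result<()>;
    /// Deserialize a new data item from a `Read` stream.
    fn deserialize(r: &mut dyn Read) -> Result<Self>;
}

// Hypothetical stand-in for DirstateRoot, keeping only two of its fields.
struct Root {
    tracked_root_id: u64,
    tracked_file_count: u32,
}

impl Serializable for Root {
    fn serialize(&self, w: &mut dyn Write) -> Result<()> {
        w.write_all(b"////")?; // root-block magic, as in the real format
        w.write_u64::<BigEndian>(self.tracked_root_id)?;
        w.write_u32::<BigEndian>(self.tracked_file_count)?;
        Ok(())
    }

    fn deserialize(r: &mut dyn Read) -> Result<Root> {
        let mut magic = [0u8; 4];
        r.read_exact(&mut magic)?;
        assert_eq!(&magic, b"////", "not a root block");
        Ok(Root {
            tracked_root_id: r.read_u64::<BigEndian>()?,
            tracked_file_count: r.read_u32::<BigEndian>()?,
        })
    }
}

fn main() -> Result<()> {
    let root = Root { tracked_root_id: 42, tracked_file_count: 7 };
    let mut data = Vec::new();
    root.serialize(&mut data)?;
    let restored = Root::deserialize(&mut Cursor::new(data))?;
    assert_eq!(restored.tracked_root_id, 42);
    Ok(())
}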


@@ -1,17 +1,13 @@
// Copyright Facebook, Inc. 2017
//! Directory State.
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use errors::*;
use filestore::FileStore;
use std::io::{Cursor, Read, Write};
use serialization::Serializable;
use std::io::Cursor;
use std::path::Path;
use store::{BlockId, NullStore, Store, StoreView};
use tree::{Key, KeyRef, Storable, Tree};
/// Marker indicating that a block is probably a root node.
const MAGIC: &[u8] = b"////";
const MAGIC_LEN: usize = 4;
use tree::{Key, KeyRef, Tree};
/// Selected backend implementation for the dirstate.
enum Backend {
@@ -68,7 +64,15 @@ pub struct Dirstate<T> {
root_id: Option<BlockId>,
}
impl<T: Storable + Clone> Dirstate<T> {
/// Representation of the root of a dirstate tree that can be serialized to disk.
pub(crate) struct DirstateRoot {
pub(crate) tracked_root_id: BlockId,
pub(crate) tracked_file_count: u32,
pub(crate) removed_root_id: BlockId,
pub(crate) removed_file_count: u32,
}
impl<T: Serializable + Clone> Dirstate<T> {
/// Create a new, empty dirstate, with no backend store.
pub fn new() -> Dirstate<T> {
Dirstate {
@@ -83,24 +87,9 @@ impl<T: Storable + Clone> Dirstate<T> {
/// the disk until they are accessed.
pub fn open<P: AsRef<Path>>(&mut self, filename: P, root_id: BlockId) -> Result<()> {
let store = FileStore::open(filename)?;
{
let root_data = store.read(root_id)?;
let mut root = Cursor::new(root_data);
// Sanity check that this is a root
let mut buffer = [0; MAGIC_LEN];
root.read_exact(&mut buffer)?;
if buffer != MAGIC {
bail!(ErrorKind::InvalidStoreId(root_id.0));
}
let tracked_root_id = BlockId(root.read_u64::<BigEndian>()?);
let tracked_file_count = root.read_u32::<BigEndian>()?;
let removed_root_id = BlockId(root.read_u64::<BigEndian>()?);
let removed_file_count = root.read_u32::<BigEndian>()?;
self.tracked = Tree::open(tracked_root_id, tracked_file_count);
self.removed = Tree::open(removed_root_id, removed_file_count);
}
let root = DirstateRoot::deserialize(&mut Cursor::new(store.read(root_id)?))?;
self.tracked = Tree::open(root.tracked_root_id, root.tracked_file_count);
self.removed = Tree::open(root.removed_root_id, root.removed_file_count);
self.store = Backend::File(store);
self.root_id = Some(root_id);
Ok(())
@@ -109,13 +98,15 @@ impl<T: Storable + Clone> Dirstate<T> {
/// Write a new root block to the store. This contains the identities of the tree roots
/// and the tree sizes.
fn write_root(&mut self) -> Result<()> {
let root = DirstateRoot {
tracked_root_id: self.tracked.root_id().unwrap(),
tracked_file_count: self.tracked.file_count(),
removed_root_id: self.removed.root_id().unwrap(),
removed_file_count: self.removed.file_count(),
};
let store = self.store.store();
let mut data = Vec::new();
data.write(MAGIC)?;
data.write_u64::<BigEndian>(self.tracked.root_id().unwrap().0)?;
data.write_u32::<BigEndian>(self.tracked.file_count())?;
data.write_u64::<BigEndian>(self.removed.root_id().unwrap().0)?;
data.write_u32::<BigEndian>(self.removed.file_count())?;
root.serialize(&mut data)?;
self.root_id = Some(store.append(&data)?);
store.flush()?;
Ok(())
@@ -279,21 +270,21 @@ impl<T: Storable + Clone> Dirstate<T> {
mod tests {
use dirstate::Dirstate;
use tempdir::TempDir;
use tree::Storable;
use std::io::{Read, Write};
use byteorder::{ReadBytesExt, WriteBytesExt};
use serialization::Serializable;
use errors::*;
#[derive(PartialEq, Clone, Debug)]
struct State(char);
impl Storable for State {
fn write(&self, w: &mut Write) -> Result<()> {
impl Serializable for State {
fn serialize(&self, w: &mut Write) -> Result<()> {
w.write_u8(self.0 as u8)?;
Ok(())
}
fn read(r: &mut Read) -> Result<State> {
fn deserialize(r: &mut Read) -> Result<State> {
Ok(State(r.read_u8()? as char))
}
}


@@ -1,12 +1,6 @@
// Copyright Facebook, Inc. 2017
//! File State.
use byteorder::{ReadBytesExt, WriteBytesExt};
use errors::*;
use std::io::{Read, Write};
use tree::Storable;
use vlqencoding::{VLQDecode, VLQEncode};
/// Information relating to a file in the dirstate.
#[derive(Debug, PartialEq, Copy, Clone)]
pub struct FileState {
@@ -36,28 +30,3 @@ impl FileState {
}
}
}
impl Storable for FileState {
/// Write a file entry to the store.
fn write(&self, mut w: &mut Write) -> Result<()> {
w.write_u8(self.state)?;
w.write_vlq(self.mode)?;
w.write_vlq(self.size)?;
w.write_vlq(self.mtime)?;
Ok(())
}
/// Read an entry from the store.
fn read(mut r: &mut Read) -> Result<FileState> {
let state = r.read_u8()?;
let mode = r.read_vlq()?;
let size = r.read_vlq()?;
let mtime = r.read_vlq()?;
Ok(FileState {
state,
mode,
size,
mtime,
})
}
}


@@ -40,6 +40,7 @@ pub mod filestore;
#[cfg(not(test))]
#[allow(non_camel_case_types)]
pub mod python;
pub mod serialization;
pub mod store;
pub mod tree;
pub mod vecmap;


@@ -0,0 +1,153 @@
// Copyright Facebook, Inc. 2017
//! Trait for serialization and deserialization of tree data.
use filestate::FileState;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use errors::*;
use std::io::{Read, Write};
use vlqencoding::{VLQDecode, VLQEncode};
use tree::{Key, Node, NodeEntry, NodeEntryMap};
use dirstate::DirstateRoot;
use store::BlockId;
pub trait Serializable
where
Self: Sized,
{
/// Serialize the storable data to a `Write` stream.
fn serialize(&self, w: &mut Write) -> Result<()>;
/// Deserialize a new data item from a `Read` stream.
fn deserialize(r: &mut Read) -> Result<Self>;
}
impl Serializable for FileState {
/// Write a file entry to the store.
fn serialize(&self, mut w: &mut Write) -> Result<()> {
w.write_u8(self.state)?;
w.write_vlq(self.mode)?;
w.write_vlq(self.size)?;
w.write_vlq(self.mtime)?;
Ok(())
}
/// Read an entry from the store.
fn deserialize(mut r: &mut Read) -> Result<FileState> {
let state = r.read_u8()?;
let mode = r.read_vlq()?;
let size = r.read_vlq()?;
let mtime = r.read_vlq()?;
Ok(FileState {
state,
mode,
size,
mtime,
})
}
}
/// Deserialize a single entry in a node's entry map. Returns the name and the entry.
fn deserialize_node_entry<T>(mut r: &mut Read) -> Result<(Key, NodeEntry<T>)>
where
T: Serializable + Clone,
{
let entry_type = r.read_u8()?;
match entry_type {
b'f' => {
// File entry.
let data = T::deserialize(r)?;
let name_len = r.read_vlq()?;
let mut name = Vec::with_capacity(name_len);
unsafe {
// Safe as we've just allocated the buffer and are about to read into it.
name.set_len(name_len);
}
r.read_exact(name.as_mut_slice())?;
Ok((name, NodeEntry::File(data)))
}
b'd' => {
// Directory entry.
let id = r.read_vlq()?;
let name_len = r.read_vlq()?;
let mut name = Vec::with_capacity(name_len);
unsafe {
// Safe as we've just allocated the buffer and are about to read into it.
name.set_len(name_len);
}
r.read_exact(name.as_mut_slice())?;
Ok((name, NodeEntry::Directory(Node::open(BlockId(id)))))
}
_ => {
bail!(ErrorKind::CorruptTree);
}
}
}
impl<T: Serializable + Clone> Serializable for NodeEntryMap<T> {
fn deserialize(r: &mut Read) -> Result<NodeEntryMap<T>> {
let mut r = r;
let count = r.read_vlq()?;
let mut entries = NodeEntryMap::with_capacity(count);
for _i in 0..count {
let (name, entry) = deserialize_node_entry(r)?;
entries.insert_hint_end(name, entry);
}
Ok(entries)
}
fn serialize(&self, w: &mut Write) -> Result<()> {
let mut w = w;
w.write_vlq(self.len())?;
for (name, entry) in self.iter() {
match entry {
&NodeEntry::File(ref file) => {
w.write_u8(b'f')?;
file.serialize(w)?;
}
&NodeEntry::Directory(ref node) => {
w.write_u8(b'd')?;
w.write_vlq(node.id.unwrap().0)?;
}
}
w.write_vlq(name.len())?;
w.write(name)?;
}
Ok(())
}
}
/// Marker indicating that a block is probably a root node.
const DIRSTATE_ROOT_MAGIC_LEN: usize = 4;
const DIRSTATE_ROOT_MAGIC: [u8; DIRSTATE_ROOT_MAGIC_LEN] = *b"////";
impl Serializable for DirstateRoot {
fn deserialize(r: &mut Read) -> Result<DirstateRoot> {
// Sanity check that this is a root
let mut buffer = [0; DIRSTATE_ROOT_MAGIC_LEN];
r.read_exact(&mut buffer)?;
if buffer != DIRSTATE_ROOT_MAGIC {
bail!(ErrorKind::CorruptTree);
}
let tracked_root_id = BlockId(r.read_u64::<BigEndian>()?);
let tracked_file_count = r.read_u32::<BigEndian>()?;
let removed_root_id = BlockId(r.read_u64::<BigEndian>()?);
let removed_file_count = r.read_u32::<BigEndian>()?;
Ok(DirstateRoot {
tracked_root_id,
tracked_file_count,
removed_root_id,
removed_file_count,
})
}
fn serialize(&self, w: &mut Write) -> Result<()> {
w.write(&DIRSTATE_ROOT_MAGIC)?;
w.write_u64::<BigEndian>(self.tracked_root_id.0)?;
w.write_u32::<BigEndian>(self.tracked_file_count)?;
w.write_u64::<BigEndian>(self.removed_root_id.0)?;
w.write_u32::<BigEndian>(self.removed_file_count)?;
Ok(())
}
}
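As a worked example of the node-entry encoding above (a sketch, not part of the diff): `NodeEntryMap::serialize` writes each directory entry as a type byte `b'd'`, the VLQ-encoded block id of the child node, the VLQ-encoded name length, and the raw name bytes; file entries use `b'f'` followed by the value's own `Serializable` encoding instead of a block id. The helper below is hypothetical and assumes the `vlqencoding` crate's `VLQEncode` extension trait used throughout this file returns a std `io::Result`.

use std::io::{Result, Write};
use vlqencoding::VLQEncode;

// Hypothetical helper showing the wire format of a single directory entry,
// e.g. a child named "foo" stored at block 5.
fn encode_dir_entry(name: &[u8], block_id: u64) -> Result<Vec<u8>> {
    let mut buf = Vec::new();
    buf.write_all(&[b'd'])?;    // entry type: 'd' for directory, 'f' for file
    buf.write_vlq(block_id)?;   // block id of the child node in the store
    buf.write_vlq(name.len())?; // name length, VLQ-encoded
    buf.write_all(name)?;       // name bytes follow immediately
    Ok(buf)
}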


@@ -1,29 +1,16 @@
// Copyright Facebook, Inc. 2017
//! Directory State Tree.
use byteorder::{ReadBytesExt, WriteBytesExt};
use errors::*;
use serialization::Serializable;
use std::collections::Bound;
use std::io::{Cursor, Read, Write};
use std::io::Cursor;
use store::{BlockId, Store, StoreView};
use vecmap::VecMap;
use vlqencoding::{VLQDecode, VLQEncode};
/// Trait that must be implemented by types that can be stored as the value in the tree.
pub trait Storable
where
Self: Sized,
{
/// Serialize the storable data to a `Write` stream.
fn write(&self, w: &mut Write) -> Result<()>;
/// Deserialize a new data item from a `Read` stream.
fn read(r: &mut Read) -> Result<Self>;
}
/// A node entry is an entry in a directory, either a file or another directory.
#[derive(Debug)]
enum NodeEntry<T> {
pub(crate) enum NodeEntry<T> {
Directory(Node<T>),
File(T),
}
@@ -33,19 +20,19 @@ pub type Key = Vec<u8>;
pub type KeyRef<'a> = &'a [u8];
/// Store the node entries in an ordered map from name to node entry.
type NodeEntryMap<T> = VecMap<Key, NodeEntry<T>>;
pub(crate) type NodeEntryMap<T> = VecMap<Key, NodeEntry<T>>;
/// The contents of a directory.
#[derive(Debug)]
struct Node<T> {
pub(crate) struct Node<T> {
/// The ID of the directory in the store. If None, this directory has not yet been
/// written to the back-end store in its current state.
id: Option<BlockId>,
pub(crate) id: Option<BlockId>,
/// The set of files and directories in this directory, indexed by their name. If None,
/// then the ID must not be None, and the entries are yet to be loaded from the back-end
/// store.
entries: Option<NodeEntryMap<T>>,
pub(crate) entries: Option<NodeEntryMap<T>>,
/// A map from keys that have been filtered through a case-folding filter function to the
/// original key. This is used for case-folded look-ups. Filtered values are cached, so
@@ -82,43 +69,7 @@ fn split_key<'a>(key: KeyRef<'a>) -> (KeyRef<'a>, Option<KeyRef<'a>>) {
(key, None)
}
impl<T: Storable + Clone> NodeEntry<T> {
/// Read an entry from the store. Returns the name and the entry.
fn read(mut r: &mut Read) -> Result<(Key, NodeEntry<T>)> {
let entry_type = r.read_u8()?;
match entry_type {
b'f' => {
// File entry.
let data = T::read(r)?;
let name_len = r.read_vlq()?;
let mut name = Vec::with_capacity(name_len);
unsafe {
// Safe as we've just allocated the buffer and are about to read into it.
name.set_len(name_len);
}
r.read_exact(name.as_mut_slice())?;
Ok((name, NodeEntry::File(data)))
}
b'd' => {
// Directory entry.
let id = r.read_vlq()?;
let name_len = r.read_vlq()?;
let mut name = Vec::with_capacity(name_len);
unsafe {
// Safe as we've just allocated the buffer and are about to read into it.
name.set_len(name_len);
}
r.read_exact(name.as_mut_slice())?;
Ok((name, NodeEntry::Directory(Node::open(BlockId(id)))))
}
_ => {
bail!(ErrorKind::CorruptTree);
}
}
}
}
impl<T: Storable + Clone> Node<T> {
impl<T: Serializable + Clone> Node<T> {
/// Create a new empty Node. This has no ID as it is not yet written to the store.
fn new() -> Node<T> {
Node {
@@ -130,7 +81,7 @@ impl<T: Storable + Clone> Node<T> {
/// Create a new Node for an existing entry in the store. The entries are not loaded until
/// the load method is called.
fn open(id: BlockId) -> Node<T> {
pub(crate) fn open(id: BlockId) -> Node<T> {
Node {
id: Some(id),
entries: None,
@@ -146,15 +97,7 @@ impl<T: Storable + Clone> Node<T> {
}
let id = self.id.expect("Node must have a valid ID to be loaded");
let data = store.read(id)?;
let len = data.len() as u64;
let mut cursor = Cursor::new(data);
let count = cursor.read_vlq()?;
let mut entries = NodeEntryMap::with_capacity(count);
while cursor.position() < len {
let (name, entry) = NodeEntry::read(&mut cursor)?;
entries.insert_hint_end(name, entry);
}
self.entries = Some(entries);
self.entries = Some(NodeEntryMap::<T>::deserialize(&mut Cursor::new(data))?);
Ok(())
}
@@ -175,21 +118,7 @@ impl<T: Storable + Clone> Node<T> {
let entries = self.entries
.as_mut()
.expect("Node should have entries populated before writing out.");
data.write_vlq(entries.len())?;
for (name, entry) in entries.iter_mut() {
match entry {
&mut NodeEntry::File(ref file) => {
data.write_u8(b'f')?;
file.write(&mut data)?;
}
&mut NodeEntry::Directory(ref mut node) => {
data.write_u8(b'd')?;
data.write_vlq(node.id.unwrap().0)?;
}
}
data.write_vlq(name.len())?;
data.write(name)?;
}
entries.serialize(&mut data)?;
self.id = Some(store.append(&data)?);
Ok(())
}
@@ -548,7 +477,7 @@ impl<T: Storable + Clone> Node<T> {
}
}
impl<T: Storable + Clone> Tree<T> {
impl<T: Serializable + Clone> Tree<T> {
/// Create a new empty tree.
pub fn new() -> Tree<T> {
Tree {