mirror of
https://github.com/facebook/sapling.git
synced 2024-10-06 23:07:18 +03:00
indexedlog: support efficient index deletion by prefix
Summary: The index can delete all keys matching a prefix more efficiently than deleting them one by one. Expose this feature. The `dag` crate will use this feature to delete all "non-master" segments and ids efficiently. Reviewed By: sfilipco Differential Revision: D18825296 fbshipit-source-id: b8531695609238a16913254af61004170f12954e
This commit is contained in:
parent
e51daf1fb1
commit
784da69352
@ -431,6 +431,16 @@ impl RadixOffset {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset this radix entry so that all of its children and its link offset
/// become null.
///
/// Only a dirty (in-memory) radix entry can be reset: the entry stored in
/// `index.dirty_radixes` at `self.dirty_index()` is overwritten with
/// `MemRadix::default()`.
|
||||
/// # Panics
///
/// Panics if the offset is not dirty, i.e. it points to an on-disk entry.
|
||||
fn set_all_to_null(self, index: &mut Index) {
|
||||
if self.is_dirty() {
|
||||
// Replace the whole in-memory entry in one assignment instead of
// clearing its fields one by one.
index.dirty_radixes[self.dirty_index()] = MemRadix::default();
|
||||
} else {
|
||||
// Reaching this branch is a caller bug; the offset is not an in-memory
// entry that can be overwritten here.
panic!("bug: set_all_to_null called on immutable radix entry");
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new in-memory radix entry.
|
||||
#[inline]
|
||||
fn create(index: &mut Index, radix: MemRadix) -> RadixOffset {
|
||||
@ -2415,6 +2425,16 @@ impl Index {
|
||||
.context(|| format!(" Index.path = {:?}", self.path))
|
||||
}
|
||||
|
||||
/// Remove all values associated with every key that starts with `prefix`.
///
/// `prefix` is matched byte-wise against the beginning of each key. The
/// tests in this file use an empty prefix to remove every key in the index.
///
/// # Errors
///
/// Returns the error produced by `insert_advanced`, annotated with the
/// prefix and with `Index.path`.
|
||||
pub fn remove_prefix(&mut self, prefix: impl AsRef<[u8]>) -> crate::Result<()> {
|
||||
// NOTE: See "remove". The implementation detail does not optimize
|
||||
// for space or lookup performance.
|
||||
let prefix = prefix.as_ref();
|
||||
// Prefix deletion is expressed as an "insert" of a `TombstonePrefix` value
// keyed by the prefix itself.
self.insert_advanced(InsertKey::Embed(prefix), InsertValue::TombstonePrefix)
|
||||
.context(|| format!("in Index::remove_prefix(prefix={:?})", prefix))
|
||||
.context(|| format!(" Index.path = {:?}", self.path))
|
||||
}
|
||||
|
||||
/// Update the linked list for a given key.
|
||||
///
|
||||
/// If `link` is None, behave like `insert`. Otherwise, ignore the existing
|
||||
@ -2470,6 +2490,10 @@ impl Index {
|
||||
link_offset.create(self, value)
|
||||
}
|
||||
InsertValue::Tombstone => LinkOffset::default(),
|
||||
InsertValue::TombstonePrefix => {
|
||||
radix.set_all_to_null(self);
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
radix.set_link(self, new_link_offset);
|
||||
return Ok(());
|
||||
@ -2486,7 +2510,7 @@ impl Index {
|
||||
InsertValue::PrependReplace(value, link_offset) => {
|
||||
link_offset.create(self, value)
|
||||
}
|
||||
InsertValue::Tombstone => {
|
||||
InsertValue::Tombstone | InsertValue::TombstonePrefix => {
|
||||
// No need to create a key.
|
||||
radix.set_child(self, x, Offset::null());
|
||||
return Ok(());
|
||||
@ -2515,7 +2539,13 @@ impl Index {
|
||||
let detached_key = unsafe { &*(old_key as (*const [u8])) };
|
||||
(detached_key, link_offset)
|
||||
};
|
||||
if old_key == key.as_ref() {
|
||||
let matched = if let InsertValue::TombstonePrefix = value {
|
||||
// Only test the prefix of old_key.
|
||||
old_key.get(..key.as_ref().len()) == Some(key.as_ref())
|
||||
} else {
|
||||
old_key == key.as_ref()
|
||||
};
|
||||
if matched {
|
||||
// Key matched. Need to copy leaf entry for modification, except for
|
||||
// deletion.
|
||||
let new_link_offset = match value {
|
||||
@ -2523,7 +2553,7 @@ impl Index {
|
||||
InsertValue::PrependReplace(value, link_offset) => {
|
||||
link_offset.create(self, value)
|
||||
}
|
||||
InsertValue::Tombstone => {
|
||||
InsertValue::Tombstone | InsertValue::TombstonePrefix => {
|
||||
// No need to copy the leaf entry.
|
||||
last_radix.set_child(self, last_child, Offset::null());
|
||||
return Ok(());
|
||||
@ -2540,7 +2570,7 @@ impl Index {
|
||||
InsertValue::PrependReplace(value, link_offset) => {
|
||||
link_offset.create(self, value)
|
||||
}
|
||||
InsertValue::Tombstone => return Ok(()),
|
||||
InsertValue::Tombstone | InsertValue::TombstonePrefix => return Ok(()),
|
||||
};
|
||||
self.split_leaf(
|
||||
leaf,
|
||||
@ -2774,6 +2804,10 @@ pub enum InsertValue {
|
||||
|
||||
/// Effectively delete associated values for the specified key.
|
||||
Tombstone,
|
||||
|
||||
/// Effectively delete associated values for all keys starting with the
|
||||
/// prefix.
|
||||
TombstonePrefix,
|
||||
}
|
||||
|
||||
//// Debug Formatter
|
||||
@ -3077,6 +3111,41 @@ mod tests {
|
||||
assert_eq!(index.range(..).unwrap().count(), 0);
|
||||
}
|
||||
|
||||
/// Exercise `Index::remove_prefix` on an index re-opened after a flush:
/// a prefix longer than any key, a prefix equal to a whole key, a prefix
/// matching exactly one key, a prefix shared by several keys, and finally
/// the empty prefix.
#[test]
|
||||
fn test_remove_recursive() {
|
||||
let dir = tempdir().unwrap();
|
||||
let mut index = open_opts().open(dir.path().join("a")).expect("open");
|
||||
// NOTE(review): `abc` is inserted twice. The `n -= 4` step below lists it
// once, so `range(..)` presumably yields each key once regardless of how
// many values it has -- confirm.
index.insert(b"abc", 42).unwrap();
|
||||
index.insert(b"abc", 42).unwrap();
|
||||
index.insert(b"abxyz1", 42).unwrap();
|
||||
index.insert(b"abxyz2", 42).unwrap();
|
||||
index.insert(b"abxyz33333", 42).unwrap();
|
||||
index.insert(b"abxyz44444", 42).unwrap();
|
||||
index.insert(b"aby", 42).unwrap();
|
||||
index.flush().unwrap();
|

||||
// Open the same path again so the removals below run against an index
// that was flushed and re-opened rather than the instance that did the
// inserts.
let mut index = open_opts().open(dir.path().join("a")).expect("open");
|
||||
// `n` tracks the number of keys expected to remain after each removal.
let mut n = index.range(..).unwrap().count();
|
||||
// The prefix is longer than every inserted key, so it matches none of them.
index.remove_prefix(b"abxyz33333333333").unwrap(); // nothing removed
|
||||
assert_eq!(index.range(..).unwrap().count(), n);
|

||||
// The prefix equals one whole key.
index.remove_prefix(b"abxyz33333").unwrap(); // exact match
|
||||
n -= 1; // abxyz33333 removed
|
||||
assert_eq!(index.range(..).unwrap().count(), n);
|

||||
// The prefix is a strict prefix of exactly one remaining key.
index.remove_prefix(b"abxyz4").unwrap(); // prefix exact match
|
||||
n -= 1; // abxyz44444 removed
|
||||
assert_eq!(index.range(..).unwrap().count(), n);
|

||||
// The prefix is shared by all remaining keys.
index.remove_prefix(b"ab").unwrap(); // prefix match
|
||||
n -= 4; // abc, aby, abxyz1, abxyz2 removed
|
||||
assert_eq!(index.range(..).unwrap().count(), n);
|

||||
// Open the path once more. The removals above were never flushed, so
// presumably the flushed keys are visible again here -- TODO confirm.
let mut index = open_opts().open(dir.path().join("a")).expect("open");
|
||||
// The empty prefix matches every key.
index.remove_prefix(b"").unwrap(); // remove everything
|
||||
assert_eq!(index.range(..).unwrap().count(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_distinct_one_byte_keys() {
|
||||
let dir = tempdir().unwrap();
|
||||
@ -3902,5 +3971,32 @@ mod tests {
|
||||
})
|
||||
}
|
||||
|
||||
// Property check: replay a sequence of operations against both an in-memory
// index and a `BTreeSet` model, and require that after every operation the
// keys yielded by `index.range(..)` equal the model's contents in order.
//
// Each `(key, deleted)` item is one operation: when `deleted` is true, every
// key starting with `key` is removed; otherwise `key` is inserted with the
// value 1. Returns true only if every step matched.
// NOTE(review): presumably driven by a property-testing macro whose header is
// outside this view -- confirm.
fn test_deletion_prefix(keys_deleted: Vec<(Vec<u8>, bool)>) -> bool {
|
||||
let mut set = BTreeSet::<Vec<u8>>::new();
|
||||
let mut index = in_memory_index();
|
||||
// `all` stops at the first operation after which index and model disagree.
keys_deleted.into_iter().all(|(key, deleted)| {
|
||||
if deleted {
|
||||
// BTreeSet does not have remove_prefix. Emulate it.
|
||||
// Collect the matching keys first; the set cannot be mutated while it
// is being iterated.
let to_delete = set
|
||||
.iter()
|
||||
.filter(|k| k.starts_with(&key))
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
for key in to_delete {
|
||||
set.remove(&key);
|
||||
}
|
||||
index.remove_prefix(&key).unwrap();
|
||||
} else {
|
||||
set.insert(key.clone());
|
||||
index.insert(&key, 1).unwrap();
|
||||
}
|
||||
// Compare the index's keys (first element of each range item) with the
// model's keys, both as ordered vectors.
index
|
||||
.range(..)
|
||||
.unwrap()
|
||||
.map(|s| s.unwrap().0.as_ref().to_vec())
|
||||
.collect::<Vec<_>>()
|
||||
== set.iter().cloned().collect::<Vec<_>>()
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -164,6 +164,11 @@ pub enum IndexOutput {
|
||||
///
|
||||
/// This only affects the index. The entry is not removed in the log.
|
||||
Remove(Box<[u8]>),
|
||||
|
||||
/// Remove all values associated with all keys with the given prefix in the index.
|
||||
///
|
||||
/// This only affects the index. The entry is not removed in the log.
|
||||
RemovePrefix(Box<[u8]>),
|
||||
}
|
||||
|
||||
/// What checksum function to use for an entry.
|
||||
@ -1149,6 +1154,9 @@ impl Log {
|
||||
IndexOutput::Remove(key) => {
|
||||
index.remove(key)?;
|
||||
}
|
||||
IndexOutput::RemovePrefix(key) => {
|
||||
index.remove_prefix(key)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1212,6 +1220,9 @@ impl Log {
|
||||
IndexOutput::Remove(key) => {
|
||||
index.remove(key)?;
|
||||
}
|
||||
IndexOutput::RemovePrefix(key) => {
|
||||
index.remove_prefix(key)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
offset = entry_result.next_offset;
|
||||
@ -2307,9 +2318,9 @@ impl IndexOutput {
|
||||
})?,
|
||||
),
|
||||
IndexOutput::Owned(key) => Cow::Owned(key.into_vec()),
|
||||
IndexOutput::Remove(_) => {
|
||||
IndexOutput::Remove(_) | IndexOutput::RemovePrefix(_) => {
|
||||
return Err(crate::Error::programming(
|
||||
"into_cow does not support Remove",
|
||||
"into_cow does not support Remove or RemovePrefix",
|
||||
))
|
||||
}
|
||||
})
|
||||
@ -2556,7 +2567,13 @@ mod tests {
|
||||
// Two index functions. First takes every 2 bytes as references. The second takes every 3
|
||||
// bytes as owned slices.
|
||||
// Keys starting with '-' are considered as "deletion" requests.
|
||||
// Keys starting with '=' are considered as "delete prefix" requests.
|
||||
let index_func0 = |data: &[u8]| {
|
||||
if data.first() == Some(&b'=') {
|
||||
return vec![IndexOutput::RemovePrefix(
|
||||
data[1..].to_vec().into_boxed_slice(),
|
||||
)];
|
||||
}
|
||||
let is_removal = data.first() == Some(&b'-');
|
||||
let start = if is_removal { 1 } else { 0 };
|
||||
(start..(data.len().max(1) - 1))
|
||||
@ -2570,6 +2587,11 @@ mod tests {
|
||||
.collect()
|
||||
};
|
||||
let index_func1 = |data: &[u8]| {
|
||||
if data.first() == Some(&b'=') {
|
||||
return vec![IndexOutput::RemovePrefix(
|
||||
data[1..].to_vec().into_boxed_slice(),
|
||||
)];
|
||||
}
|
||||
let is_removal = data.first() == Some(&b'-');
|
||||
let start = if is_removal { 1 } else { 0 };
|
||||
(start..(data.len().max(2) - 2))
|
||||
@ -2598,7 +2620,7 @@ mod tests {
|
||||
for lag in [0u64, 20, 1000].iter().cloned() {
|
||||
let dir = tempdir().unwrap();
|
||||
let mut log = Log::open(dir.path(), get_index_defs(lag)).unwrap();
|
||||
let entries: [&[u8]; 6] = [b"1", b"", b"2345", b"", b"78", b"3456"];
|
||||
let entries: [&[u8]; 7] = [b"1", b"", b"2345", b"", b"78", b"3456", b"35"];
|
||||
for bytes in entries.iter() {
|
||||
log.append(bytes).expect("append");
|
||||
// Flush and reload in the middle of entries. This exercises the code paths
|
||||
@ -2624,7 +2646,17 @@ mod tests {
|
||||
[b"3456", b"2345"]
|
||||
);
|
||||
|
||||
log.sync().unwrap();
|
||||
|
||||
// Delete prefix.
|
||||
log.append(b"=3").unwrap();
|
||||
for key in [b"34", b"35"].iter() {
|
||||
assert!(log.lookup(0, key).unwrap().into_vec().unwrap().is_empty());
|
||||
}
|
||||
assert_eq!(log.lookup(0, b"56").unwrap().into_vec().unwrap(), [b"3456"]);
|
||||
|
||||
// Delete keys.
|
||||
let mut log = Log::open(dir.path(), get_index_defs(lag)).unwrap();
|
||||
for bytes in entries.iter() {
|
||||
let mut bytes = bytes.to_vec();
|
||||
bytes.insert(0, b'-');
|
||||
|
Loading…
Reference in New Issue
Block a user