sapling/eden/scm/lib/indexedlog/benches/index.rs
Jun Wu 6f4bf325d5 indexedlog: write Checksum inline with Log
Summary:
Enhance the index format: The Root entry can be followed by an optional
Checksum entry which replaces the need of ChecksumTable.

The format is backwards compatible since the old format will be just
treated as "there is no ChecksumTable", and the ChecksumTable will be built on
the next "flush".

This change is non-trivial. But the tests are pretty strong - the bitflip test
alone covered a lot of issues, and the dump of Index content helps a lot too.

For the index file itself (leaving aside the separate ".sum" checksum file),
this change is compatible in both directions:
1. New code reading old file will just think the old file does not have the
   checksum entry, similar to new code having checksum disabled.
2. Old code will think the root+checksum slice is the "root" entry. Parsing
   the root entry is fine since it does not complain about unknown data at the
   end.

However, this change dropped the logic updating ".sum" files. That part is an
issue blocking old clients from reading new data.

Reviewed By: DurhamG

Differential Revision: D19850741

fbshipit-source-id: 551a45cd5422f1fb4c5b08e3b207a2ffe3d93dea
2020-02-28 09:23:55 -08:00

190 lines
5.5 KiB
Rust

/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use indexedlog::index::{InsertKey, InsertValue, OpenOptions};
use minibench::{bench, elapsed, measure, Measure};
use rand_chacha::{
rand_core::{RngCore, SeedableRng},
ChaChaRng,
};
use std::sync::Arc;
use tempfile::tempdir;
/// Number of 20-byte keys inserted and looked up by the benchmarks below.
/// The two "index size" benchmarks shadow this with a larger local `N`.
const N: usize = 204800;
/// Produce `size` bytes of pseudo-random data.
///
/// The generator is a `ChaChaRng` seeded with the constant `0`, so every call
/// with the same `size` yields the same bytes.
fn gen_buf(size: usize) -> Vec<u8> {
    let mut rng = ChaChaRng::seed_from_u64(0);
    let mut bytes = vec![0u8; size];
    rng.fill_bytes(bytes.as_mut_slice());
    bytes
}
/// Build the `OpenOptions` used as the starting point by every benchmark.
///
/// The checksum chunk size is given as a log2 exponent: 12 means 4 KiB chunks.
fn open_opts() -> OpenOptions {
    // log2 of the checksum chunk size (2^12 = 4096 bytes).
    let chunk_size_log = 12;
    let mut options = OpenOptions::new();
    options.checksum_chunk_size_log(chunk_size_log);
    options
}
fn main() {
bench("index insertion (owned key)", || {
let dir = tempdir().unwrap();
let mut idx = open_opts().open(dir.path().join("i")).expect("open");
let buf = gen_buf(N * 20);
elapsed(move || {
for i in 0..N {
idx.insert(&&buf[20 * i..20 * (i + 1)], i as u64)
.expect("insert");
}
})
});
bench("index insertion (referred key)", || {
let dir = tempdir().unwrap();
let buf = gen_buf(N * 20);
let mut idx = open_opts()
.key_buf(Some(Arc::new(buf.clone())))
.open(dir.path().join("i"))
.expect("open");
elapsed(move || {
for i in 0..N {
idx.insert(&&buf[20 * i..20 * (i + 1)], i as u64)
.expect("insert");
}
})
});
bench("index flush", || {
let dir = tempdir().unwrap();
let mut idx = open_opts().open(dir.path().join("i")).expect("open");
let buf = gen_buf(N * 20);
for i in 0..N {
idx.insert(&&buf[20 * i..20 * (i + 1)], i as u64)
.expect("insert");
}
elapsed(|| {
idx.flush().expect("flush");
})
});
{
let dir = tempdir().unwrap();
let mut idx = open_opts().open(dir.path().join("i")).expect("open");
let buf = gen_buf(N * 20);
for i in 0..N {
idx.insert(&&buf[20 * i..20 * (i + 1)], i as u64)
.expect("insert");
}
bench("index lookup (memory)", || {
elapsed(|| {
for i in 0..N {
idx.get(&&buf[20 * i..20 * (i + 1)]).expect("lookup");
}
})
});
bench("index prefix scan (2B)", || {
elapsed(|| {
for _ in 0..(N / 3) {
idx.scan_prefix([0x33, 0x33]).unwrap().count();
}
})
});
bench("index prefix scan (1B)", || {
elapsed(|| {
for _ in 0..(N / 807) {
idx.scan_prefix([0x33]).unwrap().count();
}
})
});
}
{
let dir = tempdir().unwrap();
let mut idx = open_opts()
.checksum_enabled(false)
.open(dir.path().join("i"))
.expect("open");
let buf = gen_buf(N * 20);
for i in 0..N {
idx.insert(&&buf[20 * i..20 * (i + 1)], i as u64)
.expect("insert");
}
idx.flush().expect("flush");
bench("index lookup (disk, no verify)", || {
elapsed(|| {
for i in 0..N {
idx.get(&&buf[20 * i..20 * (i + 1)]).expect("lookup");
}
})
});
bench("index prefix scan (2B, disk)", || {
elapsed(|| {
for _ in 0..(N / 3) {
idx.scan_prefix([0x33, 0x33]).unwrap().count();
}
})
});
bench("index prefix scan (1B, disk)", || {
elapsed(|| {
for _ in 0..(N / 807) {
idx.scan_prefix([0x33]).unwrap().count();
}
})
});
}
bench("index lookup (disk, verified)", || {
let dir = tempdir().unwrap();
let mut idx = open_opts().open(dir.path().join("i")).expect("open");
let buf = gen_buf(N * 20);
for i in 0..N {
idx.insert(&&buf[20 * i..20 * (i + 1)], i as u64)
.expect("insert");
}
idx.flush().expect("flush");
elapsed(move || {
for i in 0..N {
idx.get(&&buf[20 * i..20 * (i + 1)]).expect("lookup");
}
})
});
bench("index size (5M owned keys)", || {
const N: usize = 5000000;
let dir = tempdir().unwrap();
let mut idx = open_opts().open(dir.path().join("i")).expect("open");
let buf = gen_buf(N * 20);
for i in 0..N {
idx.insert(&&buf[20 * i..20 * (i + 1)], i as u64)
.expect("insert");
}
measure::Bytes::measure(|| idx.flush().unwrap())
});
bench("index size (5M referred keys)", || {
const N: usize = 5000000;
let dir = tempdir().unwrap();
let buf = gen_buf(N * 20);
let mut idx = open_opts()
.key_buf(Some(Arc::new(buf.clone())))
.open(dir.path().join("i"))
.expect("open");
for i in 0..N {
let ext_key = InsertKey::Reference((i as u64 * 20, 20));
idx.insert_advanced(ext_key, InsertValue::Prepend(i as u64))
.expect("insert");
}
measure::Bytes::measure(|| idx.flush().unwrap())
});
}