chop: offline event log truncation (#165)

## `chop`

`urbit chop <pier>` implements a simple, offline **event log
truncation**[^1] tool.

`chop` gracefully stops the given pier (if running), backs up the
current snapshot to `<pier>/.urb/bhk`, makes sure a current snapshot
exists (i.e., is fully written to disk in `chk/*.bin` with no existing
patch files), reads the metadata and the last event from the pier's
event log, initializes a fresh event log in the `<pier>/.urb/log/chop`
directory, writes the metadata and last event from the original log into
the fresh one, renames the original event log to
`<pier>/.urb/log/chop/data_<first>-<last>.mdb.bak` where `first` and
`last` are the first and last event numbers from the event log, moves
the fresh event log into place as `<pier>/.urb/log/data.mdb`, and
exits.

Pilots are then free to move, archive, or delete their `.bak` event log
file, resume normal operation of their ship, and enjoy the many benefits
of lowered disk pressure and any reductions in associated hosting costs.

I've tested `chop` successfully on my own planet `~mastyr-bottec`
(multiple times), three different comets (all fresh), and multitudes of
fake galaxies.

Resolves #122.

Note: `knit`, which is the "undo" button for `chop`, is being
implemented in its own PR #184.

[^1]: https://roadmap.urbit.org/project/event-log-truncation
This commit is contained in:
Josh Lehman 2023-02-09 07:01:05 -08:00 committed by GitHub
commit 7098eb8825
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 354 additions and 30 deletions

View File

@ -156,5 +156,9 @@
unlink(a);})
# define c3_fopen(a, b) ({ \
fopen(a, b);})
/* c3_remove(): delete the file at path [a]; evaluates to the result
** of remove().
*/
# define c3_remove(a) ({ \
remove(a);})
/* c3_rename(): rename path [a] to path [b]; evaluates to the result
** of rename().
*/
# define c3_rename(a, b) ({ \
rename(a, b);})
#endif /* ifndef C3_DEFS_H */

View File

@ -201,6 +201,7 @@
# define c3__chew c3_s4('c','h','e','w')
# define c3__chis c3_s4('c','h','i','s')
# define c3__chob c3_s4('c','h','o','b')
# define c3__chop c3_s4('c','h','o','p')
# define c3__chug c3_s4('c','h','u','g')
# define c3__claf c3_s4('c','l','a','f')
# define c3__clam c3_s4('c','l','a','m')

View File

@ -992,10 +992,10 @@ _ce_image_copy(u3e_image* fom_u, u3e_image* tou_u)
return c3y;
}
/* _ce_backup();
/* u3e_backup();
*/
static void
_ce_backup(void)
c3_o
u3e_backup(c3_o ovw_o)
{
u3e_image nop_u = { .nam_c = "north", .pgs_w = 0 };
u3e_image sop_u = { .nam_c = "south", .pgs_w = 0 };
@ -1004,25 +1004,25 @@ _ce_backup(void)
snprintf(ful_c, 8192, "%s/.urb/bhk", u3P.dir_c);
if ( c3_mkdir(ful_c, 0700) ) {
if ( (c3n == ovw_o) && c3_mkdir(ful_c, 0700) ) {
if ( EEXIST != errno ) {
fprintf(stderr, "loom: image backup: %s\r\n", strerror(errno));
}
return;
return c3n;
}
snprintf(ful_c, 8192, "%s/.urb/bhk/%s.bin", u3P.dir_c, nop_u.nam_c);
if ( -1 == (nop_u.fid_i = c3_open(ful_c, mod_i, 0666)) ) {
fprintf(stderr, "loom: c3_open %s: %s\r\n", ful_c, strerror(errno));
return;
return c3n;
}
snprintf(ful_c, 8192, "%s/.urb/bhk/%s.bin", u3P.dir_c, sop_u.nam_c);
if ( -1 == (sop_u.fid_i = c3_open(ful_c, mod_i, 0666)) ) {
fprintf(stderr, "loom: c3_open %s: %s\r\n", ful_c, strerror(errno));
return;
return c3n;
}
if ( (c3n == _ce_image_copy(&u3P.nor_u, &nop_u))
@ -1034,10 +1034,14 @@ _ce_backup(void)
c3_unlink(ful_c);
snprintf(ful_c, 8192, "%s/.urb/bhk", u3P.dir_c);
c3_rmdir(ful_c);
fprintf(stderr, "loom: image backup failed\r\n");
return c3n;
}
close(nop_u.fid_i);
close(sop_u.fid_i);
fprintf(stderr, "loom: image backup complete\r\n");
return c3y;
}
/*
@ -1103,7 +1107,7 @@ u3e_save(void)
_ce_patch_free(pat_u);
_ce_patch_delete();
_ce_backup();
u3e_backup(c3n);
}
/* u3e_live(): start the checkpointing system.

View File

@ -64,6 +64,11 @@
/** Functions.
**/
/* u3e_backup(): copy the snapshot from chk to bhk.
**
**   returns c3y when the backup completes, c3n on any failure.
**
**   NOTE(review): [ovw_o] looks like an "overwrite" flag -- when it is
**   c3n the bhk directory is created first, and the call returns c3n
**   if that directory cannot be created (including when it already
**   exists); when it is c3y the directory creation step is skipped.
**   confirm against the full definition.
*/
c3_o
u3e_backup(c3_o ovw_o);
/* u3e_fault(): handle a memory event with libsigsegv protocol.
*/
c3_i

View File

@ -504,6 +504,112 @@ u3_lmdb_save_meta(MDB_env* env_u,
return c3y;
}
/* u3_lmdb_walk_init(): initialize db iterator.
**
**   opens a read-only transaction on [env_u], opens the "EVENTS"
**   database and a cursor within it, and positions the cursor on the
**   event numbered [nex_d]. [las_d] is recorded as the final event
**   number (inclusive) for the walk.
**
**   returns c3y on success. on any failure the error is logged, the
**   transaction (and cursor, if opened) is released, and c3n is
**   returned.
*/
c3_o
u3_lmdb_walk_init(MDB_env*      env_u,
                  u3_lmdb_walk* itr_u,
                  c3_d          nex_d,
                  c3_d          las_d)
{
  //  XX assumes little-endian
  //
  MDB_val key_u = { .mv_size = sizeof(c3_d), .mv_data = &nex_d };
  MDB_val val_u;
  c3_w    ret_w;

  itr_u->red_o = c3n;
  itr_u->nex_d = nex_d;
  itr_u->las_d = las_d;

  //  create a read-only transaction.
  //
  ret_w = mdb_txn_begin(env_u, 0, MDB_RDONLY, &itr_u->txn_u);

  if ( 0 != ret_w ) {
    mdb_logerror(stderr, ret_w, "lmdb: read txn_begin fail");
    return c3n;
  }

  //  open the database in the transaction
  //
  ret_w = mdb_dbi_open(itr_u->txn_u, "EVENTS",
                       MDB_CREATE | MDB_INTEGERKEY,
                       &itr_u->mdb_u);

  if ( 0 != ret_w ) {
    mdb_logerror(stderr, ret_w, "lmdb: read: dbi_open fail");
    goto abort_txn;
  }

  //  create a cursor to iterate over keys starting at [nex_d]
  //
  ret_w = mdb_cursor_open(itr_u->txn_u, itr_u->mdb_u, &itr_u->cur_u);

  if ( 0 != ret_w ) {
    mdb_logerror(stderr, ret_w, "lmdb: read: cursor_open fail");
    goto abort_txn;
  }

  //  set the cursor to the position of [nex_d]
  //
  ret_w = mdb_cursor_get(itr_u->cur_u, &key_u, &val_u, MDB_SET_KEY);

  if ( 0 != ret_w ) {
    mdb_logerror(stderr, ret_w, "lmdb: read: initial cursor_get failed");
    fprintf(stderr, " at %" PRIu64 "\r\n", nex_d);
    mdb_cursor_close(itr_u->cur_u);
    goto abort_txn;
  }

  return c3y;

  //  shared failure exit: release the read transaction
  //
  //    XX confirm
  //
abort_txn:
  mdb_txn_abort(itr_u->txn_u);
  return c3n;
}
/* u3_lmdb_walk_next(): synchronously read next event from iterator.
**
**   on success, stores the event's size in [*len_i] and the pointer
**   returned by lmdb (not a copy) in [*buf_v], advances the iterator,
**   and returns c3y. returns c3n if the cursor read fails or if the
**   key read is not the expected next event number.
**
**   asserts that the iterator has not passed its final event.
*/
c3_o
u3_lmdb_walk_next(u3_lmdb_walk* itr_u, size_t* len_i, void** buf_v)
{
  MDB_val key_u, val_u;
  c3_w    ret_w, ops_w;

  c3_assert( itr_u->nex_d <= itr_u->las_d );

  //  the first read takes the entry the cursor is already positioned
  //  on; every later read steps the cursor forward
  //
  if ( c3y == itr_u->red_o ) {
    ops_w = MDB_NEXT;
  }
  else {
    ops_w = MDB_GET_CURRENT;
  }

  ret_w = mdb_cursor_get(itr_u->cur_u, &key_u, &val_u, ops_w);

  if ( 0 != ret_w ) {
    mdb_logerror(stderr, ret_w, "lmdb: walk error");
    return c3n;
  }

  //  sanity check: ensure contiguous event numbers
  //
  {
    c3_d got_d = *(c3_d*)key_u.mv_data;

    if ( got_d != itr_u->nex_d ) {
      fprintf(stderr, "lmdb: read gap: expected %" PRIu64
                      ", received %" PRIu64 "\r\n",
                      itr_u->nex_d,
                      got_d);
      return c3n;
    }
  }

  *len_i = val_u.mv_size;
  *buf_v = val_u.mv_data;

  itr_u->red_o = c3y;
  itr_u->nex_d++;

  return c3y;
}
/* u3_lmdb_walk_done(): close iterator.
**
**   closes the iterator's cursor, then aborts its transaction.
*/
void
u3_lmdb_walk_done(u3_lmdb_walk* itr_u)
{
  MDB_cursor* cur_u = itr_u->cur_u;
  MDB_txn*    txn_u = itr_u->txn_u;

  mdb_cursor_close(cur_u);
  mdb_txn_abort(txn_u);
}
/* mdb_logerror(): writes an error message and lmdb error code to f.
*/
void mdb_logerror(FILE* f, int err, const char* fmt, ...)

View File

@ -8,6 +8,16 @@
/* lmdb api wrapper
*/
/* u3_lmdb_walk: event iterator
**
**   set up by u3_lmdb_walk_init(), advanced by u3_lmdb_walk_next(),
**   and released by u3_lmdb_walk_done().
*/
typedef struct _u3_lmdb_walk {
MDB_txn* txn_u; // read-only transaction handle
MDB_dbi mdb_u; // handle for the "EVENTS" db
MDB_cursor* cur_u; // db cursor within [txn_u]
c3_o red_o; // c3n until the first successful read, then c3y
c3_d nex_d; // next event number expected from the cursor
c3_d las_d; // final event number, inclusive
} u3_lmdb_walk;
/* u3_lmdb_init(): open lmdb at [pax_c], mmap up to [siz_i].
*/
@ -19,7 +29,6 @@
void
u3_lmdb_exit(MDB_env* env_u);
/* u3_lmdb_stat(): print env stats.
*/
void
@ -64,4 +73,22 @@
size_t val_i,
void* val_p);
/* u3_lmdb_walk_init(): initialize db iterator.
**
**   positions [itr_u] on event [nex_d] in [env_u]; [las_d] is the
**   final event number of the walk, inclusive.
**   returns c3y on success, c3n on failure.
*/
c3_o
u3_lmdb_walk_init(MDB_env* env_u,
u3_lmdb_walk* itr_u,
c3_d nex_d,
c3_d las_d);
/* u3_lmdb_walk_next(): synchronously read next event from iterator.
**
**   on success, [*len_i] and [*buf_v] receive the event's size and
**   the data pointer returned by lmdb.
**   returns c3y on success, c3n on failure.
*/
c3_o
u3_lmdb_walk_next(u3_lmdb_walk* itr_u, size_t* len_i, void** buf_v);
/* u3_lmdb_walk_done(): close iterator.
**
**   releases the cursor and transaction held by [itr_u].
*/
void
u3_lmdb_walk_done(u3_lmdb_walk* itr_u);
#endif /* ifndef U3_VERE_DB_LMDB_H */

View File

@ -464,14 +464,14 @@ u3_disk_read(u3_disk* log_u, c3_d eve_d, c3_d len_d)
/* _disk_save_meta(): serialize atom, save as metadata at [key_c].
*/
static c3_o
_disk_save_meta(u3_disk* log_u, const c3_c* key_c, u3_atom dat)
_disk_save_meta(MDB_env* mdb_u, const c3_c* key_c, u3_atom dat)
{
c3_w len_w = u3r_met(3, dat);
c3_y* byt_y = c3_malloc(len_w);
u3r_bytes(0, len_w, byt_y, dat);
{
c3_o ret_o = u3_lmdb_save_meta(log_u->mdb_u, key_c, len_w, byt_y);
c3_o ret_o = u3_lmdb_save_meta(mdb_u, key_c, len_w, byt_y);
c3_free(byt_y);
return ret_o;
}
@ -480,17 +480,17 @@ _disk_save_meta(u3_disk* log_u, const c3_c* key_c, u3_atom dat)
/* u3_disk_save_meta(): save metadata.
*/
c3_o
u3_disk_save_meta(u3_disk* log_u,
u3_disk_save_meta(MDB_env* mdb_u,
c3_d who_d[2],
c3_o fak_o,
c3_w lif_w)
{
c3_assert( c3y == u3a_is_cat(lif_w) );
if ( (c3n == _disk_save_meta(log_u, "version", 1))
|| (c3n == _disk_save_meta(log_u, "who", u3i_chubs(2, who_d)))
|| (c3n == _disk_save_meta(log_u, "fake", fak_o))
|| (c3n == _disk_save_meta(log_u, "life", lif_w)) )
if ( (c3n == _disk_save_meta(mdb_u, "version", 1))
|| (c3n == _disk_save_meta(mdb_u, "who", u3i_chubs(2, who_d)))
|| (c3n == _disk_save_meta(mdb_u, "fake", fak_o))
|| (c3n == _disk_save_meta(mdb_u, "life", lif_w)) )
{
return c3n;
}
@ -513,36 +513,36 @@ _disk_meta_read_cb(void* ptr_v, size_t val_i, void* val_p)
/* _disk_read_meta(): read metadata at [key_c], deserialize.
*/
static u3_weak
_disk_read_meta(u3_disk* log_u, const c3_c* key_c)
_disk_read_meta(MDB_env* mdb_u, const c3_c* key_c)
{
u3_weak dat = u3_none;
u3_lmdb_read_meta(log_u->mdb_u, &dat, key_c, _disk_meta_read_cb);
u3_lmdb_read_meta(mdb_u, &dat, key_c, _disk_meta_read_cb);
return dat;
}
/* u3_disk_read_meta(): read metadata.
*/
c3_o
u3_disk_read_meta(u3_disk* log_u,
u3_disk_read_meta(MDB_env* mdb_u,
c3_d* who_d,
c3_o* fak_o,
c3_w* lif_w)
{
u3_weak ver, who, fak, lif;
if ( u3_none == (ver = _disk_read_meta(log_u, "version")) ) {
if ( u3_none == (ver = _disk_read_meta(mdb_u, "version")) ) {
fprintf(stderr, "disk: read meta: no version\r\n");
return c3n;
}
if ( u3_none == (who = _disk_read_meta(log_u, "who")) ) {
if ( u3_none == (who = _disk_read_meta(mdb_u, "who")) ) {
fprintf(stderr, "disk: read meta: no indentity\r\n");
return c3n;
}
if ( u3_none == (fak = _disk_read_meta(log_u, "fake")) ) {
if ( u3_none == (fak = _disk_read_meta(mdb_u, "fake")) ) {
fprintf(stderr, "disk: read meta: no fake bit\r\n");
return c3n;
}
if ( u3_none == (lif = _disk_read_meta(log_u, "life")) ) {
if ( u3_none == (lif = _disk_read_meta(mdb_u, "life")) ) {
fprintf(stderr, "disk: read meta: no lifecycle length\r\n");
return c3n;
}

View File

@ -637,6 +637,7 @@ _cw_usage(c3_c* bin_c)
" %s prep %.*s prepare for upgrade:\n",
" %s next %.*s request upgrade:\n",
" %s queu %.*s<at-event> cue state:\n",
" %s chop %.*s truncate event log:\n",
" %s vere ARGS <output dir> download binary:\n",
"\n run as a 'serf':\n",
" %s serf <pier> <key> <flags> <cache-size> <at-event>"
@ -1823,6 +1824,180 @@ _cw_prep(c3_i argc, c3_c* argv[])
u3_Host.ops_u.tem = c3y;
}
/* _cw_chop(): truncate event log
**
**   usage: `<bin> chop [--loom <bits>] <pier>`
**
**   requires an up-to-date snapshot, backs that snapshot up, then
**   writes the pier's metadata and its last event into a fresh lmdb
**   environment under <pier>/.urb/log/chop. the original data.mdb is
**   renamed to <pier>/.urb/log/chop/data_<first>-<last>.mdb.bak and
**   the fresh data.mdb is renamed into its place.
**
**   exits the process with status 1 on any failure.
*/
static void
_cw_chop(c3_i argc, c3_c* argv[])
{
  c3_i ch_i, lid_i;

  static struct option lop_u[] = {
    { "loom", required_argument, NULL, c3__loom },
    { NULL, 0, NULL, 0 }
  };

  u3_Host.dir_c = _main_pier_run(argv[0]);

  while ( -1 != (ch_i=getopt_long(argc, argv, "", lop_u, &lid_i)) ) {
    switch ( ch_i ) {
      case c3__loom: {
        c3_w lom_w;
        c3_o res_o = _main_readw(optarg, u3a_bits + 3, &lom_w);
        if ( (c3n == res_o) || (lom_w < 20) ) {
          fprintf(stderr, "error: --loom must be >= 20 and <= %u\r\n", u3a_bits + 2);
          exit(1);
        }
        u3_Host.ops_u.lom_y = lom_w;
      } break;

      case '?': {
        fprintf(stderr, "invalid argument\r\n");
        exit(1);
      } break;
    }
  }

  //  argv[optind] is always "chop"
  //
  if ( !u3_Host.dir_c ) {
    if ( optind + 1 < argc ) {
      u3_Host.dir_c = argv[optind + 1];
    }
    else {
      fprintf(stderr, "invalid command, pier required\r\n");
      exit(1);
    }
    optind++;
  }

  if ( optind + 1 != argc ) {
    fprintf(stderr, "invalid command\r\n");
    exit(1);
  }

  //  gracefully shutdown the pier if it's running
  //
  u3_disk* old_u = _cw_disk_init(u3_Host.dir_c);

  //  note: this includes patch applications (if any)
  //
  u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y);

  //  check if there's a *current* snapshot
  //
  if ( old_u->dun_d != u3A->eve_d ) {
    fprintf(stderr, "chop: error: snapshot is out of date, please "
                    "start/shutdown your pier gracefully first\r\n");
    fprintf(stderr, "chop: eve_d: %" PRIu64 ", dun_d: %" PRIu64 "\r\n", u3A->eve_d, old_u->dun_d);
    exit(1);
  }

  //  backup current snapshot
  //
  if ( c3n == u3e_backup(c3y) ) {
    fprintf(stderr, "chop: error: failed to backup snapshot\r\n");
    exit(1);
  }

  //  map size for the new lmdb environment
  //  see disk.c:885
  //
  const size_t siz_i =
  //  500 GiB is as large as musl on aarch64 wants to allow
  //
  #if (defined(U3_CPU_aarch64) && defined(U3_OS_linux))
    0x7d00000000;
  #else
    0x10000000000;
  #endif

  c3_c log_c[8193];
  snprintf(log_c, sizeof(log_c), "%s/.urb/log", u3_Host.dir_c);

  //  get the first/last event numbers from the event log
  //
  c3_d fir_d, las_d;
  if ( c3n == u3_lmdb_gulf(old_u->mdb_u, &fir_d, &las_d) ) {
    fprintf(stderr, "chop: failed to load latest event from database\r\n");
    exit(1);
  }

  //  get the metadata
  //
  c3_d who_d[2];
  c3_o fak_o;
  c3_w lif_w;
  if ( c3y != u3_disk_read_meta(old_u->mdb_u, who_d, &fak_o, &lif_w) ) {
    fprintf(stderr, "chop: failed to read metadata\r\n");
    exit(1);
  }

  //  get the last event
  //
  //    the buffer handed back by the iterator is owned by lmdb and is
  //    only guaranteed valid while the iterator's read transaction is
  //    open, so the iterator is kept open until the event has been
  //    written to the new database below.
  //
  u3_lmdb_walk itr_u;
  size_t       len_i;
  void*        buf_v[1];
  if ( c3n == u3_lmdb_walk_init(old_u->mdb_u, &itr_u, las_d, las_d) ) {
    fprintf(stderr, "chop: failed to initialize iterator\r\n");
    exit(1);
  }
  if ( c3n == u3_lmdb_walk_next(&itr_u, &len_i, buf_v) ) {
    fprintf(stderr, "chop: failed to read event\r\n");
    exit(1);
  }

  //  initialize a fresh lmdb environment in the "chop" subdir
  //
  c3_c cho_c[8193];
  snprintf(cho_c, sizeof(cho_c), "%s/chop", log_c);
  if ( 0 != access(cho_c, F_OK) ) {
    if ( 0 != c3_mkdir(cho_c, 0700) ) {
      fprintf(stderr, "chop: failed to create chop directory\r\n");
      exit(1);
    }
  }
  MDB_env* new_u = u3_lmdb_init(cho_c, siz_i);
  if ( !new_u ) {
    fprintf(stderr, "chop: failed to initialize new database\r\n");
    exit(1);
  }

  //  write the metadata to the database
  //
  if ( c3n == u3_disk_save_meta(new_u, who_d, fak_o, lif_w) ) {
    fprintf(stderr, "chop: failed to save metadata\r\n");
    exit(1);
  }

  //  write the last event to the database
  //
  //    warning: this relies on the old database, and the iterator's
  //    read transaction on it, still being open
  //
  if ( c3n == u3_lmdb_save(new_u, las_d, 1, buf_v, &len_i) ) {
    fprintf(stderr, "chop: failed to write last event\r\n");
    exit(1);
  }

  //  the event bytes have been copied into the new database;
  //  the iterator can now be released
  //
  u3_lmdb_walk_done(&itr_u);

  //  backup the original database file
  //
  c3_c dat_c[8193], bak_c[8193];
  snprintf(dat_c, sizeof(dat_c), "%s/data.mdb", log_c);
  //  "data_<first>-<last>.mdb.bak"
  //
  snprintf(bak_c, sizeof(bak_c), "%s/data_%" PRIu64 "-%" PRIu64 ".mdb.bak", cho_c, fir_d, las_d);
  if ( 0 != c3_rename(dat_c, bak_c) ) {
    fprintf(stderr, "chop: failed to backup original database file\r\n");
    exit(1);
  }

  //  rename new database file to be official
  //
  c3_c new_c[8193];
  snprintf(new_c, sizeof(new_c), "%s/data.mdb", cho_c);
  if ( 0 != c3_rename(new_c, dat_c) ) {
    fprintf(stderr, "chop: failed to rename new database file\r\n");
    //  the original log has already been moved aside at this point,
    //  so tell the pilot where it is and how to put it back
    //
    fprintf(stderr, "      original event log is at %s\r\n", bak_c);
    fprintf(stderr, "      restore it by running: `mv %s %s`\r\n", bak_c, dat_c);
    exit(1);
  }

  //  cleanup
  //
  u3_disk_exit(old_u);
  u3_lmdb_exit(new_u);
  u3m_stop();

  //  success
  //
  fprintf(stderr, "chop: event log truncation complete\r\n");
  fprintf(stderr, "      event log backup written to %s\r\n", bak_c);
  fprintf(stderr, "      WARNING: ENSURE YOU CAN RESTART YOUR SHIP BEFORE DELETING YOUR EVENT LOG BACKUP FILE!\r\n");
  fprintf(stderr, "      if you can't, restore your log by running:\r\n");
  fprintf(stderr, "      `mv %s %s` then try again\r\n", bak_c, dat_c);
}
/* _cw_vere(): download vere
*/
static void
@ -1936,7 +2111,7 @@ _cw_vere(c3_i argc, c3_c* argv[])
u3l_log("vere: download succeeded");
}
/* _cw_vile(): generatoe/print keyfile
/* _cw_vile(): generate/print keyfile
*/
static void
_cw_vile(c3_i argc, c3_c* argv[])
@ -2088,6 +2263,7 @@ _cw_utils(c3_i argc, c3_c* argv[])
case c3__play: _cw_play(argc, argv); return 2; // continue on
case c3__prep: _cw_prep(argc, argv); return 2; // continue on
case c3__queu: _cw_queu(argc, argv); return 1;
case c3__chop: _cw_chop(argc, argv); return 1;
case c3__vere: _cw_vere(argc, argv); return 1;
case c3__vile: _cw_vile(argc, argv); return 1;

View File

@ -1708,8 +1708,8 @@ u3_pier_stay(c3_w wag_w, u3_noun pax)
return 0;
}
if ( c3n == u3_disk_read_meta(pir_u->log_u, pir_u->who_d,
&pir_u->fak_o, &pir_u->lif_w) )
if ( c3n == u3_disk_read_meta(pir_u->log_u->mdb_u, pir_u->who_d,
&pir_u->fak_o, &pir_u->lif_w) )
{
fprintf(stderr, "pier: disk read meta fail\r\n");
// XX dispose
@ -1905,8 +1905,8 @@ _pier_boot_plan(u3_pier* pir_u,
pir_u->lif_w = u3qb_lent(bot_u.bot);
}
if ( c3n == u3_disk_save_meta(pir_u->log_u, pir_u->who_d,
pir_u->fak_o, pir_u->lif_w) )
if ( c3n == u3_disk_save_meta(pir_u->log_u->mdb_u, pir_u->who_d,
pir_u->fak_o, pir_u->lif_w) )
{
// XX dispose bot_u
//

View File

@ -4,6 +4,7 @@
#define U3_VERE_H
#include "c3.h"
#include "db/lmdb.h"
#include "noun.h"
#include "serf.h"
#include "uv.h"
@ -945,7 +946,7 @@
/* u3_disk_read_meta(): read metadata.
*/
c3_o
u3_disk_read_meta(u3_disk* log_u,
u3_disk_read_meta(MDB_env* mdb_u,
c3_d* who_d,
c3_o* fak_o,
c3_w* lif_w);
@ -953,7 +954,7 @@
/* u3_disk_save_meta(): save metadata.
*/
c3_o
u3_disk_save_meta(u3_disk* log_u,
u3_disk_save_meta(MDB_env* mdb_u,
c3_d who_d[2],
c3_o fak_o,
c3_w lif_w);