From 4087699172b73cf9cb5afad20e504ac36d715c5c Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Jan 2022 14:51:25 -0500 Subject: [PATCH 1/4] u3: fix snapshot corruption by preemptively dirtying the loom --- pkg/urbit/noun/events.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/urbit/noun/events.c b/pkg/urbit/noun/events.c index bd9ee1b961..c485ab7888 100644 --- a/pkg/urbit/noun/events.c +++ b/pkg/urbit/noun/events.c @@ -913,6 +913,10 @@ u3e_live(c3_o nuu_o, c3_c* dir_c) _ce_patch_delete(); } + // mark all pages dirty (pages in the snapshot will be marked clean) + // + u3e_foul(); + /* Write image files to memory; reinstate protection. */ { @@ -933,7 +937,6 @@ u3e_live(c3_o nuu_o, c3_c* dir_c) ** snapshot on a future boot for which the images are not empty. */ if ( (0 == u3P.nor_u.pgs_w) && (0 == u3P.sou_u.pgs_w) ) { - u3e_foul(); u3l_log("live: logical boot\r\n"); nuu_o = c3y; } From c1441e9cd01535caf416092eac327f16e93ebf25 Mon Sep 17 00:00:00 2001 From: Peter McEvoy Date: Thu, 27 Jan 2022 13:54:20 -0800 Subject: [PATCH 2/4] u3: document page tracking subtleties when taking snapshot --- pkg/urbit/noun/events.c | 67 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/pkg/urbit/noun/events.c b/pkg/urbit/noun/events.c index c485ab7888..ee617d4289 100644 --- a/pkg/urbit/noun/events.c +++ b/pkg/urbit/noun/events.c @@ -1,6 +1,62 @@ -/* g/e.c -** -*/ +//! @file events.c +//! Incremental snapshot system. +//! +//! ### Components +//! - North image file: the contiguous pages of the home road's heap. Pages +//! are ordered within the file by increasing address (i.e. bottommost page +//! of the loom comes first). +//! - South image file: the contiguous pages of the home road's stack. Pages +//! are ordered within the file by decreasing address (i.e. topmost page of +//! the loom comes first). +//! - Memory patch file: the raw memory of the pages that changed since the +//! 
last snapshot was taken. +//! - Control patch file: metadata describing the contents of the memory patch +//! file. +//! +//! ### Taking a snapshot +//! 1. Create a memory patch file containing all dirty pages within the bounds +//! of the home road's heap and stack and an accompanying control patch file +//! that documents where in the loom those dirty pages belong. +//! 2. Apply the contents of the memory patch file to the appropriate image file +//! using the metadata stored in the control patch file. +//! 3. Delete the memory and control patch files. +//! +//! ### Restoring a snapshot +//! 1. Check for the memory and control patch files. If they exist, then a crash +//! presumably occurred while taking a snapshot (after the patch files were +//! created but before they could be applied to the image files) and so the +//! patch files should be applied to the image files the same as when taking +//! a snapshot. +//! 2. Mark all pages in the loom as dirty. This effectively ignores pages that +//! are not on the home road and so optimizes page tracking. +//! 3. Apply the image files to memory. Mark as clean and write-protect the +//! pages restored from the image files. +//! +//! ### Page tracking subtleties +//! To better understand any subtleties of the page tracking system, consider +//! the following sequence: +//! 1. Boot up and restore the snapshot, which is empty (i.e. the image files +//! contain no pages). Because the snapshot is empty, all pages are marked as +//! dirty and are writable, which means that no page faults will be generated. +//! 2. After a while, take a snapshot. All pages within the home road's heap and +//! stack will be gathered into the patch files (because all of the pages in +//! the loom are dirty) and ultimately written to the image files. +//! 3. Exit. +//! 4. Reboot and restore the snapshot, which is no longer empty. Because the +//! snapshot contains the pages that comprised the home road's heap and stack +//! 
at the time at which the snapshot was taken, only those pages will be +//! marked as clean and write-protected. As a result, all other pages will +//! remain dirty and writable. +//! 5. After a while, take another snapshot. By this point, any writes to the +//! write-protected pages on the home road's heap or stack will have triggered +//! page faults, and those pages will have been marked as dirty. Any writes to +//! non-write-protected pages on the home road's heap or stack (which would +//! exist if the home road heap and/or stack grew) will already be dirty. +//! All of these dirty pages are applied to the image files via the patch +//! files. As this process repeats, the snapshot grows incrementally without +//! the need to write-protect the pages between the home road's heap and +//! stack, thereby reducing the number of page faults generated. + #include "all.h" #include #include @@ -931,10 +987,7 @@ u3e_live(c3_o nuu_o, c3_c* dir_c) u3l_log("boot: protected loom\r\n"); } - /* If the images were empty, we are logically booting. By default, we mark - ** all pages as dirty, which enables us to track only home road pages by - ** marking those as clean when they're mapped into memory from the - ** snapshot on a future boot for which the images are not empty. + /* If the images were empty, we are logically booting. */ if ( (0 == u3P.nor_u.pgs_w) && (0 == u3P.sou_u.pgs_w) ) { u3l_log("live: logical boot\r\n"); From 60fc5c1389842019892823b95c198acb5a93b12c Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 1 Feb 2022 20:11:40 -0500 Subject: [PATCH 3/4] u3: refines snapshot system docs --- pkg/urbit/noun/events.c | 116 ++++++++++++++++++++++------------------ 1 file changed, 63 insertions(+), 53 deletions(-) diff --git a/pkg/urbit/noun/events.c b/pkg/urbit/noun/events.c index ee617d4289..cbeafe56fc 100644 --- a/pkg/urbit/noun/events.c +++ b/pkg/urbit/noun/events.c @@ -1,61 +1,71 @@ //! @file events.c -//! Incremental snapshot system. //! -//! ### Components -//! 
- North image file: the contiguous pages of the home road's heap. Pages -//! are ordered within the file by increasing address (i.e. bottommost page -//! of the loom comes first). -//! - South image file: the contiguous pages of the home road's stack. Pages -//! are ordered within the file by decreasing address (i.e. topmost page of -//! the loom comes first). -//! - Memory patch file: the raw memory of the pages that changed since the -//! last snapshot was taken. -//! - Control patch file: metadata describing the contents of the memory patch -//! file. +//! incremental, orthogonal, paginated loom snapshots //! -//! ### Taking a snapshot -//! 1. Create a memory patch file containing all dirty pages within the bounds -//! of the home road's heap and stack and an accompanying control patch file -//! that documents where in the loom those dirty pages belong. -//! 2. Apply the contents of the memory patch file to the appropriate image file -//! using the metadata stored in the control patch file. -//! 3. Delete the memory and control patch files. +//! ### components //! -//! ### Restoring a snapshot -//! 1. Check for the memory and control patch files. If they exist, then a crash -//! presumably occurred while taking a snapshot (after the patch files were -//! created but before they could be applied to the image files) and so the -//! patch files should be applied to the image files the same as when taking -//! a snapshot. -//! 2. Mark all pages in the loom as dirty. This effectively ignores pages that -//! are not on the home road and so optimizes page tracking. -//! 3. Apply the image files to memory. Mark as clean and write-protect the -//! pages restored from the image files. +//! - page: 16KB chunk of the loom. +//! - north segment (u3e_image, north.bin): low contiguous loom pages, +//! (in practice, the home road heap). indexed from low to high: +//! in-order on disk. +//! - south segment (u3e_image, south.bin): high contiguous loom pages, +//! 
(in practice, the home road stack). indexed from high to low: +//! reversed on disk. +//! - patch memory (memory.bin): new or changed pages since the last snapshot +//! - patch control (u3e_control, control.bin): patch metadata, watermarks, +//! and indices/mugs for pages in patch memory. +//! +//! ### initialization (u3e_live()) +//! +//! - with the loom already mapped, all pages are marked dirty in a bitmap. +//! - if snapshot is missing or partial, empty segments are created. +//! - if a patch is present, it's applied (crash recovery). +//! - snapshot segments are copied onto the loom; all included pages +//! are marked clean and protected (read-only). +//! +//! #### page faults (u3e_fault()) +//! +//! - stores into protected pages generate faults (currently SIGSEGV, +//! handled outside this module). +//! - faults are handled by dirtying the page and switching protections to +//! read/write. +//! +//! ### updates (u3e_save()) +//! +//! - all updates to a snapshot are made through a patch. +//! - high/low watermarks for the north/south segments are established, +//! and dirty pages below/above them are added to the patch. +//! - modifications have been caught by the fault handler. +//! - newly-used pages are automatically included (preemptively dirtied). +//! - unused, innermost pages are reclaimed (segments are truncated to the +//! high/low watermarks; the last page in each is always adjacent to the +//! contiguous free space). +//! - patch pages are written to memory.bin, metadata to control.bin. +//! - the patch is applied to the snapshot segments, in-place. +//! - patch files are deleted. +//! +//! ### limitations +//! +//! - loom page size is fixed (16 KB), and must be a multiple of the +//! system page size, but that invariant is not enforced. +//! (can the size vary at runtime give south.bin's reversed order? +//! alternately, if system page size > ours, the fault handler could dirty +//! N pages at a time.) +//! 
- update atomicity is suspect: patch application must either +//! completely succeed or leave on-disk segments intact. unapplied +//! patches can be discarded (triggering event replay), but once +//! patch application begins it must succeed (can fail if disk is full). +//! may require integration into the overall signal-handling regime. +//! - any errors are handled with assertions; failed/partial writes are not +//! retried. +//! +//! ### enhancements +//! +//! - use platform specific page fault mechanism (mach rpc, userfaultfd, &c). +//! - implement demand paging / heuristic page-out. +//! - add a guard page in the middle of the loom to reactively handle stack overflow. +//! - parallelism //! -//! ### Page tracking subtleties -//! To better understand any subtleties of the page tracking system, consider -//! the following sequence: -//! 1. Boot up and restore the snapshot, which is empty (i.e. the image files -//! contain no pages). Because the snapshot is empty, all pages are marked as -//! dirty and are writable, which means that no page faults will be generated. -//! 2. After a while, take a snapshot. All pages within the home road's heap and -//! stack will be gathered into the patch files (because all of the pages in -//! the loom are dirty) and ultimately written to the image files. -//! 3. Exit. -//! 4. Reboot and restore the snapshot, which is no longer empty. Because the -//! snapshot contains the pages that comprised the home road's heap and stack -//! at the time at which the snapshot was taken, only those pages will be -//! marked as clean and write-protected. As a result, all other pages will -//! remain dirty and writable. -//! 5. After a while, take another snapshot. By this point, any writes to the -//! write-protected pages on the home road's heap or stack will have triggered -//! page faults, and those pages will have been marked as dirty. Any writes to -//! non-write-protected pages on the home road's heap or stack (which would -//! 
exist if the home road heap and/or stack grew) will already be dirty. -//! All of these dirty pages are applied to the image files via the patch -//! files. As this process repeats, the snapshot grows incrementally without -//! the need to write-protect the pages between the home road's heap and -//! stack, thereby reducing the number of page faults generated. #include "all.h" #include From 7fcdf6ef54192bf4a367d2b97acf19a76ca27a87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C5=8Dshin?= Date: Wed, 2 Feb 2022 06:06:05 -0800 Subject: [PATCH 4/4] noun: go ahead and enforce that invariant (#5579) * noun: go ahead and enforce that invariant * mingw: sysconf compat wrapper --- pkg/urbit/compat/mingw/compat.c | 11 +++++++++++ pkg/urbit/compat/mingw/compat.h | 12 +++++++----- pkg/urbit/noun/events.c | 11 +++++++---- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/pkg/urbit/compat/mingw/compat.c b/pkg/urbit/compat/mingw/compat.c index 0a60b56661..a15436d475 100644 --- a/pkg/urbit/compat/mingw/compat.c +++ b/pkg/urbit/compat/mingw/compat.c @@ -353,3 +353,14 @@ char *realpath(const char *path, char *resolved_path) // TODO return strdup(path); } + +long sysconf(int name) +{ + SYSTEM_INFO si; + + if ( _SC_PAGESIZE != name ) { + return -1; + } + GetNativeSystemInfo(&si); + return si.dwPageSize; +} diff --git a/pkg/urbit/compat/mingw/compat.h b/pkg/urbit/compat/mingw/compat.h index 21290d7b55..3e5b3bc9e2 100644 --- a/pkg/urbit/compat/mingw/compat.h +++ b/pkg/urbit/compat/mingw/compat.h @@ -6,13 +6,15 @@ char *realpath(const char *path, char *resolved_path); int fdatasync(int fd); int utimes(const char *path, const struct timeval times[2]); +long sysconf(int name); int kill(pid_t pid, int signum); -#define SIGUSR1 10 -#define SIGALRM 14 -#define SIGVTALRM 26 -#define SIGSTK 31 -#define SIG_COUNT 32 +#define SIGUSR1 10 +#define SIGALRM 14 +#define SIGVTALRM 26 +#define SIGSTK 31 +#define SIG_COUNT 32 +#define _SC_PAGESIZE 29 #endif//_MINGW_IO_H diff --git 
a/pkg/urbit/noun/events.c b/pkg/urbit/noun/events.c index cbeafe56fc..214e228d2d 100644 --- a/pkg/urbit/noun/events.c +++ b/pkg/urbit/noun/events.c @@ -47,10 +47,9 @@ //! ### limitations //! //! - loom page size is fixed (16 KB), and must be a multiple of the -//! system page size, but that invariant is not enforced. -//! (can the size vary at runtime give south.bin's reversed order? -//! alternately, if system page size > ours, the fault handler could dirty -//! N pages at a time.) +//! system page size. (can the size vary at runtime given south.bin's +//! reversed order? alternately, if system page size > ours, the fault +//! handler could dirty N pages at a time.) //! - update atomicity is suspect: patch application must either //! completely succeed or leave on-disk segments intact. unapplied //! patches can be discarded (triggering event replay), but once @@ -946,6 +945,10 @@ u3e_save(void) c3_o u3e_live(c3_o nuu_o, c3_c* dir_c) { + // require that our page size is a multiple of the system page size. + // + c3_assert(0 == (1 << (2 + u3a_page)) % sysconf(_SC_PAGESIZE)); + u3P.dir_c = dir_c; u3P.nor_u.nam_c = "north"; u3P.sou_u.nam_c = "south";