From c5ebc4bb400251d09f29a2835ff2838624f41437 Mon Sep 17 00:00:00 2001 From: Jelle Raaijmakers Date: Wed, 24 May 2023 14:08:31 +0200 Subject: [PATCH] LibSQL: Reuse heap blocks when overwriting storage Previously, only the first block in a chain of blocks would be overwritten while all subsequent blocks would be appended to the heap. Now we make sure to reuse all existing blocks in the chain. --- Tests/LibSQL/TestSqlHeap.cpp | 43 ++++++++++++++++++++++++++++++ Userland/Libraries/LibSQL/Heap.cpp | 26 +++++++++++++++--- Userland/Libraries/LibSQL/Heap.h | 2 ++ 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/Tests/LibSQL/TestSqlHeap.cpp b/Tests/LibSQL/TestSqlHeap.cpp index f2c3268d2e9..9a1eaccc4c1 100644 --- a/Tests/LibSQL/TestSqlHeap.cpp +++ b/Tests/LibSQL/TestSqlHeap.cpp @@ -53,3 +53,46 @@ TEST_CASE(heap_write_large_storage_with_flush) auto stored_long_string = TRY_OR_FAIL(heap->read_storage(storage_block_id)); EXPECT_EQ(long_string.bytes(), stored_long_string.bytes()); } + +TEST_CASE(heap_overwrite_large_storage) +{ + ScopeGuard guard([]() { MUST(Core::System::unlink(db_path)); }); + auto heap = create_heap(); + auto storage_block_id = heap->request_new_block_index(); + + // Write large storage spanning multiple blocks + StringBuilder builder; + MUST(builder.try_append_repeated('x', SQL::Block::DATA_SIZE * 4)); + auto long_string = builder.string_view(); + TRY_OR_FAIL(heap->write_storage(storage_block_id, long_string.bytes())); + MUST(heap->flush()); + auto heap_size = MUST(heap->file_size_in_bytes()); + + // Let's write it again and check whether the Heap reused the same extended blocks + TRY_OR_FAIL(heap->write_storage(storage_block_id, long_string.bytes())); + MUST(heap->flush()); + auto new_heap_size = MUST(heap->file_size_in_bytes()); + EXPECT_EQ(heap_size, new_heap_size); + + // Write a smaller string and read back - heap size should be at most the previous size + builder.clear(); + MUST(builder.try_append_repeated('y', SQL::Block::DATA_SIZE * 2)); + auto shorter_string = builder.string_view(); + TRY_OR_FAIL(heap->write_storage(storage_block_id, shorter_string.bytes())); + MUST(heap->flush()); + new_heap_size = MUST(heap->file_size_in_bytes()); + EXPECT(new_heap_size <= heap_size); + auto stored_shorter_string = TRY_OR_FAIL(heap->read_storage(storage_block_id)); + EXPECT_EQ(shorter_string.bytes(), stored_shorter_string.bytes()); + + // Write a longer string and read back - heap size is expected to grow + builder.clear(); + MUST(builder.try_append_repeated('z', SQL::Block::DATA_SIZE * 6)); + auto longest_string = builder.string_view(); + TRY_OR_FAIL(heap->write_storage(storage_block_id, longest_string.bytes())); + MUST(heap->flush()); + new_heap_size = MUST(heap->file_size_in_bytes()); + EXPECT(new_heap_size > heap_size); + auto stored_longest_string = TRY_OR_FAIL(heap->read_storage(storage_block_id)); + EXPECT_EQ(longest_string.bytes(), stored_longest_string.bytes()); +} diff --git a/Userland/Libraries/LibSQL/Heap.cpp b/Userland/Libraries/LibSQL/Heap.cpp index 8425e1d43c9..ac7599e1762 100644 --- a/Userland/Libraries/LibSQL/Heap.cpp +++ b/Userland/Libraries/LibSQL/Heap.cpp @@ -72,6 +72,12 @@ ErrorOr Heap::open() return {}; } +ErrorOr Heap::file_size_in_bytes() const +{ + TRY(m_file->seek(0, SeekMode::FromEndPosition)); + return TRY(m_file->tell()); +} + bool Heap::has_block(Block::Index index) const { return index <= m_highest_block_written || m_write_ahead_log.contains(index); @@ -95,6 +101,7 @@ ErrorOr Heap::read_storage(Block::Index index) ErrorOr Heap::write_storage(Block::Index index, ReadonlyBytes data) { dbgln_if(SQL_DEBUG, "{}({}, {} bytes)", __FUNCTION__, index, data.size()); + VERIFY(index > 0); VERIFY(data.size() > 0); // Split up the storage across multiple blocks if necessary, creating a chain @@ -103,11 +110,24 @@ ErrorOr Heap::write_storage(Block::Index index, ReadonlyBytes data) while (remaining_size > 0) { auto block_data_size = AK::min(remaining_size, Block::DATA_SIZE); remaining_size -= block_data_size; - auto next_block_index = (remaining_size > 0) ? request_new_block_index() : 0; - auto block_data = TRY(ByteBuffer::create_uninitialized(block_data_size)); + ByteBuffer block_data; + Block::Index next_block_index = 0; + if (has_block(index)) { + auto existing_block = TRY(read_block(index)); + block_data = existing_block.data(); + TRY(block_data.try_resize(block_data_size)); + next_block_index = existing_block.next_block(); + } else { + block_data = TRY(ByteBuffer::create_uninitialized(block_data_size)); + } + + if (next_block_index == 0 && remaining_size > 0) + next_block_index = request_new_block_index(); + else if (remaining_size == 0) + next_block_index = 0; + block_data.bytes().overwrite(0, data.offset(offset_in_data), block_data_size); - TRY(write_block({ index, block_data_size, next_block_index, move(block_data) })); index = next_block_index; diff --git a/Userland/Libraries/LibSQL/Heap.h b/Userland/Libraries/LibSQL/Heap.h index daf2363cb41..2a6623b9afb 100644 --- a/Userland/Libraries/LibSQL/Heap.h +++ b/Userland/Libraries/LibSQL/Heap.h @@ -71,6 +71,8 @@ public: virtual ~Heap() override; ErrorOr open(); + ErrorOr file_size_in_bytes() const; + bool has_block(Block::Index) const; [[nodiscard]] Block::Index request_new_block_index() { return m_next_block++; }