diff --git a/Tests/LibCompress/CMakeLists.txt b/Tests/LibCompress/CMakeLists.txt
index 39a96ce6fc5..5ce4dacdc9f 100644
--- a/Tests/LibCompress/CMakeLists.txt
+++ b/Tests/LibCompress/CMakeLists.txt
@@ -3,6 +3,7 @@ set(TEST_SOURCES
     TestDeflate.cpp
     TestGzip.cpp
     TestLzma.cpp
+    TestLzw.cpp
     TestPackBits.cpp
     TestXz.cpp
     TestZlib.cpp
diff --git a/Tests/LibCompress/TestLzw.cpp b/Tests/LibCompress/TestLzw.cpp
new file mode 100644
index 00000000000..20c64097a26
--- /dev/null
+++ b/Tests/LibCompress/TestLzw.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2024, Lucas Chollet
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+// NOTE(review): the #include targets below, and the template arguments after ErrorOr/MaybeOwned/Vector/Optional, are absent in this copy of the patch - presumably stripped as <...> during extraction; confirm against the upstream commit.
+#include
+
+#include
+#include
+
+namespace {
+// Compresses `input` with LzwCompressor::compress_all, decompresses the result with LzwDecompressor::decompress_all (both called with 8), and returns whether the output equals the input bytes.
+ErrorOr test_roundtrip_string(StringView input)
+{
+    auto const compressed = TRY(Compress::LzwCompressor::compress_all(input.bytes(), 8));
+    auto const roundtrip = TRY(Compress::LzwDecompressor::decompress_all(compressed, 8));
+    return roundtrip == input.bytes();
+}
+
+}
+// Round-trips an 8-byte string.
+TEST_CASE(roundtrip_lzw_little_endian_short)
+{
+    EXPECT(TRY_OR_FAIL(test_roundtrip_string("WeWellll"sv)));
+}
+
+TEST_CASE(roundtrip_lzw_little_endian_long)
+{
+    // LZW changes the code size after ~512 new symbols, this test case is long enough to trigger that.
+    constexpr auto input = "WellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,g"sv;
+    EXPECT(TRY_OR_FAIL(test_roundtrip_string(input)));
+}
diff --git a/Userland/Libraries/LibCompress/Lzw.h b/Userland/Libraries/LibCompress/Lzw.h
index e53b9d998a2..78bc58afed3 100644
--- a/Userland/Libraries/LibCompress/Lzw.h
+++ b/Userland/Libraries/LibCompress/Lzw.h
@@ -173,4 +173,75 @@ private:
     Vector m_output {};
 };
 
+class LzwCompressor : private Details::LzwState { // One-shot LZW encoder; works on the m_code_table / m_code_size members it gets from Details::LzwState.
+public:
+    static ErrorOr compress_all(ReadonlyBytes bytes, u8 initial_code_size) // Encodes `bytes` and returns the bytes written: a clear code, the data codes, an end-of-data code, then padding to a byte boundary.
+    {
+        LzwCompressor compressor { initial_code_size };
+        AllocatingMemoryStream buffer;
+        LittleEndianOutputBitStream output_stream { MaybeOwned(buffer) };
+        // Register the two control codes: the clear code first, the end-of-data code second.
+        u16 const clear_code = compressor.add_control_code();
+        u16 const end_of_data_code = compressor.add_control_code();
+        // The output always opens with a clear code.
+        TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
+        // Offset in `bytes` of the first byte that has not been encoded yet.
+        u32 last_offset = 0;
+
+        while (last_offset < bytes.size()) {
+            ReadonlyBytes current_symbol {};
+            u16 current_code {};
+            // When the table holds exactly max_table_size - 2 entries, write a clear code (at the current code size) and call reset() before matching.
+            if (compressor.m_code_table.size() == max_table_size - 2) {
+                TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
+                compressor.reset();
+            }
+            // Note the naming: this becomes true when a candidate is NOT in the table, i.e. the search below stopped on a miss rather than at the end of the input.
+            bool found_symbol = false;
+            // Extend the candidate one byte at a time; current_code keeps the code of the last candidate that had one.
+            for (u32 symbol_size = 1; last_offset + symbol_size <= bytes.size(); ++symbol_size) {
+                current_symbol = bytes.slice(last_offset, symbol_size);
+                auto const new_code = compressor.code_for_symbol(current_symbol);
+
+                if (new_code.has_value()) {
+                    current_code = *new_code;
+                } else {
+                    found_symbol = true;
+                    break;
+                }
+            }
+            // Emit the code of the longest candidate that was present in the table.
+            TRY(output_stream.write_bits(current_code, compressor.m_code_size));
+            // On a miss, hand the unmatched candidate to extend_code_table(), then drop its last byte so that only the bytes covered by the emitted code are consumed.
+            if (found_symbol) {
+                compressor.extend_code_table(Vector(current_symbol));
+                current_symbol = current_symbol.trim(current_symbol.size() - 1);
+            }
+            last_offset += current_symbol.size(); // NOTE(review): if even the 1-byte candidate had no code, this adds 0 and current_code was left at 0 - presumably LzwState seeds a code for every single-byte value; confirm for initial_code_size < 8.
+        }
+        // Terminate the stream, pad to a whole byte, and push everything into `buffer`.
+        TRY(output_stream.write_bits(end_of_data_code, compressor.m_code_size));
+        TRY(output_stream.align_to_byte_boundary());
+        TRY(output_stream.flush_buffer_to_stream());
+
+        return TRY(buffer.read_until_eof());
+    }
+
+private:
+    LzwCompressor(u8 initial_code_size)
+        : Details::LzwState(initial_code_size, 1) // NOTE(review): the meaning of the literal 1 is defined by LzwState's constructor, which is not visible in this patch.
+    {
+    }
+    // Linear search of m_code_table: returns the index of the first entry whose bytes equal `bytes`, or OptionalNone if there is none.
+    Optional code_for_symbol(ReadonlyBytes bytes)
+    {
+        for (u16 i = 0; i < m_code_table.size(); ++i) {
+            if (m_code_table[i].span() == bytes)
+                return i;
+        }
+
+        return OptionalNone {};
+    }
+};
+
 }