LibCompress: Implement proper handling of LZMA end-of-stream markers

This commit is contained in:
Tim Schumacher 2023-03-30 00:39:36 +02:00 committed by Andreas Kling
parent b6f3b2f116
commit 8ff36e5910
Notes: sideshowbarker 2024-07-17 05:02:42 +09:00
4 changed files with 34 additions and 12 deletions

View File

@ -166,7 +166,6 @@ TEST_CASE(specification_bad_incorrect_size_lzma_decompress)
auto stream = MUST(try_make<FixedMemoryStream>(compressed));
auto decompressor = MUST(Compress::LzmaDecompressor::create_from_container(move(stream)));
// FIXME: This should detect that there is still remaining (non-'end of stream') data after the expected data and reject the last read.
// As of now, this test accepts any result, except for crashes.
(void)decompressor->read_until_eof(PAGE_SIZE);
auto buffer_or_error = decompressor->read_until_eof(PAGE_SIZE);
EXPECT(buffer_or_error.is_error());
}

View File

@ -81,6 +81,7 @@ ErrorOr<LzmaDecompressorOptions> LzmaHeader::as_decompressor_options() const
.position_bits = model_properties.position_bits,
.dictionary_size = dictionary_size(),
.uncompressed_size = uncompressed_size(),
.reject_end_of_stream_marker = false,
};
}
@ -457,17 +458,15 @@ ErrorOr<u32> LzmaDecompressor::decode_normalized_match_distance(u16 normalized_m
ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
{
while (m_dictionary->used_space() < bytes.size() && m_dictionary->empty_space() != 0) {
if (m_found_end_of_stream_marker) {
if (m_options.uncompressed_size.has_value() && m_total_decoded_bytes < m_options.uncompressed_size.value())
return Error::from_string_literal("Found end-of-stream marker earlier than expected");
if (m_found_end_of_stream_marker)
break;
}
if (has_reached_expected_data_size()) {
// FIXME: This should validate that either EOF or the 'end of stream' marker follow immediately.
// Both of those cases count as the 'end of stream' marker being found and should check for a clean decoder state.
break;
// If the decoder is in a clean state, we assume that this is fine.
if (is_range_decoder_in_clean_state())
break;
// Otherwise, we give it one last try to find the end marker in the remaining data.
}
// "The decoder calculates "state2" variable value to select exact variable from
@ -543,6 +542,10 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
// IsMatch[state2] decode
// 0 - the Literal"
if (TRY(decode_bit_with_probability(m_is_match_probabilities[state2])) == 0) {
// If we are already past the expected uncompressed size, we are already in "look for EOS only" mode.
if (has_reached_expected_data_size())
return Error::from_string_literal("Found literal after reaching expected uncompressed size");
// "At first the LZMA decoder must check that it doesn't exceed
// specified uncompressed size."
// This is already checked for at the beginning of the loop.
@ -577,11 +580,20 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
// "End of stream" marker, so we can stop decoding and check finishing
// condition in Range Decoder"
if (m_rep0 == 0xFFFFFFFF) {
// If we should reject end-of-stream markers, do so now.
// Note that this is not part of LZMA, as LZMA allows end-of-stream markers in all contexts, so pure LZMA should never set this option.
if (m_options.reject_end_of_stream_marker)
return Error::from_string_literal("An end-of-stream marker was found, but the LZMA stream is configured to reject them");
// The range decoder condition is checked after breaking out of the loop.
m_found_end_of_stream_marker = true;
continue;
}
// If we are looking for EOS, but haven't found it here, the stream is corrupted.
if (has_reached_expected_data_size())
return Error::from_string_literal("First simple match after the expected uncompressed size is not the EOS marker");
// "If uncompressed size is defined, LZMA decoder must check that it doesn't
// exceed that specified uncompressed size."
// This is being checked for in the common "copy to buffer" implementation.
@ -598,6 +610,10 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
continue;
}
// If we are looking for EOS, but find another match type, the stream is also corrupted.
if (has_reached_expected_data_size())
return Error::from_string_literal("First match type after the expected uncompressed size is not a simple match");
// " 1 - Rep Match
// IsRepG0[state] decode
// 0 - the distance is rep0"
@ -666,7 +682,10 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
TRY(copy_match_to_buffer(normalized_length + normalized_to_real_match_length_offset));
}
if (m_found_end_of_stream_marker) {
if (m_found_end_of_stream_marker || has_reached_expected_data_size()) {
if (m_options.uncompressed_size.has_value() && m_total_decoded_bytes < m_options.uncompressed_size.value())
return Error::from_string_literal("Found end-of-stream marker earlier than expected");
if (!is_range_decoder_in_clean_state())
return Error::from_string_literal("LZMA stream ends in an unclean state");
}

View File

@ -29,6 +29,7 @@ struct LzmaDecompressorOptions {
u8 position_bits { 0 };
u32 dictionary_size { 0 };
Optional<u64> uncompressed_size;
bool reject_end_of_stream_marker { false };
};
// Described in section "lzma file format".

View File

@ -106,6 +106,9 @@ ErrorOr<Bytes> Lzma2Decompressor::read_some(Bytes bytes)
.position_bits = properties.position_bits,
.dictionary_size = static_cast<u32>(dictionary_size),
.uncompressed_size = uncompressed_size,
// Note: This is not specified anywhere. However, it is apparently tested by bad-1-lzma2-7.xz from the XZ utils test files.
.reject_end_of_stream_marker = true,
};
[[fallthrough]];
}