mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-29 06:02:07 +03:00
LibPDF: Improve hex string parsing
A local (non-public) PDF I have lying around contains this in a page's operator stream: ``` [<00b4003e> 3 <002600480051> 3 <005700550044004f0003> -29 <00330044> 3 <0055> -3 <004e0040> 4 <0003> -29 <004c00560003> -31 <0057004b> 4 <00480003> -37 <0050 >] TJ ``` That is, there's a newline in a hexstring after a character. This led to `Parser error at offset 5184: Unexpected character`. The spec says in 3.2.3 String Objects, Hexadecimal Strings: """Each pair of hexadecimal digits defines one byte of the string. White-space characters (such as space, tab, carriage return, line feed, and form feed) are ignored.""" But we didn't ignore whitespace before or after a character, only in between the bytes. The spec also says: """If the final digit of a hexadecimal string is missing—that is, if there is an odd number of digits—the final digit is assumed to be 0.""" In that case, we were skipping the closing `>` twice -- or, more accurately, we ignored the character after it too. This has been wrong all the way back in #6974. Add a test that fails if either of the two changes isn't present.
This commit is contained in:
parent
d11c7a19da
commit
9495f64f91
Notes:
sideshowbarker
2024-07-17 02:39:10 +09:00
Author: https://github.com/nico Commit: https://github.com/SerenityOS/serenity/commit/9495f64f91 Pull-request: https://github.com/SerenityOS/serenity/pull/22535
@ -15,6 +15,27 @@
|
|||||||
#include <LibTest/Macros.h>
|
#include <LibTest/Macros.h>
|
||||||
#include <LibTest/TestCase.h>
|
#include <LibTest/TestCase.h>
|
||||||
|
|
||||||
|
TEST_CASE(parse_value)
|
||||||
|
{
|
||||||
|
// document isn't really used for anything, only to check there's no security_handler.
|
||||||
|
auto file = MUST(Core::MappedFile::map("linearized.pdf"sv));
|
||||||
|
auto document = MUST(PDF::Document::create(file->bytes()));
|
||||||
|
|
||||||
|
auto contents = "<50607><10\n>"sv;
|
||||||
|
PDF::Parser parser(contents.bytes());
|
||||||
|
parser.set_document(document->make_weak_ptr());
|
||||||
|
|
||||||
|
auto value1 = MUST(parser.parse_value(PDF::Parser::CanBeIndirectValue::No));
|
||||||
|
auto string1 = value1.get<NonnullRefPtr<PDF::Object>>()->cast<PDF::StringObject>();
|
||||||
|
EXPECT(string1->is_binary());
|
||||||
|
EXPECT_EQ(string1->string(), "\x50\x60\x70"sv);
|
||||||
|
|
||||||
|
auto value2 = MUST(parser.parse_value(PDF::Parser::CanBeIndirectValue::No));
|
||||||
|
auto string2 = value2.get<NonnullRefPtr<PDF::Object>>()->cast<PDF::StringObject>();
|
||||||
|
EXPECT(string2->is_binary());
|
||||||
|
EXPECT_EQ(string2->string(), "\x10"sv);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE(linearized_pdf)
|
TEST_CASE(linearized_pdf)
|
||||||
{
|
{
|
||||||
auto file = MUST(Core::MappedFile::map("linearized.pdf"sv));
|
auto file = MUST(Core::MappedFile::map("linearized.pdf"sv));
|
||||||
|
@ -352,6 +352,7 @@ PDFErrorOr<ByteString> Parser::parse_hex_string()
|
|||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
m_reader.consume_whitespace();
|
||||||
if (m_reader.matches('>')) {
|
if (m_reader.matches('>')) {
|
||||||
m_reader.consume();
|
m_reader.consume();
|
||||||
return builder.to_byte_string();
|
return builder.to_byte_string();
|
||||||
@ -364,7 +365,6 @@ PDFErrorOr<ByteString> Parser::parse_hex_string()
|
|||||||
if (ch == '>') {
|
if (ch == '>') {
|
||||||
// The hex string contains an odd number of characters, and the last character
|
// The hex string contains an odd number of characters, and the last character
|
||||||
// is assumed to be '0'
|
// is assumed to be '0'
|
||||||
m_reader.consume();
|
|
||||||
hex_value *= 16;
|
hex_value *= 16;
|
||||||
builder.append(static_cast<char>(hex_value));
|
builder.append(static_cast<char>(hex_value));
|
||||||
return builder.to_byte_string();
|
return builder.to_byte_string();
|
||||||
|
Loading…
Reference in New Issue
Block a user