2021-05-01 04:33:13 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
|
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <AK/NonnullRefPtrVector.h>
|
2021-05-10 20:39:19 +03:00
|
|
|
#include <LibPDF/Command.h>
|
2021-05-01 04:33:13 +03:00
|
|
|
#include <LibPDF/Object.h>
|
|
|
|
#include <LibPDF/Reader.h>
|
|
|
|
#include <LibPDF/XRefTable.h>
|
|
|
|
|
|
|
|
namespace PDF {
|
|
|
|
|
|
|
|
class Document;
|
|
|
|
|
2021-05-24 23:57:16 +03:00
|
|
|
// Parser for PDF data held in a ReadonlyBytes buffer.
//
// Instances are reference-counted. The public constructor takes a
// Badge<Document>, so only Document can construct a Parser directly; the
// private constructor is reachable only from within the class itself.
class Parser final : public RefCounted<Parser> {
public:
    // Parses the given bytes as graphics commands. Being static, this does not
    // require the caller to construct a Parser first.
    static Vector<Command> parse_graphics_commands(const ReadonlyBytes&);

    Parser(Badge<Document>, const ReadonlyBytes&);

    // Associates this parser with a document. The RefPtr is received by value,
    // so it is moved into the member instead of being copied a second time;
    // this avoids a redundant reference-count increment/decrement pair.
    // NOTE(review): if Document keeps a strong reference to its Parser, this
    // strong back-reference forms a reference cycle. Confirm against Document.
    void set_document(RefPtr<Document> document) { m_document = move(document); }

    bool perform_validation();

    // A cross-reference table together with its trailer dictionary, as handed
    // back by parse_last_xref_table_and_trailer().
    struct XRefTableAndTrailer {
        XRefTable xref_table;
        NonnullRefPtr<DictObject> trailer;
    };

    // NOTE(review): an empty Optional presumably signals a parse failure.
    // Confirm in the implementation.
    Optional<XRefTableAndTrailer> parse_last_xref_table_and_trailer();

    // The RefPtr results below are nullable.
    // NOTE(review): null presumably signals a parse failure. Confirm in the
    // implementation.
    RefPtr<IndirectValue> parse_indirect_value_at_offset(size_t offset);

    // `ok` is an out-parameter; how it relates to a null return value is decided
    // by the implementation, which is not visible from this header.
    RefPtr<DictObject> conditionally_parse_page_tree_node_at_offset(size_t offset, bool& ok);

private:
    explicit Parser(const ReadonlyBytes&);

    // File-structure level parsing and navigation.
    bool parse_header();
    Optional<XRefTable> parse_xref_table();
    RefPtr<DictObject> parse_file_trailer();

    bool navigate_to_before_eof_marker();
    bool navigate_to_after_startxref();

    // If the PDF is linearized, the first object will be the linearization
    // parameter dictionary, and it will always occur within the first 1024 bytes.
    // We do a very sloppy and context-free search for this object. A return value
    // of true does not necessarily mean this PDF is linearized, but a return value
    // of false does mean this PDF is not linearized.
    // FIXME: false doesn't guarantee non-linearization, but we VERIFY the result!
    bool sloppy_is_linearized();

    String parse_comment();

    // Value and object level parsing.
    Value parse_value();
    Value parse_possible_indirect_value_or_ref();
    RefPtr<IndirectValue> parse_indirect_value(int index, int generation);
    RefPtr<IndirectValue> parse_indirect_value();
    Value parse_number();
    RefPtr<NameObject> parse_name();
    RefPtr<StringObject> parse_string();
    String parse_literal_string();
    String parse_hex_string();
    RefPtr<ArrayObject> parse_array();
    RefPtr<DictObject> parse_dict();
    RefPtr<StreamObject> parse_stream(NonnullRefPtr<DictObject> dict);

    // Instance counterpart of the public static parse_graphics_commands().
    Vector<Command> parse_graphics_commands();

    // Predicates; all are declared const.
    bool matches_eol() const;
    bool matches_whitespace() const;
    bool matches_number() const;
    bool matches_delimiter() const;
    bool matches_regular_character() const;

    // Consuming operations; none are const.
    // NOTE(review): the bool results presumably report whether anything was
    // consumed or matched. Confirm in the implementation.
    bool consume_eol();
    bool consume_whitespace();
    char consume();
    void consume(int amount);
    bool consume(char);

    Reader m_reader;
    RefPtr<Document> m_document;
};
|
|
|
|
|
|
|
|
}
|