mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-09-20 09:49:15 +03:00
LibPDF: Add support for stream filters
This commit also splits StreamObject into PlainTextStreamObject and EncodedStreamObject: essentially a stream object that does not own its bytes versus one that does.
This commit is contained in:
parent
97cc482087
commit
477e3946e5
Notes:
sideshowbarker
2024-07-18 17:26:46 +09:00
Author: https://github.com/mattco98 Commit: https://github.com/SerenityOS/serenity/commit/477e3946e50 Pull-request: https://github.com/SerenityOS/serenity/pull/7436 Reviewed-by: https://github.com/alimpfard
@ -1,6 +1,6 @@
|
||||
set(SOURCES
|
||||
Object.cpp
|
||||
Document.cpp
|
||||
Filter.cpp
|
||||
Object.cpp
|
||||
Parser.cpp
|
||||
Renderer.cpp
|
||||
|
173
Userland/Libraries/LibPDF/Filter.cpp
Normal file
173
Userland/Libraries/LibPDF/Filter.cpp
Normal file
@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Hex.h>
|
||||
#include <LibCompress/Deflate.h>
|
||||
#include <LibPDF/Filter.h>
|
||||
|
||||
namespace PDF {
|
||||
|
||||
// Runs the decoder that corresponds to the PDF filter name `encoding_type`
// over `bytes` and returns whatever that decoder returns.
//
// An unrecognized filter name yields an empty Optional.
Optional<ByteBuffer> Filter::decode(const ReadonlyBytes& bytes, const FlyString& encoding_type)
{
    using Decoder = Optional<ByteBuffer> (*)(const ReadonlyBytes&);

    struct FilterEntry {
        const char* name;
        Decoder decoder;
    };

    // Filter names are unique, so the first (and only) match decides.
    static constexpr FilterEntry known_filters[] = {
        { "ASCIIHexDecode", &Filter::decode_ascii_hex },
        { "ASCII85Decode", &Filter::decode_ascii85 },
        { "LZWDecode", &Filter::decode_lzw },
        { "FlateDecode", &Filter::decode_flate },
        { "RunLengthDecode", &Filter::decode_run_length },
        { "CCITTFaxDecode", &Filter::decode_ccitt },
        { "JBIG2Decode", &Filter::decode_jbig2 },
        { "DCTDecode", &Filter::decode_dct },
        { "JPXDecode", &Filter::decode_jpx },
        { "Crypt", &Filter::decode_crypt },
    };

    for (const auto& entry : known_filters) {
        if (encoding_type == entry.name)
            return entry.decoder(bytes);
    }

    return {};
}
|
||||
|
||||
// Decodes ASCIIHexDecode data: every pair of hexadecimal digits produces one
// output byte.
//
// Following the PDF specification for this filter:
//   - white-space characters are skipped,
//   - '>' marks end-of-data; anything after it is ignored,
//   - an odd number of digits is completed with a trailing zero nibble,
//   - any other character is an error.
//
// Returns the decoded bytes, or an empty Optional if an invalid character is
// encountered.
Optional<ByteBuffer> Filter::decode_ascii_hex(const ReadonlyBytes& bytes)
{
    // White-space set as defined by the PDF specification (NUL, HT, LF, FF, CR, SP).
    auto is_pdf_whitespace = [](u8 byte) {
        return byte == 0x00 || byte == '\t' || byte == '\n' || byte == '\f' || byte == '\r' || byte == ' ';
    };

    Vector<u8> output;
    output.ensure_capacity(bytes.size() / 2 + 1);

    bool have_high_nibble = false;
    u8 high_nibble = 0;

    for (size_t i = 0; i < bytes.size(); ++i) {
        const u8 byte = bytes[i];

        if (is_pdf_whitespace(byte))
            continue;

        if (byte == '>')
            break;

        // decode_hex_digit() reports an invalid digit with a value >= 16.
        const auto digit = decode_hex_digit(static_cast<char>(byte));
        if (digit >= 16)
            return {};

        if (have_high_nibble) {
            output.append(static_cast<u8>((high_nibble << 4) | digit));
            have_high_nibble = false;
        } else {
            high_nibble = digit;
            have_high_nibble = true;
        }
    }

    // A dangling digit behaves as if it were followed by '0'.
    if (have_high_nibble)
        output.append(static_cast<u8>(high_nibble << 4));

    return ByteBuffer::copy(output.span());
}
|
||||
|
||||
// Decodes ASCII85Decode data: every group of five characters in the range
// '!'..'u' encodes four output bytes (most significant byte first), and a
// lone 'z' is shorthand for four zero bytes.
//
// Following the PDF specification for this filter:
//   - white-space characters are skipped wherever they appear,
//   - '~' (the start of the "~>" end-of-data marker) stops decoding,
//   - a final partial group of N characters (2 <= N <= 4) yields N - 1 bytes,
//   - 'z' inside a group, a character outside '!'..'u', a group whose value
//     exceeds 2^32 - 1, or a final group of a single character is an error.
//
// Returns the decoded bytes, or an empty Optional on malformed input.
Optional<ByteBuffer> Filter::decode_ascii85(const ReadonlyBytes& bytes)
{
    // White-space set as defined by the PDF specification (NUL, HT, LF, FF, CR, SP).
    auto is_pdf_whitespace = [](u8 byte) {
        return byte == 0x00 || byte == '\t' || byte == '\n' || byte == '\f' || byte == '\r' || byte == ' ';
    };

    Vector<u8> buff;
    buff.ensure_capacity(bytes.size());

    // Appends the `count` most significant bytes of `value`, most significant
    // first. Shifting keeps the result independent of host endianness.
    auto append_big_endian = [&buff](u32 value, size_t count) {
        for (size_t i = 0; i < count; ++i)
            buff.append(static_cast<u8>(value >> (24 - 8 * i)));
    };

    // Accumulate in 64 bits so that an out-of-range group can be detected
    // instead of silently wrapping.
    u64 group = 0;
    size_t digits_in_group = 0;

    for (size_t byte_index = 0; byte_index < bytes.size(); ++byte_index) {
        const u8 byte = bytes[byte_index];

        if (is_pdf_whitespace(byte))
            continue;

        if (byte == '~')
            break;

        if (byte == 'z') {
            // The shorthand is only valid between groups.
            if (digits_in_group != 0)
                return {};
            append_big_endian(0, 4);
            continue;
        }

        if (byte < '!' || byte > 'u')
            return {};

        group = group * 85 + (byte - '!');
        if (++digits_in_group < 5)
            continue;

        if (group > 0xffffffffull)
            return {};

        append_big_endian(static_cast<u32>(group), 4);
        group = 0;
        digits_in_group = 0;
    }

    if (digits_in_group == 0)
        return ByteBuffer::copy(buff.span());

    // A single leftover character cannot encode even one byte.
    if (digits_in_group == 1)
        return {};

    // Pad the final partial group with 'u' (the highest digit) and keep only
    // the bytes that were actually encoded.
    const auto bytes_to_write = digits_in_group - 1;
    for (; digits_in_group < 5; ++digits_in_group)
        group = group * 85 + ('u' - '!');

    if (group > 0xffffffffull)
        return {};

    append_big_endian(static_cast<u32>(group), bytes_to_write);

    return ByteBuffer::copy(buff.span());
}
|
||||
|
||||
// LZWDecode is not implemented: logs a debug message and then trips
// VERIFY_NOT_REACHED(), so no value is ever returned.
Optional<ByteBuffer> Filter::decode_lzw(const ReadonlyBytes&)
{
    dbgln("LZW decoding is not supported");
    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Decodes FlateDecode data by skipping the first two bytes of the stream
// (presumably the two-byte zlib header) and inflating the remainder as a raw
// DEFLATE stream.
//
// Returns the inflated bytes, or an empty Optional if the input is too short
// to contain the skipped prefix or if decompression fails.
Optional<ByteBuffer> Filter::decode_flate(const ReadonlyBytes& bytes)
{
    // FIXME: The spec says Flate decoding is "based on" zlib, does that mean they
    //        aren't exactly the same?

    constexpr size_t skipped_prefix_size = 2;

    // slice() would assert on a start offset past the end of the span.
    if (bytes.size() < skipped_prefix_size)
        return {};

    auto buff = Compress::DeflateDecompressor::decompress_all(bytes.slice(skipped_prefix_size));

    // Report malformed data to the caller instead of asserting here.
    if (!buff.has_value())
        return {};

    return buff.value();
}
|
||||
|
||||
// RunLengthDecode placeholder: the input is ignored and TODO() is hit.
Optional<ByteBuffer> Filter::decode_run_length(const ReadonlyBytes&)
{
    // FIXME: Support RunLength decoding
    TODO();
}
|
||||
|
||||
// CCITTFaxDecode placeholder: the input is ignored and TODO() is hit.
Optional<ByteBuffer> Filter::decode_ccitt(const ReadonlyBytes&)
{
    // FIXME: Support CCITT decoding
    TODO();
}
|
||||
|
||||
// JBIG2Decode placeholder: the input is ignored and TODO() is hit.
Optional<ByteBuffer> Filter::decode_jbig2(const ReadonlyBytes&)
{
    // FIXME: Support JBIG2 decoding
    TODO();
}
|
||||
|
||||
// DCTDecode placeholder: the input is ignored and TODO() is hit.
Optional<ByteBuffer> Filter::decode_dct(const ReadonlyBytes&)
{
    // FIXME: Support dct decoding
    TODO();
}
|
||||
|
||||
// JPXDecode placeholder: the input is ignored and TODO() is hit.
Optional<ByteBuffer> Filter::decode_jpx(const ReadonlyBytes&)
{
    // FIXME: Support JPX decoding
    TODO();
}
|
||||
|
||||
// Crypt filter placeholder: the input is ignored and TODO() is hit.
Optional<ByteBuffer> Filter::decode_crypt(const ReadonlyBytes&)
{
    // FIXME: Support Crypt decoding
    TODO();
}
|
||||
|
||||
}
|
31
Userland/Libraries/LibPDF/Filter.h
Normal file
31
Userland/Libraries/LibPDF/Filter.h
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/ByteBuffer.h>
|
||||
#include <AK/FlyString.h>
|
||||
|
||||
namespace PDF {
|
||||
|
||||
class Filter {
|
||||
public:
|
||||
static Optional<ByteBuffer> decode(const ReadonlyBytes& bytes, const FlyString& encoding_type);
|
||||
|
||||
private:
|
||||
static Optional<ByteBuffer> decode_ascii_hex(const ReadonlyBytes& bytes);
|
||||
static Optional<ByteBuffer> decode_ascii85(const ReadonlyBytes& bytes);
|
||||
static Optional<ByteBuffer> decode_lzw(const ReadonlyBytes& bytes);
|
||||
static Optional<ByteBuffer> decode_flate(const ReadonlyBytes& bytes);
|
||||
static Optional<ByteBuffer> decode_run_length(const ReadonlyBytes& bytes);
|
||||
static Optional<ByteBuffer> decode_ccitt(const ReadonlyBytes& bytes);
|
||||
static Optional<ByteBuffer> decode_jbig2(const ReadonlyBytes& bytes);
|
||||
static Optional<ByteBuffer> decode_dct(const ReadonlyBytes& bytes);
|
||||
static Optional<ByteBuffer> decode_jpx(const ReadonlyBytes& bytes);
|
||||
static Optional<ByteBuffer> decode_crypt(const ReadonlyBytes& bytes);
|
||||
};
|
||||
|
||||
}
|
@ -11,6 +11,9 @@ namespace PDF {
|
||||
class Document;
|
||||
class Object;
|
||||
|
||||
// Note: This macro doesn't care about PlainTextStreamObject and EncodedStreamObject because
|
||||
// we never need to work directly with either of them.
|
||||
|
||||
#define ENUMERATE_DIRECT_OBJECT_TYPES(V) \
|
||||
V(StringObject, string) \
|
||||
V(NameObject, name) \
|
||||
|
@ -147,18 +147,17 @@ private:
|
||||
HashMap<FlyString, Value> m_map;
|
||||
};
|
||||
|
||||
class StreamObject final : public Object {
|
||||
class StreamObject : public Object {
|
||||
public:
|
||||
StreamObject(const NonnullRefPtr<DictObject>& dict, const ReadonlyBytes& bytes)
|
||||
explicit StreamObject(const NonnullRefPtr<DictObject>& dict)
|
||||
: m_dict(dict)
|
||||
, m_bytes(bytes)
|
||||
{
|
||||
}
|
||||
|
||||
~StreamObject() override = default;
|
||||
virtual ~StreamObject() override = default;
|
||||
|
||||
[[nodiscard]] ALWAYS_INLINE NonnullRefPtr<DictObject> dict() const { return m_dict; }
|
||||
[[nodiscard]] ALWAYS_INLINE const ReadonlyBytes& bytes() const { return m_bytes; }
|
||||
[[nodiscard]] virtual ReadonlyBytes bytes() const = 0;
|
||||
|
||||
ALWAYS_INLINE bool is_stream() const override { return true; }
|
||||
ALWAYS_INLINE const char* type_name() const override { return "stream"; }
|
||||
@ -166,9 +165,40 @@ public:
|
||||
|
||||
private:
|
||||
NonnullRefPtr<DictObject> m_dict;
|
||||
};
|
||||
|
||||
class PlainTextStreamObject final : public StreamObject {
|
||||
public:
|
||||
PlainTextStreamObject(const NonnullRefPtr<DictObject>& dict, const ReadonlyBytes& bytes)
|
||||
: StreamObject(dict)
|
||||
, m_bytes(bytes)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~PlainTextStreamObject() override = default;
|
||||
|
||||
[[nodiscard]] ALWAYS_INLINE virtual ReadonlyBytes bytes() const override { return m_bytes; }
|
||||
|
||||
private:
|
||||
ReadonlyBytes m_bytes;
|
||||
};
|
||||
|
||||
class EncodedStreamObject final : public StreamObject {
|
||||
public:
|
||||
EncodedStreamObject(const NonnullRefPtr<DictObject>& dict, ByteBuffer&& buffer)
|
||||
: StreamObject(dict)
|
||||
, m_buffer(buffer)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~EncodedStreamObject() override = default;
|
||||
|
||||
[[nodiscard]] ALWAYS_INLINE virtual ReadonlyBytes bytes() const override { return m_buffer.bytes(); }
|
||||
|
||||
private:
|
||||
ByteBuffer m_buffer;
|
||||
};
|
||||
|
||||
class IndirectValue final : public Object {
|
||||
public:
|
||||
IndirectValue(u32 index, u32 generation_index, const Value& value)
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <AK/ScopeGuard.h>
|
||||
#include <AK/TypeCasts.h>
|
||||
#include <LibPDF/Document.h>
|
||||
#include <LibPDF/Filter.h>
|
||||
#include <LibPDF/Parser.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
@ -657,7 +658,15 @@ NonnullRefPtr<StreamObject> Parser::parse_stream(NonnullRefPtr<DictObject> dict)
|
||||
m_reader.move_by(9);
|
||||
consume_whitespace();
|
||||
|
||||
return make_object<StreamObject>(dict, bytes);
|
||||
if (dict->contains("Filter")) {
|
||||
auto filter_type = dict->get_name(m_document, "Filter")->name();
|
||||
auto maybe_bytes = Filter::decode(bytes, filter_type);
|
||||
// FIXME: Handle error condition
|
||||
VERIFY(maybe_bytes.has_value());
|
||||
return make_object<EncodedStreamObject>(dict, move(maybe_bytes.value()));
|
||||
}
|
||||
|
||||
return make_object<PlainTextStreamObject>(dict, bytes);
|
||||
}
|
||||
|
||||
Vector<Command> Parser::parse_graphics_commands()
|
||||
|
Loading…
Reference in New Issue
Block a user