mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-01 07:35:02 +03:00
LibPDF: Implement most of the spec algorithm for picking TrueType glyphs
Non-CID-keyed fonts in PDFs have 8-bit codepoints which are mapped from bytes to character names via encoding. TrueType fonts don't index glyphs by name (Type1 fonts do), so the fix (codified in the spec) was to make a list of all possible glyph names and map those to (16-bit) unicode values, and then pass those into the truetype cmap. (As a fallback, we're supposed to look at the optional names in the font's "post" table. That part isn't implemented here yet.) (Note that this affects the behavior of fallback fonts for TrueType fonts, but not yet fallback fonts for Type1 fonts, and neither the behavior of the 14 built-in Type1 fonts (which we implement as fallback fonts), since the TrueType fallback in Type1Font.cpp does not use this algorithm yet. This will be fixed in a future patch.)
This commit is contained in:
parent
207717982c
commit
83128d093e
Notes:
sideshowbarker
2024-07-17 00:37:23 +09:00
Author: https://github.com/nico Commit: https://github.com/SerenityOS/serenity/commit/83128d093e Pull-request: https://github.com/SerenityOS/serenity/pull/23325
@ -6,6 +6,7 @@ set(SOURCES
|
||||
Encoding.cpp
|
||||
Encryption.cpp
|
||||
Filter.cpp
|
||||
Fonts/AdobeGlyphList.cpp
|
||||
Fonts/CFF.cpp
|
||||
Fonts/PDFFont.cpp
|
||||
Fonts/PS1FontProgram.cpp
|
||||
|
4247
Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.cpp
Normal file
4247
Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.cpp
Normal file
File diff suppressed because it is too large
Load Diff
15
Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.h
Normal file
15
Userland/Libraries/LibPDF/Fonts/AdobeGlyphList.h
Normal file
@ -0,0 +1,15 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Nico Weber <thakis@chromium.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/StringView.h>
|
||||
|
||||
namespace PDF {
|
||||
|
||||
Optional<u32> glyph_name_to_unicode(StringView);
|
||||
|
||||
}
|
@ -9,6 +9,7 @@
|
||||
#include <LibGfx/Font/ScaledFont.h>
|
||||
#include <LibGfx/Painter.h>
|
||||
#include <LibPDF/CommonNames.h>
|
||||
#include <LibPDF/Fonts/AdobeGlyphList.h>
|
||||
#include <LibPDF/Fonts/TrueTypeFont.h>
|
||||
#include <LibPDF/Renderer.h>
|
||||
|
||||
@ -18,6 +19,15 @@ PDFErrorOr<void> TrueTypeFont::initialize(Document* document, NonnullRefPtr<Dict
|
||||
{
|
||||
TRY(SimpleFont::initialize(document, dict, font_size));
|
||||
|
||||
if (dict->contains(CommonNames::Encoding)) {
|
||||
auto encoding_object = MUST(dict->get_object(document, CommonNames::Encoding));
|
||||
if (encoding_object->is<NameObject>()) {
|
||||
auto name = encoding_object->cast<NameObject>()->name();
|
||||
if (name == "MacRomanEncoding" || name == "WinAnsiEncoding")
|
||||
m_encoding_is_mac_roman_or_win_ansi = true;
|
||||
}
|
||||
}
|
||||
|
||||
m_base_font_name = TRY(dict->get_name(document, CommonNames::BaseFont))->name();
|
||||
|
||||
// If there's an embedded font program we use that; otherwise we try to find a replacement font
|
||||
@ -33,8 +43,26 @@ PDFErrorOr<void> TrueTypeFont::initialize(Document* document, NonnullRefPtr<Dict
|
||||
if (!m_font) {
|
||||
m_font = TRY(replacement_for(base_font_name().to_lowercase(), font_size));
|
||||
}
|
||||
|
||||
VERIFY(m_font);
|
||||
|
||||
// See long spec comment in TrueTypeFont::draw_glyp().
|
||||
if (!dict->contains(CommonNames::Encoding) || is_symbolic()) {
|
||||
// FIXME: Looks like this is never hit in the test set (?).
|
||||
for (u8 prefix : { 0x00, 0xF0, 0xF1, 0xF2 }) {
|
||||
bool has_all = true;
|
||||
for (unsigned suffix = 0x00; suffix <= 0xFF; ++suffix) {
|
||||
if (!m_font->contains_glyph((prefix << 8) | suffix)) {
|
||||
has_all = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (has_all) {
|
||||
m_high_byte = prefix;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -48,6 +76,18 @@ void TrueTypeFont::set_font_size(float font_size)
|
||||
m_font = m_font->with_size((font_size * POINTS_PER_INCH) / DEFAULT_DPI);
|
||||
}
|
||||
|
||||
static void do_draw_glyph(Gfx::Painter& painter, Gfx::FloatPoint position, float width, u32 unicode, Gfx::Font const& font, ColorOrStyle const& style)
|
||||
{
|
||||
if (style.has<Color>()) {
|
||||
painter.draw_glyph(position, unicode, font, style.get<Color>());
|
||||
} else {
|
||||
// FIXME: Bounding box and sample point look to be pretty wrong
|
||||
style.get<NonnullRefPtr<Gfx::PaintStyle>>()->paint(Gfx::IntRect(position.x(), position.y(), width, 0), [&](auto sample) {
|
||||
painter.draw_glyph(position, unicode, font, sample(Gfx::IntPoint(position.x(), position.y())));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
PDFErrorOr<void> TrueTypeFont::draw_glyph(Gfx::Painter& painter, Gfx::FloatPoint point, float width, u8 char_code, Renderer const& renderer)
|
||||
{
|
||||
auto style = renderer.state().paint_style;
|
||||
@ -55,14 +95,71 @@ PDFErrorOr<void> TrueTypeFont::draw_glyph(Gfx::Painter& painter, Gfx::FloatPoint
|
||||
// Undo shift in Glyf::Glyph::append_simple_path() via OpenType::Font::rasterize_glyph().
|
||||
auto position = point.translated(0, -m_font->pixel_metrics().ascent);
|
||||
|
||||
if (style.has<Color>()) {
|
||||
painter.draw_glyph(position, char_code, *m_font, style.get<Color>());
|
||||
} else {
|
||||
// FIXME: Bounding box and sample point look to be pretty wrong
|
||||
style.get<NonnullRefPtr<Gfx::PaintStyle>>()->paint(Gfx::IntRect(position.x(), position.y(), width, 0), [&](auto sample) {
|
||||
painter.draw_glyph(position, char_code, *m_font, sample(Gfx::IntPoint(position.x(), position.y())));
|
||||
});
|
||||
// 5.5.5 Character Encoding, Encodings for TrueType Fonts
|
||||
u32 unicode;
|
||||
|
||||
// "If the font has a named Encoding entry of either MacRomanEncoding or WinAnsiEncoding,
|
||||
// or if the font descriptor’s Nonsymbolic flag (see Table 5.20) is set, the viewer creates
|
||||
// a table that maps from character codes to glyph names:
|
||||
if (m_encoding_is_mac_roman_or_win_ansi || is_nonsymbolic()) {
|
||||
// • If the Encoding entry is one of the names MacRomanEncoding or WinAnsiEncoding,
|
||||
// the table is initialized with the mappings described in Appendix D.
|
||||
// • If the Encoding entry is a dictionary, the table is initialized with the entries
|
||||
// from the dictionary’s BaseEncoding entry (see Table 5.11). Any entries in the
|
||||
// Differences array are used to update the table. Finally, any undefined entries in
|
||||
// the table are filled using StandardEncoding."
|
||||
// Implementor's note: This is (mostly) done in SimpleFont::initialize() and encoding() returns the result.
|
||||
auto effective_encoding = encoding();
|
||||
if (!effective_encoding)
|
||||
effective_encoding = Encoding::standard_encoding();
|
||||
|
||||
// "If a (3, 1) “cmap” subtable (Microsoft Unicode) is present:
|
||||
// • A character code is first mapped to a glyph name using the table described above.
|
||||
// • The glyph name is then mapped to a Unicode value by consulting the Adobe Glyph List (see the Bibliography).
|
||||
// • Finally, the Unicode value is mapped to a glyph description according to the (3, 1) subtable.
|
||||
//
|
||||
// If no (3, 1) subtable is present but a (1, 0) subtable (Macintosh Roman) is present:
|
||||
// • A character code is first mapped to a glyph name using the table described above.
|
||||
// • The glyph name is then mapped back to a character code according to the standard
|
||||
// Roman encoding used on Mac OS (see note below).
|
||||
// • Finally, the code is mapped to a glyph description according to the (1, 0) subtable."
|
||||
// Implementor's note: We currently don't know which tables are present, so we for now we always
|
||||
// use the (3, 1) algorithm.
|
||||
// FIXME: Implement (1, 0) subtable support.
|
||||
auto char_name = effective_encoding->get_name(char_code);
|
||||
u32 unicode = glyph_name_to_unicode(char_name).value_or(char_code);
|
||||
if (m_font->contains_glyph(unicode)) {
|
||||
do_draw_glyph(painter, position, width, unicode, *m_font, style);
|
||||
return {};
|
||||
}
|
||||
|
||||
// "In either of the cases above, if the glyph name cannot be mapped as specified, the glyph name is looked up
|
||||
// in the font program’s “post” table (if one is present) and the associated glyph description is used."
|
||||
// FIXME: Implement this.
|
||||
return Error::rendering_unsupported_error("Looking up glyph in 'post' table not yet implemented.");
|
||||
} else if (m_high_byte.has_value()) {
|
||||
// "When the font has no Encoding entry, or the font descriptor’s Symbolic flag is set (in which case the
|
||||
// Encoding entry is ignored), the following occurs:
|
||||
//
|
||||
// • If the font contains a (3, 0) subtable, the range of character codes must be one of the following:
|
||||
// 0x0000 - 0x00FF, 0xF000 - 0xF0FF, 0xF100 - 0xF1FF, or 0xF200 - 0xF2FF. Depending on the range of codes,
|
||||
// each byte from the string is prepended with the high byte of the range, to form a two-byte character,
|
||||
// which is used to select the associated glyph description from the subtable.
|
||||
// • Otherwise, if the font contains a (1, 0) subtable, single bytes from the string are used to look up the
|
||||
// associated glyph descriptions from the subtable."
|
||||
// Implementor's note: We currently don't know which tables are present, so we for now we always use the (3, 0) algorithm.
|
||||
unicode = (m_high_byte.value() << 8) | char_code;
|
||||
} else {
|
||||
// "If a character cannot be mapped in any of the ways described above, the results are implementation-dependent."
|
||||
// FIXME: Do something smarter?
|
||||
auto effective_encoding = encoding();
|
||||
if (!effective_encoding)
|
||||
effective_encoding = Encoding::standard_encoding();
|
||||
auto char_name = effective_encoding->get_name(char_code);
|
||||
unicode = glyph_name_to_unicode(char_name).value_or(char_code);
|
||||
}
|
||||
|
||||
do_draw_glyph(painter, position, width, unicode, *m_font, style);
|
||||
return {};
|
||||
}
|
||||
|
||||
|
@ -26,6 +26,8 @@ protected:
|
||||
private:
|
||||
DeprecatedFlyString m_base_font_name;
|
||||
RefPtr<Gfx::Font> m_font;
|
||||
bool m_encoding_is_mac_roman_or_win_ansi { false };
|
||||
Optional<u8> m_high_byte;
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user