LibPDF: Implement ZapfDingbats clause of the Adobe Glyph List algorithm

Liberation Sans still doesn't have the vast majority of the
Zapf Dingbats glyphs, but now we map the Zapf Dingbats names to good
Unicode values. So we only need to use a different font and all should
work. (And Liberation Sans has _some_ of the glyphs, like 13 of the
223.) And we now render empty squares instead of wrong glyphs for the
ones we don't have.

I haven't seen any PDFs using ZapfDingbats in the wild, but they
probably exist somewhere.
(Tests/LibPDF/standard-14-fonts.pdf is a synthetic PDF using it.)
This commit is contained in:
Nico Weber 2024-02-28 17:36:51 -05:00 committed by Andreas Kling
parent 2eb099aabe
commit 8e3c54f203
Notes: sideshowbarker 2024-07-18 00:54:03 +09:00
5 changed files with 242 additions and 32 deletions

View File

@ -12,23 +12,24 @@ Contains the Adobe Glyph List, transformed into C++ via this script:
import sys
print(f'static HashMap<StringView, u32> constexpr glyph_list = {{')
for line in open('glyphlist.txt'):
line = line.strip()
if line.startswith('#'):
continue
def write_table(source_filename, table_name):
print(f'static HashMap<StringView, u32> const {table_name} = {{')
for line in open(source_filename):
line = line.strip()
if line.startswith('#'):
continue
name, codepoint = line.split(';')
if ' ' in codepoint:
print(f'skipping {name}, multiple codepoints', file=sys.stderr)
continue
print(f' {{ "{name}"sv, 0x{codepoint} }},')
print(f'}};')
name, codepoint = line.split(';')
if ' ' in codepoint:
print(f'skipping {name}, multiple codepoints', file=sys.stderr)
continue
print(f' {{ "{name}"sv, 0x{codepoint} }},')
print(f'}};')
write_table('glyphlist.txt', 'glyph_list')
write_table('zapfdingbats.txt', 'zapf_dingbats_list')
```
where glyphlist.txt is from https://github.com/adobe-type-tools/agl-aglfn/blob/master/glyphlist.txt
where glyphlist.txt and zapfdingbats.txt are from https://github.com/adobe-type-tools/agl-aglfn/
*/
#include <AK/CharacterTypes.h>
@ -4240,6 +4241,209 @@ static HashMap<StringView, u32> const glyph_list = {
{ "zuhiragana"sv, 0x305A },
{ "zukatakana"sv, 0x30BA },
};
// Maps glyph names of the ITC Zapf Dingbats Glyph List ("a1" .. "a206") to code points.
// Generated from zapfdingbats.txt by the script documented in the comment near the top of this file.
// glyph_name_to_unicode() consults this table only when its caller says the font is Zapf Dingbats.
static HashMap<StringView, u32> const zapf_dingbats_list = {
{ "a100"sv, 0x275E },
{ "a101"sv, 0x2761 },
{ "a102"sv, 0x2762 },
{ "a103"sv, 0x2763 },
{ "a104"sv, 0x2764 },
{ "a105"sv, 0x2710 },
{ "a106"sv, 0x2765 },
{ "a107"sv, 0x2766 },
{ "a108"sv, 0x2767 },
{ "a109"sv, 0x2660 },
{ "a10"sv, 0x2721 },
{ "a110"sv, 0x2665 },
{ "a111"sv, 0x2666 },
{ "a112"sv, 0x2663 },
{ "a117"sv, 0x2709 },
{ "a118"sv, 0x2708 },
{ "a119"sv, 0x2707 },
{ "a11"sv, 0x261B },
{ "a120"sv, 0x2460 },
{ "a121"sv, 0x2461 },
{ "a122"sv, 0x2462 },
{ "a123"sv, 0x2463 },
{ "a124"sv, 0x2464 },
{ "a125"sv, 0x2465 },
{ "a126"sv, 0x2466 },
{ "a127"sv, 0x2467 },
{ "a128"sv, 0x2468 },
{ "a129"sv, 0x2469 },
{ "a12"sv, 0x261E },
{ "a130"sv, 0x2776 },
{ "a131"sv, 0x2777 },
{ "a132"sv, 0x2778 },
{ "a133"sv, 0x2779 },
{ "a134"sv, 0x277A },
{ "a135"sv, 0x277B },
{ "a136"sv, 0x277C },
{ "a137"sv, 0x277D },
{ "a138"sv, 0x277E },
{ "a139"sv, 0x277F },
{ "a13"sv, 0x270C },
{ "a140"sv, 0x2780 },
{ "a141"sv, 0x2781 },
{ "a142"sv, 0x2782 },
{ "a143"sv, 0x2783 },
{ "a144"sv, 0x2784 },
{ "a145"sv, 0x2785 },
{ "a146"sv, 0x2786 },
{ "a147"sv, 0x2787 },
{ "a148"sv, 0x2788 },
{ "a149"sv, 0x2789 },
{ "a14"sv, 0x270D },
{ "a150"sv, 0x278A },
{ "a151"sv, 0x278B },
{ "a152"sv, 0x278C },
{ "a153"sv, 0x278D },
{ "a154"sv, 0x278E },
{ "a155"sv, 0x278F },
{ "a156"sv, 0x2790 },
{ "a157"sv, 0x2791 },
{ "a158"sv, 0x2792 },
{ "a159"sv, 0x2793 },
{ "a15"sv, 0x270E },
{ "a160"sv, 0x2794 },
{ "a161"sv, 0x2192 },
{ "a162"sv, 0x27A3 },
{ "a163"sv, 0x2194 },
{ "a164"sv, 0x2195 },
{ "a165"sv, 0x2799 },
{ "a166"sv, 0x279B },
{ "a167"sv, 0x279C },
{ "a168"sv, 0x279D },
{ "a169"sv, 0x279E },
{ "a16"sv, 0x270F },
{ "a170"sv, 0x279F },
{ "a171"sv, 0x27A0 },
{ "a172"sv, 0x27A1 },
{ "a173"sv, 0x27A2 },
{ "a174"sv, 0x27A4 },
{ "a175"sv, 0x27A5 },
{ "a176"sv, 0x27A6 },
{ "a177"sv, 0x27A7 },
{ "a178"sv, 0x27A8 },
{ "a179"sv, 0x27A9 },
{ "a17"sv, 0x2711 },
{ "a180"sv, 0x27AB },
{ "a181"sv, 0x27AD },
{ "a182"sv, 0x27AF },
{ "a183"sv, 0x27B2 },
{ "a184"sv, 0x27B3 },
{ "a185"sv, 0x27B5 },
{ "a186"sv, 0x27B8 },
{ "a187"sv, 0x27BA },
{ "a188"sv, 0x27BB },
{ "a189"sv, 0x27BC },
{ "a18"sv, 0x2712 },
{ "a190"sv, 0x27BD },
{ "a191"sv, 0x27BE },
{ "a192"sv, 0x279A },
{ "a193"sv, 0x27AA },
{ "a194"sv, 0x27B6 },
{ "a195"sv, 0x27B9 },
{ "a196"sv, 0x2798 },
{ "a197"sv, 0x27B4 },
{ "a198"sv, 0x27B7 },
{ "a199"sv, 0x27AC },
{ "a19"sv, 0x2713 },
{ "a1"sv, 0x2701 },
{ "a200"sv, 0x27AE },
{ "a201"sv, 0x27B1 },
{ "a202"sv, 0x2703 },
{ "a203"sv, 0x2750 },
{ "a204"sv, 0x2752 },
{ "a205"sv, 0x276E },
{ "a206"sv, 0x2770 },
{ "a20"sv, 0x2714 },
{ "a21"sv, 0x2715 },
{ "a22"sv, 0x2716 },
{ "a23"sv, 0x2717 },
{ "a24"sv, 0x2718 },
{ "a25"sv, 0x2719 },
{ "a26"sv, 0x271A },
{ "a27"sv, 0x271B },
{ "a28"sv, 0x271C },
{ "a29"sv, 0x2722 },
{ "a2"sv, 0x2702 },
{ "a30"sv, 0x2723 },
{ "a31"sv, 0x2724 },
{ "a32"sv, 0x2725 },
{ "a33"sv, 0x2726 },
{ "a34"sv, 0x2727 },
{ "a35"sv, 0x2605 },
{ "a36"sv, 0x2729 },
{ "a37"sv, 0x272A },
{ "a38"sv, 0x272B },
{ "a39"sv, 0x272C },
{ "a3"sv, 0x2704 },
{ "a40"sv, 0x272D },
{ "a41"sv, 0x272E },
{ "a42"sv, 0x272F },
{ "a43"sv, 0x2730 },
{ "a44"sv, 0x2731 },
{ "a45"sv, 0x2732 },
{ "a46"sv, 0x2733 },
{ "a47"sv, 0x2734 },
{ "a48"sv, 0x2735 },
{ "a49"sv, 0x2736 },
{ "a4"sv, 0x260E },
{ "a50"sv, 0x2737 },
{ "a51"sv, 0x2738 },
{ "a52"sv, 0x2739 },
{ "a53"sv, 0x273A },
{ "a54"sv, 0x273B },
{ "a55"sv, 0x273C },
{ "a56"sv, 0x273D },
{ "a57"sv, 0x273E },
{ "a58"sv, 0x273F },
{ "a59"sv, 0x2740 },
{ "a5"sv, 0x2706 },
{ "a60"sv, 0x2741 },
{ "a61"sv, 0x2742 },
{ "a62"sv, 0x2743 },
{ "a63"sv, 0x2744 },
{ "a64"sv, 0x2745 },
{ "a65"sv, 0x2746 },
{ "a66"sv, 0x2747 },
{ "a67"sv, 0x2748 },
{ "a68"sv, 0x2749 },
{ "a69"sv, 0x274A },
{ "a6"sv, 0x271D },
{ "a70"sv, 0x274B },
{ "a71"sv, 0x25CF },
{ "a72"sv, 0x274D },
{ "a73"sv, 0x25A0 },
{ "a74"sv, 0x274F },
{ "a75"sv, 0x2751 },
{ "a76"sv, 0x25B2 },
{ "a77"sv, 0x25BC },
{ "a78"sv, 0x25C6 },
{ "a79"sv, 0x2756 },
{ "a7"sv, 0x271E },
{ "a81"sv, 0x25D7 },
{ "a82"sv, 0x2758 },
{ "a83"sv, 0x2759 },
{ "a84"sv, 0x275A },
{ "a85"sv, 0x276F },
{ "a86"sv, 0x2771 },
{ "a87"sv, 0x2772 },
{ "a88"sv, 0x2773 },
{ "a89"sv, 0x2768 },
{ "a8"sv, 0x271F },
{ "a90"sv, 0x2769 },
{ "a91"sv, 0x276C },
{ "a92"sv, 0x276D },
{ "a93"sv, 0x276A },
{ "a94"sv, 0x276B },
{ "a95"sv, 0x2774 },
{ "a96"sv, 0x2775 },
{ "a97"sv, 0x275B },
{ "a98"sv, 0x275C },
{ "a99"sv, 0x275D },
{ "a9"sv, 0x2720 },
};
static bool are_all_uppercase_hex(StringView component)
{
@ -4259,7 +4463,7 @@ static u32 decode_hex(StringView hex_string)
return code_point;
}
Optional<u32> glyph_name_to_unicode(StringView name)
Optional<u32> glyph_name_to_unicode(StringView name, bool is_zapf_dingbats)
{
// https://github.com/adobe-type-tools/agl-specification?tab=readme-ov-file#2-the-mapping
// "To map a glyph name to a character string, follow the three steps below:
@ -4277,10 +4481,14 @@ Optional<u32> glyph_name_to_unicode(StringView name)
// 3. Map each component to a character string according to the procedure below, and concatenate those strings; the result is the character string to which the glyph name is mapped.
StringView component = name;
// If the font is Zapf Dingbats (PostScript FontName: ZapfDingbats), and the component is in the ITC Zapf Dingbats Glyph List, then map it to the corresponding character in that list."
// FIXME: Implement.
// If the font is Zapf Dingbats (PostScript FontName: ZapfDingbats), and the component is in the ITC Zapf Dingbats Glyph List, then map it to the corresponding character in that list.
if (is_zapf_dingbats) {
auto zapf_dingbats_entry = zapf_dingbats_list.get(component);
if (zapf_dingbats_entry.has_value())
return zapf_dingbats_entry.value();
}
// "Otherwise, if the component is in AGL, then map it to the corresponding character in that list.
// Otherwise, if the component is in AGL, then map it to the corresponding character in that list.
auto agl_entry = glyph_list.get(component);
if (agl_entry.has_value())
return agl_entry.value();

View File

@ -10,6 +10,6 @@
namespace PDF {
Optional<u32> glyph_name_to_unicode(StringView);
// Maps a glyph name to a code point following the Adobe Glyph List specification's mapping steps.
// If is_zapf_dingbats is true, the name is first looked up in the Zapf Dingbats glyph list;
// a name not found there (or any name when the flag is false) is then looked up in the Adobe Glyph List.
// Callers fall back to their own default via value_or() when no value is returned.
Optional<u32> glyph_name_to_unicode(StringView, bool is_zapf_dingbats);
}

View File

@ -15,16 +15,17 @@
namespace PDF {
TrueTypePainter::TrueTypePainter(AK::NonnullRefPtr<Gfx::Font> font, NonnullRefPtr<Encoding> encoding, bool encoding_is_mac_roman_or_win_ansi, bool is_nonsymbolic, Optional<u8> high_byte)
// Stores each argument in the member of the same name (font and encoding are moved in);
// the constructor body itself does no further work.
// is_zapf_dingbats is kept so later glyph-name lookups can pass it to glyph_name_to_unicode().
TrueTypePainter::TrueTypePainter(AK::NonnullRefPtr<Gfx::Font> font, NonnullRefPtr<Encoding> encoding, bool encoding_is_mac_roman_or_win_ansi, bool is_nonsymbolic, Optional<u8> high_byte, bool is_zapf_dingbats)
: m_font(move(font))
, m_encoding(move(encoding))
, m_encoding_is_mac_roman_or_win_ansi(encoding_is_mac_roman_or_win_ansi)
, m_is_nonsymbolic(is_nonsymbolic)
, m_high_byte(high_byte)
, m_is_zapf_dingbats(is_zapf_dingbats)
{
}
NonnullOwnPtr<TrueTypePainter> TrueTypePainter::create(Document* document, NonnullRefPtr<DictObject> const& dict, SimpleFont const& containing_pdf_font, AK::NonnullRefPtr<Gfx::Font> font, NonnullRefPtr<Encoding> encoding)
NonnullOwnPtr<TrueTypePainter> TrueTypePainter::create(Document* document, NonnullRefPtr<DictObject> const& dict, SimpleFont const& containing_pdf_font, AK::NonnullRefPtr<Gfx::Font> font, NonnullRefPtr<Encoding> encoding, bool is_zapf_dingbats)
{
bool encoding_is_mac_roman_or_win_ansi = false;
if (dict->contains(CommonNames::Encoding)) {
@ -55,7 +56,7 @@ NonnullOwnPtr<TrueTypePainter> TrueTypePainter::create(Document* document, Nonnu
}
}
return adopt_own(*new TrueTypePainter { move(font), move(encoding), encoding_is_mac_roman_or_win_ansi, containing_pdf_font.is_nonsymbolic(), high_byte });
return adopt_own(*new TrueTypePainter { move(font), move(encoding), encoding_is_mac_roman_or_win_ansi, containing_pdf_font.is_nonsymbolic(), high_byte, is_zapf_dingbats });
}
static void do_draw_glyph(Gfx::Painter& painter, Gfx::FloatPoint point, float width, u32 unicode, Gfx::Font const& font, ColorOrStyle const& style)
@ -106,7 +107,7 @@ PDFErrorOr<void> TrueTypePainter::draw_glyph(Gfx::Painter& painter, Gfx::FloatPo
// use the (3, 1) algorithm.
// FIXME: Implement (1, 0) subtable support.
auto char_name = m_encoding->get_name(char_code);
u32 unicode = glyph_name_to_unicode(char_name).value_or(char_code);
u32 unicode = glyph_name_to_unicode(char_name, m_is_zapf_dingbats).value_or(char_code);
if (m_font->contains_glyph(unicode)) {
do_draw_glyph(painter, point, width, unicode, *m_font, style);
return {};
@ -132,7 +133,7 @@ PDFErrorOr<void> TrueTypePainter::draw_glyph(Gfx::Painter& painter, Gfx::FloatPo
// "If a character cannot be mapped in any of the ways described above, the results are implementation-dependent."
// FIXME: Do something smarter?
auto char_name = m_encoding->get_name(char_code);
unicode = glyph_name_to_unicode(char_name).value_or(char_code);
unicode = glyph_name_to_unicode(char_name, m_is_zapf_dingbats).value_or(char_code);
}
do_draw_glyph(painter, point, width, unicode, *m_font, style);
@ -143,7 +144,7 @@ Optional<float> TrueTypePainter::get_glyph_width(u8 char_code) const
{
// FIXME: Make this use the full char_code lookup method used in draw_glyph() once that's complete.
auto char_name = m_encoding->get_name(char_code);
u32 unicode = glyph_name_to_unicode(char_name).value_or(char_code);
u32 unicode = glyph_name_to_unicode(char_name, m_is_zapf_dingbats).value_or(char_code);
return m_font->glyph_width(unicode);
}
@ -177,7 +178,8 @@ PDFErrorOr<void> TrueTypeFont::initialize(Document* document, NonnullRefPtr<Dict
if (!effective_encoding)
effective_encoding = Encoding::standard_encoding();
m_font_painter = TrueTypePainter::create(document, dict, *this, *font, *effective_encoding);
bool const is_zapf_dingbats = false;
m_font_painter = TrueTypePainter::create(document, dict, *this, *font, *effective_encoding, is_zapf_dingbats);
return {};
}

View File

@ -14,20 +14,21 @@ namespace PDF {
// Draws glyphs and reports glyph widths for one-byte character codes, using a Gfx::Font
// together with an Encoding that maps character codes to glyph names.
class TrueTypePainter {
public:
static NonnullOwnPtr<TrueTypePainter> create(Document*, NonnullRefPtr<DictObject> const&, SimpleFont const& containing_pdf_font, AK::NonnullRefPtr<Gfx::Font>, NonnullRefPtr<Encoding>);
// is_zapf_dingbats: whether the PDF font being painted is ZapfDingbats; stored in m_is_zapf_dingbats.
static NonnullOwnPtr<TrueTypePainter> create(Document*, NonnullRefPtr<DictObject> const&, SimpleFont const& containing_pdf_font, AK::NonnullRefPtr<Gfx::Font>, NonnullRefPtr<Encoding>, bool is_zapf_dingbats);
PDFErrorOr<void> draw_glyph(Gfx::Painter&, Gfx::FloatPoint, float width, u8 char_code, Renderer const&);
Optional<float> get_glyph_width(u8 char_code) const;
void set_font_size(float font_size);
private:
TrueTypePainter(AK::NonnullRefPtr<Gfx::Font>, NonnullRefPtr<Encoding>, bool encoding_is_mac_roman_or_win_ansi, bool is_nonsymbolic, Optional<u8> high_byte);
TrueTypePainter(AK::NonnullRefPtr<Gfx::Font>, NonnullRefPtr<Encoding>, bool encoding_is_mac_roman_or_win_ansi, bool is_nonsymbolic, Optional<u8> high_byte, bool is_zapf_dingbats);
NonnullRefPtr<Gfx::Font> m_font;
NonnullRefPtr<Encoding> m_encoding;
bool m_encoding_is_mac_roman_or_win_ansi { false };
bool m_is_nonsymbolic { false };
Optional<u8> m_high_byte;
// Passed to glyph_name_to_unicode() so that glyph names are looked up in the Zapf Dingbats list first.
bool m_is_zapf_dingbats { false };
};
class TrueTypeFont : public SimpleFont {

View File

@ -63,9 +63,8 @@ PDFErrorOr<void> Type1Font::initialize(Document* document, NonnullRefPtr<DictObj
// since the names in that font are meaningless.)"
// FIXME: We use Liberation Sans for both Symbol and ZapfDingbats. It doesn't have all Symbol
// characters, or at least not under the codepoints used in AdobeGlyphList. It doesn't
// have any ZapfDingbats characters, or at least not the names for it. Not sure what to do about
// this -- we either need a different font, or need to tweak the encoding somehow.
// (For Helvetica / Times / Courier, the Liberation family doesn't have the right metrics.)
// have most of the ZapfDingbats characters. Not sure what to do about this -- we might need a different font.
// (For Helvetica / Times / Courier, the Liberation family also doesn't have the right metrics.)
if (base_font_name() == "Symbol"sv)
effective_encoding = Encoding::symbol_encoding();
else if (base_font_name() == "ZapfDingbats"sv)
@ -76,7 +75,7 @@ PDFErrorOr<void> Type1Font::initialize(Document* document, NonnullRefPtr<DictObj
// FIXME: For the standard 14 fonts, set some m_flags bits (symbolic/nonsymbolic, italic, bold, fixed pitch, serif).
m_fallback_font_painter = TrueTypePainter::create(document, dict, *this, *font, *effective_encoding);
m_fallback_font_painter = TrueTypePainter::create(document, dict, *this, *font, *effective_encoding, base_font_name() == "ZapfDingbats"sv);
}
VERIFY(m_font_program || m_fallback_font_painter);