LibPDF: Implement CFF supplemental encoding

The main encoding data maps each glyph ID ("GID") to a single codepoint.
If a glyph has several codepoints, the additional ones are stored in a
supplemental table that maps each extra codepoint to the string ID ("SID")
of the glyph's name.

(The charset table already associates each glyph with its name, so the
SID is enough to identify the glyph.)

I haven't seen this used in the wild, but the structure of the
supplemental data is also going to be needed for built-in encodings.
This commit is contained in:
Nico Weber 2023-10-16 16:21:08 -04:00 committed by Andreas Kling
parent 37daeae6fd
commit 1cfe639b6c
Notes: sideshowbarker 2024-07-16 21:39:23 +09:00
2 changed files with 20 additions and 7 deletions

View File

@ -72,7 +72,8 @@ PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPt
// CFF spec, "8 Top DICT INDEX"
int charset_offset = 0;
Vector<u8> encoding_codes;
Vector<u8> encoding_codes; // Maps GID to its codepoint.
HashMap<Card8, SID> encoding_supplemental; // Maps codepoint to SID.
auto charstrings_offset = 0;
Vector<ByteBuffer> subroutines;
float defaultWidthX = 0;
@ -125,7 +126,7 @@ PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPt
dbgln("CFF: Built-in Expert Encoding not yet implemented");
break;
default:
encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset))));
encoding_codes = TRY(parse_encoding(Reader(cff_bytes.slice(encoding_offset)), encoding_supplemental));
break;
}
break;
@ -236,10 +237,14 @@ PDFErrorOr<NonnullRefPtr<CFF>> CFF::create(ReadonlyBytes const& cff_bytes, RefPt
encoding->set(0, ".notdef");
continue;
}
if (i >= encoding_codes.size() || i >= charset.size())
break;
auto code = encoding_codes[i - 1];
auto char_name = charset[i - 1];
encoding->set(code, char_name);
}
for (auto const& entry : encoding_supplemental)
encoding->set(entry.key, resolve_sid(entry.value, strings));
cff->set_encoding(move(encoding));
}
@ -711,7 +716,7 @@ PDFErrorOr<Vector<CFF::Glyph>> CFF::parse_charstrings(Reader&& reader, Vector<By
return glyphs;
}
PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader)
PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader, HashMap<Card8, SID>& supplemental)
{
// CFF spec, "12 Encodings"
Vector<u8> encoding_codes;
@ -739,9 +744,17 @@ PDFErrorOr<Vector<u8>> CFF::parse_encoding(Reader&& reader)
} else
return error(DeprecatedString::formatted("Invalid encoding format: {}", format));
// TODO: support encoding supplements when highest bit is set (tables 14 and 15).
if (format_raw & 0x80)
dbgln("CFF: Support for multiply-encoded glyphs not yet implemented");
if (format_raw & 0x80) {
// CFF spec, "Table 14 Supplemental Encoding Data"
auto n_sups = TRY(reader.try_read<Card8>());
dbgln_if(CFF_DEBUG, "CFF encoding, {} supplemental entries", n_sups);
for (u8 i = 0; i < n_sups; i++) {
// CFF spec, "Table 15 Supplement Format"
auto code = TRY(reader.try_read<Card8>());
SID name = TRY(reader.try_read<SID>());
TRY(supplemental.try_set(code, name));
}
}
return encoding_codes;
}

View File

@ -98,7 +98,7 @@ public:
static DeprecatedFlyString resolve_sid(SID, Vector<StringView> const&);
static PDFErrorOr<Vector<DeprecatedFlyString>> parse_charset(Reader&&, size_t, Vector<StringView> const&);
static PDFErrorOr<Vector<u8>> parse_encoding(Reader&&);
static PDFErrorOr<Vector<u8>> parse_encoding(Reader&&, HashMap<Card8, SID>& supplemental);
};
}