LibUnicode: Update code point ideographic replacements for Unicode 15

This commit is contained in:
Timothy Flynn 2022-10-06 09:02:04 -04:00 committed by Linus Groh
parent 400cc41176
commit f38c68177b
Notes: sideshowbarker 2024-07-17 06:10:42 +09:00
2 changed files with 8 additions and 6 deletions

View File

@ -487,7 +487,7 @@ static ErrorOr<void> parse_normalization_props(Core::Stream::BufferedFile& file,
static void add_canonical_code_point_name(CodePointRange range, StringView name, UnicodeData& unicode_data)
{
// https://www.unicode.org/versions/Unicode14.0.0/ch04.pdf#G142981
// https://www.unicode.org/versions/Unicode15.0.0/ch04.pdf#G142981
// FIXME: Implement the NR1 rules for Hangul syllables.
struct CodePointNameFormat {
@ -496,22 +496,23 @@ static void add_canonical_code_point_name(CodePointRange range, StringView name,
};
// These code point ranges are the NR2 set of name replacements defined by Table 4-8.
constexpr Array<CodePointNameFormat, 15> s_ideographic_replacements { {
constexpr Array<CodePointNameFormat, 16> s_ideographic_replacements { {
{ { 0x3400, 0x4DBF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x4E00, 0x9FFC }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x4E00, 0x9FFF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0xF900, 0xFA6D }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv },
{ { 0xFA70, 0xFAD9 }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv },
{ { 0x17000, 0x187F7 }, "TANGUT IDEOGRAPH-{:X}"sv },
{ { 0x18B00, 0x18CD5 }, "KHITAN SMALL SCRIPT CHARACTER-{:X}"sv },
{ { 0x18D00, 0x18D08 }, "TANGUT IDEOGRAPH-{:X}"sv },
{ { 0x1B170, 0x1B2FB }, "NUSHU CHARACTER-{:X}"sv },
{ { 0x20000, 0x2A6DD }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x2A700, 0x2B734 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x20000, 0x2A6DF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x2A700, 0x2B739 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x2B740, 0x2B81D }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x2B820, 0x2CEA1 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x2CEB0, 0x2EBE0 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x2F800, 0x2FA1D }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv },
{ { 0x30000, 0x3134A }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
{ { 0x31350, 0x323AF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
} };
auto it = find_if(s_ideographic_replacements.begin(), s_ideographic_replacements.end(),

View File

@ -689,7 +689,8 @@ TEST_CASE(code_point_display_name)
EXPECT_EQ(code_point_display_name(0x20000), "CJK UNIFIED IDEOGRAPH-20000"sv);
EXPECT_EQ(code_point_display_name(0x20001), "CJK UNIFIED IDEOGRAPH-20001"sv);
EXPECT_EQ(code_point_display_name(0x20002), "CJK UNIFIED IDEOGRAPH-20002"sv);
EXPECT(!Unicode::code_point_display_name(0x2a6df).has_value());
EXPECT_EQ(code_point_display_name(0x2a6df), "CJK UNIFIED IDEOGRAPH-2A6DF"sv);
EXPECT(!Unicode::code_point_display_name(0x2a6e0).has_value());
// Ideographic code points (which appeared individually in UnicodeData.txt and were coalesced into a range).
EXPECT_EQ(code_point_display_name(0x2f800), "CJK COMPATIBILITY IDEOGRAPH-2F800"sv);