diff --git a/Meta/CMake/unicode_data.cmake b/Meta/CMake/unicode_data.cmake index 386fe258060..d1e5645dde9 100644 --- a/Meta/CMake/unicode_data.cmake +++ b/Meta/CMake/unicode_data.cmake @@ -38,18 +38,12 @@ set(PROP_ALIAS_PATH "${UCD_PATH}/${PROP_ALIAS_SOURCE}") set(PROP_VALUE_ALIAS_SOURCE "PropertyValueAliases.txt") set(PROP_VALUE_ALIAS_PATH "${UCD_PATH}/${PROP_VALUE_ALIAS_SOURCE}") -set(NAME_ALIAS_SOURCE "NameAliases.txt") -set(NAME_ALIAS_PATH "${UCD_PATH}/${NAME_ALIAS_SOURCE}") - set(SCRIPTS_SOURCE "Scripts.txt") set(SCRIPTS_PATH "${UCD_PATH}/${SCRIPTS_SOURCE}") set(SCRIPT_EXTENSIONS_SOURCE "ScriptExtensions.txt") set(SCRIPT_EXTENSIONS_PATH "${UCD_PATH}/${SCRIPT_EXTENSIONS_SOURCE}") -set(BLOCKS_SOURCE "Blocks.txt") -set(BLOCKS_PATH "${UCD_PATH}/${BLOCKS_SOURCE}") - set(EMOJI_DATA_SOURCE "emoji/emoji-data.txt") set(EMOJI_DATA_PATH "${UCD_PATH}/${EMOJI_DATA_SOURCE}") @@ -90,10 +84,8 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_BINARY_PROP_SOURCE}" "${DERIVED_BINARY_PROP_PATH}") extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_ALIAS_SOURCE}" "${PROP_ALIAS_PATH}") extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_VALUE_ALIAS_SOURCE}" "${PROP_VALUE_ALIAS_PATH}") - extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NAME_ALIAS_SOURCE}" "${NAME_ALIAS_PATH}") extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPTS_SOURCE}" "${SCRIPTS_PATH}") extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPT_EXTENSIONS_SOURCE}" "${SCRIPT_EXTENSIONS_PATH}") - extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${BLOCKS_SOURCE}" "${BLOCKS_PATH}") extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${EMOJI_DATA_SOURCE}" "${EMOJI_DATA_PATH}") extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NORM_PROPS_SOURCE}" "${NORM_PROPS_PATH}") extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${GRAPHEME_BREAK_PROP_SOURCE}" "${GRAPHEME_BREAK_PROP_PATH}") @@ -129,7 +121,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) "${UCD_VERSION_FILE}" "${UNICODE_DATA_HEADER}" "${UNICODE_DATA_IMPLEMENTATION}" - arguments -u "${UNICODE_DATA_PATH}" -s "${SPECIAL_CASING_PATH}" -o "${CASE_FOLDING_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -k "${BLOCKS_PATH}" -e "${EMOJI_DATA_PATH}" -m "${NAME_ALIAS_PATH}" -n "${NORM_PROPS_PATH}" -f "${GRAPHEME_BREAK_PROP_PATH}" -w "${WORD_BREAK_PROP_PATH}" -i "${SENTENCE_BREAK_PROP_PATH}" + arguments -u "${UNICODE_DATA_PATH}" -s "${SPECIAL_CASING_PATH}" -o "${CASE_FOLDING_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -e "${EMOJI_DATA_PATH}" -n "${NORM_PROPS_PATH}" -f "${GRAPHEME_BREAK_PROP_PATH}" -w "${WORD_BREAK_PROP_PATH}" -i "${SENTENCE_BREAK_PROP_PATH}" ) invoke_generator( "EmojiData" diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp index 466ccdc42b4..4a6bb3685ac 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp @@ -65,11 +65,6 @@ struct Normalization { using NormalizationProps = HashMap>; -struct CodePointName { - Unicode::CodePointRange code_point_range; - size_t name { 0 }; -}; - struct CasingTable { bool operator==(CasingTable const& other) const { @@ -93,7 +88,6 @@ struct CasingTable { struct CodePointData { u32 code_point { 0 }; ByteString name; - Optional abbreviation; ByteString bidi_class; Optional decomposition_mapping; Optional numeric_value_decimal; @@ -105,11 +99,6 @@ struct CodePointData { CasingTable casing; }; -struct BlockName { - Unicode::CodePointRange code_point_range; - size_t name { 0 }; -}; - using PropertyTable = Vector; static constexpr auto CODE_POINT_TABLES_MSB_COUNT = 16u; @@ -136,8 +125,6 @@ struct CodePointComposition { }; struct UnicodeData { - UniqueStringStorage unique_strings; - u32 code_points_with_decomposition_mapping { 0 }; Vector decomposition_mappings; HashMap> composition_mappings; @@ -154,10 +141,6 @@ struct UnicodeData { Vector code_point_data; - HashMap code_point_abbreviations; - HashMap code_point_display_name_aliases; - Vector code_point_display_names; - // https://www.unicode.org/reports/tr44/#General_Category_Values PropList general_categories; Vector general_category_aliases; @@ -178,8 +161,6 @@ struct UnicodeData { Vector script_aliases; PropList script_extensions; - Vector block_display_names; - // FIXME: We are not yet doing anything with this data. It will be needed for String.prototype.normalize. NormalizationProps normalization_props; @@ -430,37 +411,6 @@ static ErrorOr parse_alias_list(Core::InputBufferedFile& file, PropList co return {}; } -static ErrorOr parse_name_aliases(Core::InputBufferedFile& file, UnicodeData& unicode_data) -{ - Array buffer; - - while (TRY(file.can_read_line())) { - auto line = TRY(file.read_line(buffer)); - - if (line.is_empty() || line.starts_with('#')) - continue; - - auto segments = line.split_view(';', SplitBehavior::KeepEmpty); - VERIFY(segments.size() == 3); - - auto code_point = AK::StringUtils::convert_to_uint_from_hex(segments[0].trim_whitespace()); - auto alias = segments[1].trim_whitespace(); - auto reason = segments[2].trim_whitespace(); - - if (reason == "abbreviation"sv) { - auto index = unicode_data.unique_strings.ensure(alias); - unicode_data.code_point_abbreviations.set(*code_point, index); - } else if (reason.is_one_of("correction"sv, "control"sv)) { - if (!unicode_data.code_point_display_name_aliases.contains(*code_point)) { - auto index = unicode_data.unique_strings.ensure(alias); - unicode_data.code_point_display_name_aliases.set(*code_point, index); - } - } - } - - return {}; -} - static ErrorOr parse_value_alias_list(Core::InputBufferedFile& file, StringView desired_category, Vector const& value_list, Vector& prop_aliases, bool primary_value_is_first = true, bool sanitize_alias = false) { TRY(file.seek(0, SeekMode::SetPosition)); @@ -550,68 +500,6 @@ static ErrorOr parse_normalization_props(Core::InputBufferedFile& file, Un return {}; } -static void add_canonical_code_point_name(Unicode::CodePointRange range, StringView name, UnicodeData& unicode_data) -{ - // https://www.unicode.org/versions/Unicode15.0.0/ch04.pdf#G142981 - // FIXME: Implement the NR1 rules for Hangul syllables. - - struct CodePointNameFormat { - Unicode::CodePointRange code_point_range; - StringView name; - }; - - // These code point ranges are the NR2 set of name replacements defined by Table 4-8. - constexpr Array s_ideographic_replacements { { - { { 0x3400, 0x4DBF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv }, - { { 0x4E00, 0x9FFF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv }, - { { 0xF900, 0xFA6D }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv }, - { { 0xFA70, 0xFAD9 }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv }, - { { 0x17000, 0x187F7 }, "TANGUT IDEOGRAPH-{:X}"sv }, - { { 0x18B00, 0x18CD5 }, "KHITAN SMALL SCRIPT CHARACTER-{:X}"sv }, - { { 0x18D00, 0x18D08 }, "TANGUT IDEOGRAPH-{:X}"sv }, - { { 0x1B170, 0x1B2FB }, "NUSHU CHARACTER-{:X}"sv }, - { { 0x20000, 0x2A6DF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv }, - { { 0x2A700, 0x2B739 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv }, - { { 0x2B740, 0x2B81D }, "CJK UNIFIED IDEOGRAPH-{:X}"sv }, - { { 0x2B820, 0x2CEA1 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv }, - { { 0x2CEB0, 0x2EBE0 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv }, - { { 0x2F800, 0x2FA1D }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv }, - { { 0x30000, 0x3134A }, "CJK UNIFIED IDEOGRAPH-{:X}"sv }, - { { 0x31350, 0x323AF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv }, - } }; - - auto it = find_if(s_ideographic_replacements.begin(), s_ideographic_replacements.end(), - [&](auto const& replacement) { - return replacement.code_point_range.first == range.first; - }); - - if (it != s_ideographic_replacements.end()) { - auto index = unicode_data.unique_strings.ensure(it->name); - unicode_data.code_point_display_names.append({ it->code_point_range, index }); - return; - } - - it = find_if(s_ideographic_replacements.begin(), s_ideographic_replacements.end(), - [&](auto const& replacement) { - return (replacement.code_point_range.first <= range.first) && (range.first <= replacement.code_point_range.last); - }); - - if (it != s_ideographic_replacements.end()) { - // Drop code points that will have been captured by a range defined by the ideographic replacements. - return; - } - - if (auto alias = unicode_data.code_point_display_name_aliases.get(range.first); alias.has_value()) { - // NR4 states that control code points have a null string as their name. Our implementation - // uses the control code's alias as its display name. - unicode_data.code_point_display_names.append({ range, *alias }); - return; - } - - auto index = unicode_data.unique_strings.ensure(name); - unicode_data.code_point_display_names.append({ range, index }); -} - static Optional parse_decomposition_mapping(StringView string, UnicodeData& unicode_data) { if (string.is_empty()) @@ -660,29 +548,6 @@ static void add_composition_mapping(u32 code_point, CodePointDecomposition& deco unicode_data.composition_mappings.ensure(first_code_point).append(CodePointComposition { .second_code_point = second_code_point, .combined_code_point = code_point }); } -static ErrorOr parse_block_display_names(Core::InputBufferedFile& file, UnicodeData& unicode_data) -{ - Array buffer; - while (TRY(file.can_read_line())) { - auto line = TRY(file.read_line(buffer)); - if (line.is_empty() || line.starts_with('#')) - continue; - - auto segments = line.split_view(';', SplitBehavior::KeepEmpty); - VERIFY(segments.size() == 2); - - auto code_point_range = parse_code_point_range(segments[0].trim_whitespace()); - auto display_name = segments[1].trim_whitespace(); - - auto index = unicode_data.unique_strings.ensure(display_name); - unicode_data.block_display_names.append({ code_point_range, index }); - } - - TRY(file.seek(0, SeekMode::SetPosition)); - - return {}; -} - static ErrorOr parse_unicode_data(Core::InputBufferedFile& file, UnicodeData& unicode_data) { Optional code_point_range_start; @@ -719,9 +584,6 @@ static ErrorOr parse_unicode_data(Core::InputBufferedFile& file, UnicodeDa data.casing.simple_lowercase_mapping = AK::StringUtils::convert_to_uint_from_hex(segments[13]); data.casing.simple_titlecase_mapping = AK::StringUtils::convert_to_uint_from_hex(segments[14]); - if (auto abbreviation = unicode_data.code_point_abbreviations.get(data.code_point); abbreviation.has_value()) - data.abbreviation = *abbreviation; - if (!assigned_code_point_range_start.has_value()) assigned_code_point_range_start = data.code_point; @@ -742,10 +604,8 @@ static ErrorOr parse_unicode_data(Core::InputBufferedFile& file, UnicodeDa data.name = data.name.substring(1, data.name.length() - 8); code_point_range_start.clear(); - add_canonical_code_point_name(code_point_range, data.name, unicode_data); unicode_data.code_point_bidirectional_classes.append({ code_point_range, data.bidi_class }); } else { - add_canonical_code_point_name({ data.code_point, data.code_point }, data.name, unicode_data); unicode_data.code_point_bidirectional_classes.append({ { data.code_point, data.code_point }, data.bidi_class }); if ((data.code_point > 0) && (data.code_point - previous_code_point) != 1) { @@ -905,7 +765,6 @@ static ErrorOr generate_unicode_data_implementation(Core::InputBufferedFil StringBuilder builder; SourceGenerator generator { builder }; - generator.set("string_index_type"sv, unicode_data.unique_strings.type_that_fits()); generator.set("special_casing_size", ByteString::number(unicode_data.special_casing.size())); generator.set("case_folding_size", ByteString::number(unicode_data.case_folding.size())); @@ -927,8 +786,6 @@ static ErrorOr generate_unicode_data_implementation(Core::InputBufferedFil namespace Unicode { )~~~"); - unicode_data.unique_strings.generate(generator); - auto append_list_and_size = [&](auto const& list, StringView format) { if (list.is_empty()) { generator.append(", {}, 0"); @@ -998,11 +855,6 @@ struct CasingTable { u32 case_folding_size { 0 }; }; -struct CodePointAbbreviation { - u32 code_point { 0 }; - @string_index_type@ abbreviation { 0 }; -}; - template struct CodePointComparator { constexpr int operator()(u32 code_point, MappingType const& mapping) @@ -1011,30 +863,6 @@ struct CodePointComparator { } }; -struct BlockNameData { - CodePointRange code_point_range {}; - @string_index_type@ display_name { 0 }; -}; - -struct BlockNameComparator : public CodePointRangeComparator { - constexpr int operator()(u32 code_point, BlockNameData const& name) - { - return CodePointRangeComparator::operator()(code_point, name.code_point_range); - } -}; - -struct CodePointName { - CodePointRange code_point_range {}; - @string_index_type@ display_name { 0 }; -}; - -struct CodePointNameComparator : public CodePointRangeComparator { - constexpr int operator()(u32 code_point, CodePointName const& name) - { - return CodePointRangeComparator::operator()(code_point, name.code_point_range); - } -}; - struct BidiClassData { CodePointRange code_point_range {}; BidirectionalClass bidi_class {}; @@ -1106,7 +934,6 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { )~~~"); }; - append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; }); append_code_point_mappings("decomposition"sv, "CodePointDecompositionRaw"sv, unicode_data.code_points_with_decomposition_mapping, [](auto const& data) { return data.decomposition_mapping; }); size_t composition_mappings_size = 0; @@ -1254,39 +1081,6 @@ static constexpr Array<@type@, @size@> @name@ { { TRY(append_code_point_tables("s_word_break_properties"sv, unicode_data.word_break_tables, append_property_table)); TRY(append_code_point_tables("s_sentence_break_properties"sv, unicode_data.sentence_break_tables, append_property_table)); - auto append_code_point_display_names = [&](StringView type, StringView name, auto const& display_names) { - constexpr size_t max_values_per_row = 30; - size_t values_in_current_row = 0; - - generator.set("type", type); - generator.set("name", name); - generator.set("size", ByteString::number(display_names.size())); - - generator.append(R"~~~( -static constexpr Array<@type@, @size@> @name@ { { - )~~~"); - for (auto const& display_name : display_names) { - if (values_in_current_row++ > 0) - generator.append(", "); - - generator.set("first", ByteString::formatted("{:#x}", display_name.code_point_range.first)); - generator.set("last", ByteString::formatted("{:#x}", display_name.code_point_range.last)); - generator.set("name", ByteString::number(display_name.name)); - generator.append("{ { @first@, @last@ }, @name@ }"); - - if (values_in_current_row == max_values_per_row) { - values_in_current_row = 0; - generator.append(",\n "); - } - } - generator.append(R"~~~( -} }; -)~~~"); - }; - - append_code_point_display_names("BlockNameData"sv, "s_block_display_names"sv, unicode_data.block_display_names); - append_code_point_display_names("CodePointName"sv, "s_code_point_display_names"sv, unicode_data.code_point_display_names); - { constexpr size_t max_bidi_classes_per_row = 20; size_t bidi_classes_in_current_row = 0; @@ -1315,44 +1109,6 @@ static constexpr Array s_bidirectional_classes { { } generator.append(R"~~~( -Optional code_point_block_display_name(u32 code_point) -{ - if (auto const* entry = binary_search(s_block_display_names, code_point, nullptr, BlockNameComparator {})) - return decode_string(entry->display_name); - - return {}; -} - -ReadonlySpan block_display_names() -{ - static auto display_names = []() { - Array display_names; - - for (size_t i = 0; i < s_block_display_names.size(); ++i) { - auto const& display_name = s_block_display_names[i]; - display_names[i] = { display_name.code_point_range, decode_string(display_name.display_name) }; - } - - return display_names; - }(); - - return display_names.span(); -} - -Optional code_point_display_name(u32 code_point) -{ - if (auto const* entry = binary_search(s_code_point_display_names, code_point, nullptr, CodePointNameComparator {})) { - auto display_name = decode_string(entry->display_name); - - if (display_name.ends_with("{:X}"sv)) - return ByteString::formatted(display_name, code_point); - - return display_name; - } - - return {}; -} - static CasingTable const& casing_table_for_code_point(u32 code_point) { auto stage1_index = code_point >> @CODE_POINT_TABLES_LSB_COUNT@; @@ -1411,17 +1167,6 @@ ReadonlySpan case_folding_mapping(u32 code_point) return s_case_folding.span().slice(casing_table.case_folding_start_index, casing_table.case_folding_size); } -Optional code_point_abbreviation(u32 code_point) -{ - auto const* mapping = binary_search(s_abbreviation_mappings, code_point, nullptr, CodePointComparator {}); - if (mapping == nullptr) - return {}; - if (mapping->abbreviation == 0) - return {}; - - return decode_string(mapping->abbreviation); -} - Optional code_point_decomposition(u32 code_point) { auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator {}); @@ -1842,10 +1587,8 @@ ErrorOr serenity_main(Main::Arguments arguments) StringView derived_binary_prop_path; StringView prop_alias_path; StringView prop_value_alias_path; - StringView name_alias_path; StringView scripts_path; StringView script_extensions_path; - StringView blocks_path; StringView emoji_data_path; StringView normalization_path; StringView grapheme_break_path; @@ -1864,10 +1607,8 @@ ErrorOr serenity_main(Main::Arguments arguments) args_parser.add_option(derived_binary_prop_path, "Path to DerivedBinaryProperties.txt file", "derived-binary-prop-path", 'b', "derived-binary-prop-path"); args_parser.add_option(prop_alias_path, "Path to PropertyAliases.txt file", "prop-alias-path", 'a', "prop-alias-path"); args_parser.add_option(prop_value_alias_path, "Path to PropertyValueAliases.txt file", "prop-value-alias-path", 'v', "prop-value-alias-path"); - args_parser.add_option(name_alias_path, "Path to NameAliases.txt file", "name-alias-path", 'm', "name-alias-path"); args_parser.add_option(scripts_path, "Path to Scripts.txt file", "scripts-path", 'r', "scripts-path"); args_parser.add_option(script_extensions_path, "Path to ScriptExtensions.txt file", "script-extensions-path", 'x', "script-extensions-path"); - args_parser.add_option(blocks_path, "Path to Blocks.txt file", "blocks-path", 'k', "blocks-path"); args_parser.add_option(emoji_data_path, "Path to emoji-data.txt file", "emoji-data-path", 'e', "emoji-data-path"); args_parser.add_option(normalization_path, "Path to DerivedNormalizationProps.txt file", "normalization-path", 'n', "normalization-path"); args_parser.add_option(grapheme_break_path, "Path to GraphemeBreakProperty.txt file", "grapheme-break-path", 'f', "grapheme-break-path"); @@ -1886,10 +1627,8 @@ ErrorOr serenity_main(Main::Arguments arguments) auto derived_binary_prop_file = TRY(open_file(derived_binary_prop_path, Core::File::OpenMode::Read)); auto prop_alias_file = TRY(open_file(prop_alias_path, Core::File::OpenMode::Read)); auto prop_value_alias_file = TRY(open_file(prop_value_alias_path, Core::File::OpenMode::Read)); - auto name_alias_file = TRY(open_file(name_alias_path, Core::File::OpenMode::Read)); auto scripts_file = TRY(open_file(scripts_path, Core::File::OpenMode::Read)); auto script_extensions_file = TRY(open_file(script_extensions_path, Core::File::OpenMode::Read)); - auto blocks_file = TRY(open_file(blocks_path, Core::File::OpenMode::Read)); auto emoji_data_file = TRY(open_file(emoji_data_path, Core::File::OpenMode::Read)); auto normalization_file = TRY(open_file(normalization_path, Core::File::OpenMode::Read)); auto grapheme_break_file = TRY(open_file(grapheme_break_path, Core::File::OpenMode::Read)); @@ -1908,8 +1647,6 @@ ErrorOr serenity_main(Main::Arguments arguments) TRY(parse_alias_list(*prop_alias_file, unicode_data.prop_list, unicode_data.prop_aliases)); TRY(parse_prop_list(*scripts_file, unicode_data.script_list)); TRY(parse_prop_list(*script_extensions_file, unicode_data.script_extensions, true)); - TRY(parse_block_display_names(*blocks_file, unicode_data)); - TRY(parse_name_aliases(*name_alias_file, unicode_data)); TRY(parse_prop_list(*grapheme_break_file, unicode_data.grapheme_break_props)); TRY(parse_prop_list(*word_break_file, unicode_data.word_break_props)); TRY(parse_prop_list(*sentence_break_file, unicode_data.sentence_break_props)); diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp index eddfb260ebb..a428cbcc26f 100644 --- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp +++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp @@ -330,15 +330,6 @@ TEST_CASE(script) } } -TEST_CASE(block) -{ - for (u32 code_point = 0x0000; code_point <= 0x007F; ++code_point) - EXPECT_EQ("Basic Latin"sv, Unicode::code_point_block_display_name(code_point).value()); - - for (u32 code_point = 0x0370; code_point <= 0x03FF; ++code_point) - EXPECT_EQ("Greek and Coptic"sv, Unicode::code_point_block_display_name(code_point).value()); -} - TEST_CASE(script_extension) { auto script = [](StringView name) { @@ -390,38 +381,6 @@ TEST_CASE(script_extension) EXPECT(Unicode::code_point_has_script_extension(0x101fd, script_inherited)); } -TEST_CASE(code_point_display_name) -{ - auto code_point_display_name = [](u32 code_point) { - auto name = Unicode::code_point_display_name(code_point); - VERIFY(name.has_value()); - return name.release_value(); - }; - - // Control code points. - EXPECT_EQ(code_point_display_name(0), "NULL"sv); - EXPECT_EQ(code_point_display_name(1), "START OF HEADING"sv); - EXPECT_EQ(code_point_display_name(0xa), "LINE FEED"sv); - - // Ideographic code points (which already appeared in a range in UnicodeData.txt). - EXPECT_EQ(code_point_display_name(0x3400), "CJK UNIFIED IDEOGRAPH-3400"sv); - EXPECT_EQ(code_point_display_name(0x3401), "CJK UNIFIED IDEOGRAPH-3401"sv); - EXPECT_EQ(code_point_display_name(0x3402), "CJK UNIFIED IDEOGRAPH-3402"sv); - EXPECT_EQ(code_point_display_name(0x4dbf), "CJK UNIFIED IDEOGRAPH-4DBF"sv); - - EXPECT_EQ(code_point_display_name(0x20000), "CJK UNIFIED IDEOGRAPH-20000"sv); - EXPECT_EQ(code_point_display_name(0x20001), "CJK UNIFIED IDEOGRAPH-20001"sv); - EXPECT_EQ(code_point_display_name(0x20002), "CJK UNIFIED IDEOGRAPH-20002"sv); - EXPECT_EQ(code_point_display_name(0x2a6df), "CJK UNIFIED IDEOGRAPH-2A6DF"sv); - EXPECT(!Unicode::code_point_display_name(0x2a6e0).has_value()); - - // Ideographic code points (which appeared individually in UnicodeData.txt and were coalesced into a range). - EXPECT_EQ(code_point_display_name(0x2f800), "CJK COMPATIBILITY IDEOGRAPH-2F800"sv); - EXPECT_EQ(code_point_display_name(0x2f801), "CJK COMPATIBILITY IDEOGRAPH-2F801"sv); - EXPECT_EQ(code_point_display_name(0x2f802), "CJK COMPATIBILITY IDEOGRAPH-2F802"sv); - EXPECT_EQ(code_point_display_name(0x2fa1d), "CJK COMPATIBILITY IDEOGRAPH-2FA1D"sv); -} - TEST_CASE(code_point_bidirectional_character_type) { auto code_point_bidi_class = [](u32 code_point) { diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp index b6a5d81f0fa..72206192a04 100644 --- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp +++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp @@ -21,11 +21,7 @@ namespace Unicode { -Optional __attribute__((weak)) code_point_display_name(u32) { return {}; } -Optional __attribute__((weak)) code_point_block_display_name(u32) { return {}; } -Optional __attribute__((weak)) code_point_abbreviation(u32) { return {}; } u32 __attribute__((weak)) canonical_combining_class(u32) { return {}; } -ReadonlySpan __attribute__((weak)) block_display_names() { return {}; } u32 __attribute__((weak)) to_unicode_lowercase(u32 code_point) { diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.h b/Userland/Libraries/LibUnicode/CharacterTypes.h index 53a85209efa..7452fe5aa21 100644 --- a/Userland/Libraries/LibUnicode/CharacterTypes.h +++ b/Userland/Libraries/LibUnicode/CharacterTypes.h @@ -29,17 +29,6 @@ struct CodePointRangeComparator { } }; -struct BlockName { - CodePointRange code_point_range {}; - StringView display_name; -}; - -Optional code_point_display_name(u32 code_point); -Optional code_point_block_display_name(u32 code_point); -Optional code_point_abbreviation(u32 code_point); - -ReadonlySpan block_display_names(); - u32 canonical_combining_class(u32 code_point); // Note: The single code point case conversions only perform simple case folding.