From c950f8861198b980fa36698667704954b19fb741 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 20 Jul 2023 18:26:48 -0400 Subject: [PATCH] LibUnicode: Stop generating Block property data We started generating this data in commit 0505e03, but it was unused. It's still not used, so let's remove it, rather than bloating the size of libunicode.so with unused data. If we need it in the future, it's trivial to add back. Note we *have* always used the block name data from that commit, and that is still present here. --- .../LibUnicode/GenerateUnicodeData.cpp | 11 -------- .../LibUnicode/TestUnicodeCharacterTypes.cpp | 28 ------------------- .../Libraries/LibUnicode/CharacterTypes.cpp | 3 -- .../Libraries/LibUnicode/CharacterTypes.h | 3 -- 4 files changed, 45 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp index 8b877720031..1517c7b34d9 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp @@ -156,10 +156,6 @@ struct UnicodeData { Vector script_aliases; PropList script_extensions; - PropList block_list { - { "No_Block"sv, {} }, - }; - Vector block_aliases; Vector block_display_names; // FIXME: We are not yet doing anything with this data. It will be needed for String.prototype.normalize. @@ -814,7 +810,6 @@ namespace Unicode { generate_enum("GeneralCategory"sv, {}, unicode_data.general_categories.keys(), unicode_data.general_category_aliases); generate_enum("Property"sv, {}, unicode_data.prop_list.keys(), unicode_data.prop_aliases); generate_enum("Script"sv, {}, unicode_data.script_list.keys(), unicode_data.script_aliases); - generate_enum("Block"sv, {}, unicode_data.block_list.keys(), unicode_data.block_aliases); generate_enum("GraphemeBreakProperty"sv, {}, unicode_data.grapheme_break_props.keys()); generate_enum("WordBreakProperty"sv, {}, unicode_data.word_break_props.keys()); generate_enum("SentenceBreakProperty"sv, {}, unicode_data.sentence_break_props.keys()); @@ -1149,7 +1144,6 @@ static constexpr Array, @size@> @name@ { {)~~~"); append_prop_list("s_properties"sv, "s_property_{}"sv, unicode_data.prop_list); append_prop_list("s_scripts"sv, "s_script_{}"sv, unicode_data.script_list); append_prop_list("s_script_extensions"sv, "s_script_extension_{}"sv, unicode_data.script_extensions); - append_prop_list("s_blocks"sv, "s_block_{}"sv, unicode_data.block_list); append_prop_list("s_grapheme_break_properties"sv, "s_grapheme_break_property_{}"sv, unicode_data.grapheme_break_props); append_prop_list("s_word_break_properties"sv, "s_word_break_property_{}"sv, unicode_data.word_break_props); append_prop_list("s_sentence_break_properties"sv, "s_sentence_break_property_{}"sv, unicode_data.sentence_break_props); @@ -1343,9 +1337,6 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@) append_prop_search("Script"sv, "script_extension"sv, "s_script_extensions"sv); TRY(append_from_string("Script"sv, "script"sv, unicode_data.script_list, unicode_data.script_aliases)); - append_prop_search("Block"sv, "block"sv, "s_blocks"sv); - TRY(append_from_string("Block"sv, "block"sv, unicode_data.block_list, unicode_data.block_aliases)); - append_prop_search("GraphemeBreakProperty"sv, "grapheme_break_property"sv, "s_grapheme_break_properties"sv); append_prop_search("WordBreakProperty"sv, "word_break_property"sv, "s_word_break_properties"sv); append_prop_search("SentenceBreakProperty"sv, "sentence_break_property"sv, "s_sentence_break_properties"sv); @@ -1564,7 +1555,6 @@ ErrorOr serenity_main(Main::Arguments arguments) TRY(parse_prop_list(*scripts_file, unicode_data.script_list)); TRY(parse_prop_list(*script_extensions_file, unicode_data.script_extensions, true)); TRY(parse_block_display_names(*blocks_file, unicode_data)); - TRY(parse_prop_list(*blocks_file, unicode_data.block_list, false, true)); TRY(parse_name_aliases(*name_alias_file, unicode_data)); TRY(parse_prop_list(*grapheme_break_file, unicode_data.grapheme_break_props)); TRY(parse_prop_list(*word_break_file, unicode_data.word_break_props)); @@ -1574,7 +1564,6 @@ ErrorOr serenity_main(Main::Arguments arguments) TRY(parse_unicode_data(*unicode_data_file, unicode_data)); TRY(parse_value_alias_list(*prop_value_alias_file, "gc"sv, unicode_data.general_categories.keys(), unicode_data.general_category_aliases)); TRY(parse_value_alias_list(*prop_value_alias_file, "sc"sv, unicode_data.script_list.keys(), unicode_data.script_aliases, false)); - TRY(parse_value_alias_list(*prop_value_alias_file, "blk"sv, unicode_data.block_list.keys(), unicode_data.block_aliases, false, true)); TRY(normalize_script_extensions(unicode_data.script_extensions, unicode_data.script_list, unicode_data.script_aliases)); TRY(generate_unicode_data_header(*generated_header_file, unicode_data)); diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp index 211b4369df6..d2ea8a54fe6 100644 --- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp +++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp @@ -727,34 +727,6 @@ TEST_CASE(script) TEST_CASE(block) { - auto block = [](StringView name) { - auto block = Unicode::block_from_string(name); - VERIFY(block.has_value()); - return *block; - }; - - auto no_block = block("No_Block"sv); - auto block_nb = block("NB"sv); - EXPECT_EQ(no_block, block_nb); - - auto block_basic_latin = block("Basic_Latin"sv); - auto block_ascii = block("ASCII"sv); - EXPECT_EQ(block_basic_latin, block_ascii); - - auto block_greek_coptic = block("Greek_And_Coptic"sv); - auto block_greek = block("Greek"sv); - EXPECT_EQ(block_greek_coptic, block_greek); - - auto block_variation = block("Variation_Selectors_Supplement"sv); - auto block_vs_sup = block("VS_Sup"sv); - EXPECT_EQ(block_variation, block_vs_sup); - - for (u32 code_point = 0x0000; code_point <= 0x007F; ++code_point) - EXPECT(Unicode::code_point_has_block(code_point, block_basic_latin)); - - for (u32 code_point = 0xE0100; code_point <= 0xE01EF; ++code_point) - EXPECT(Unicode::code_point_has_block(code_point, block_variation)); - for (u32 code_point = 0x0000; code_point <= 0x007F; ++code_point) EXPECT_EQ("Basic Latin"sv, Unicode::code_point_block_display_name(code_point).value()); diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp index 098f53d8b84..744b7464dcd 100644 --- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp +++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp @@ -143,9 +143,6 @@ Optional