# Patch summary: add simple titlecase code point mapping to LibUnicode.
#
# GenerateUnicodeData.cpp (code generator):
#   - UnicodeData gains a `simple_titlecase_mapping_size` counter, initialised to 0.
#   - parse_unicode_data() increments that counter for every entry whose
#     `simple_titlecase_mapping` has a value, alongside the existing
#     uppercase/lowercase counters.
#   - A "titlecase" table is emitted through append_code_point_mappings(), sized by
#     the new counter, and a `to_unicode_titlecase` lookup over
#     `s_titlecase_mappings` is emitted through append_code_point_mapping_search().
#     NOTE(review): the "code_point"sv argument is presumably the value returned
#     when no mapping exists (the range tests below expect unmapped code points to
#     come back unchanged) - confirm against the generator helper.
#
# TestUnicodeCharacterTypes.cpp:
#   - to_unicode_uppercase gains expectations for U+01C6, U+01C9, U+01CC and U+01F3,
#     whose uppercase and titlecase results differ.
#   - New TEST_CASE(to_unicode_titlecase): compares against toupper via
#     compare_to_ascii, then checks U+03C9/U+03A9, the range-encoded code points
#     U+3400..U+4DBF, and the same four digraph code points.
#
# CharacterTypes.cpp / CharacterTypes.h:
#   - Adds a weak `to_unicode_titlecase(u32)` that returns
#     to_ascii_uppercase(code_point), next to the existing weak
#     to_unicode_uppercase, and declares the function in the header.
#
# NOTE(review): in this copy the hunks are joined onto a few physical lines and
# several context lines look like they lost their template arguments (e.g.
# `Vector special_casing;`, `static ErrorOr parse_unicode_data`,
# `Span special_case_mapping`, `Optional const& locale`). Presumably an extraction
# artefact - verify against the repository before trying to apply this patch.
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp index 8a3cbac7c6a..b3e0f4c6b6c 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp @@ -113,6 +113,7 @@ struct UnicodeData { u32 simple_uppercase_mapping_size { 0 }; u32 simple_lowercase_mapping_size { 0 }; + u32 simple_titlecase_mapping_size { 0 }; Vector special_casing; u32 code_points_with_special_casing { 0 }; @@ -674,6 +675,7 @@ static ErrorOr parse_unicode_data(Core::Stream::BufferedFile& file, Unicod unicode_data.code_points_with_non_zero_combining_class += data.canonical_combining_class != 0; unicode_data.simple_uppercase_mapping_size += data.simple_uppercase_mapping.has_value(); unicode_data.simple_lowercase_mapping_size += data.simple_lowercase_mapping.has_value(); + unicode_data.simple_titlecase_mapping_size += data.simple_titlecase_mapping.has_value(); unicode_data.code_points_with_decomposition_mapping += data.decomposition_mapping.has_value(); unicode_data.code_points_with_special_casing += has_special_casing; @@ -978,6 +980,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { }); append_code_point_mappings("uppercase"sv, "CodePointMapping"sv, unicode_data.simple_uppercase_mapping_size, [](auto const& data) { return data.simple_uppercase_mapping; }); append_code_point_mappings("lowercase"sv, "CodePointMapping"sv, unicode_data.simple_lowercase_mapping_size, [](auto const& data) { return data.simple_lowercase_mapping; }); + append_code_point_mappings("titlecase"sv, "CodePointMapping"sv, unicode_data.simple_titlecase_mapping_size, [](auto const& data) { return data.simple_titlecase_mapping; }); append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; }); 
append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; }); @@ -1138,6 +1141,7 @@ u32 @method@(u32 code_point) append_code_point_mapping_search("canonical_combining_class"sv, "s_combining_class_mappings"sv, "0"sv); append_code_point_mapping_search("to_unicode_uppercase"sv, "s_uppercase_mappings"sv, "code_point"sv); append_code_point_mapping_search("to_unicode_lowercase"sv, "s_lowercase_mappings"sv, "code_point"sv); + append_code_point_mapping_search("to_unicode_titlecase"sv, "s_titlecase_mappings"sv, "code_point"sv); generator.append(R"~~~( Span special_case_mapping(u32 code_point) diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp index d6eb43e103e..dd6f5c4654e 100644 --- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp +++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp @@ -48,6 +48,32 @@ TEST_CASE(to_unicode_uppercase) EXPECT_EQ(Unicode::to_unicode_uppercase(0x3401u), 0x3401u); EXPECT_EQ(Unicode::to_unicode_uppercase(0x3402u), 0x3402u); EXPECT_EQ(Unicode::to_unicode_uppercase(0x4dbfu), 0x4dbfu); + + // Code points whose uppercase and titlecase mappings actually differ. 
+ EXPECT_EQ(Unicode::to_unicode_uppercase(0x01c6u), 0x01c4u); // "dž" to "DŽ" + EXPECT_EQ(Unicode::to_unicode_uppercase(0x01c9u), 0x01c7u); // "lj" to "LJ" + EXPECT_EQ(Unicode::to_unicode_uppercase(0x01ccu), 0x01cau); // "nj" to "NJ" + EXPECT_EQ(Unicode::to_unicode_uppercase(0x01f3u), 0x01f1u); // "dz" to "DZ" +} + +TEST_CASE(to_unicode_titlecase) +{ + compare_to_ascii(toupper, Unicode::to_unicode_titlecase); + + EXPECT_EQ(Unicode::to_unicode_titlecase(0x03c9u), 0x03a9u); // "ω" to "Ω" + EXPECT_EQ(Unicode::to_unicode_titlecase(0x03a9u), 0x03a9u); // "Ω" to "Ω" + + // Code points encoded by ranges in UnicodeData.txt + EXPECT_EQ(Unicode::to_unicode_titlecase(0x3400u), 0x3400u); + EXPECT_EQ(Unicode::to_unicode_titlecase(0x3401u), 0x3401u); + EXPECT_EQ(Unicode::to_unicode_titlecase(0x3402u), 0x3402u); + EXPECT_EQ(Unicode::to_unicode_titlecase(0x4dbfu), 0x4dbfu); + + // Code points whose uppercase and titlecase mappings actually differ. + EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c6u), 0x01c5u); // "dž" to "Dž" + EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c9u), 0x01c8u); // "lj" to "Lj" + EXPECT_EQ(Unicode::to_unicode_titlecase(0x01ccu), 0x01cbu); // "nj" to "Nj" + EXPECT_EQ(Unicode::to_unicode_titlecase(0x01f3u), 0x01f2u); // "dz" to "Dz" } TEST_CASE(to_unicode_lowercase_unconditional_special_casing) diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp index 4b569ff6f4f..4ab8b9a6915 100644 --- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp +++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp @@ -38,6 +38,11 @@ u32 __attribute__((weak)) to_unicode_uppercase(u32 code_point) return to_ascii_uppercase(code_point); } +u32 __attribute__((weak)) to_unicode_titlecase(u32 code_point) +{ + return to_ascii_uppercase(code_point); +} + ErrorOr to_unicode_lowercase_full(StringView string, Optional const& locale) { StringBuilder builder; diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.h 
b/Userland/Libraries/LibUnicode/CharacterTypes.h index 553a1b7b49b..04ce644d301 100644 --- a/Userland/Libraries/LibUnicode/CharacterTypes.h +++ b/Userland/Libraries/LibUnicode/CharacterTypes.h @@ -38,6 +38,7 @@ u32 canonical_combining_class(u32 code_point); // Use the full-string transformations for full case folding. u32 to_unicode_lowercase(u32 code_point); u32 to_unicode_uppercase(u32 code_point); +u32 to_unicode_titlecase(u32 code_point); ErrorOr to_unicode_lowercase_full(StringView, Optional const& locale = {}); ErrorOr to_unicode_uppercase_full(StringView, Optional const& locale = {});