LibUnicode+Tests: Remove now unused to_unicode_*_full methods

This relocates all of the LibUnicode tests for these methods to the AK
String test suite.
This commit is contained in:
Shannon Booth 2023-11-27 22:47:08 +13:00 committed by Tim Flynn
parent d1ed04a6cb
commit d777b279e3
Notes: sideshowbarker 2024-07-17 05:19:06 +09:00
4 changed files with 426 additions and 510 deletions

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2022, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2021, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -15,6 +16,7 @@
#include <AK/Try.h>
#include <AK/Utf8View.h>
#include <AK/Vector.h>
#include <ctype.h>
TEST_CASE(construct_empty)
{
@ -263,65 +265,439 @@ TEST_CASE(reverse)
test_reverse("ab😀cd"sv, "dc😀ba"sv);
}
TEST_CASE(to_lowercase)
TEST_CASE(to_lowercase_unconditional_special_casing)
{
{
auto string = "Aa"_string;
auto result = MUST(string.to_lowercase());
EXPECT_EQ(result, "aa"sv);
}
{
auto string = "Ωω"_string;
auto result = MUST(string.to_lowercase());
EXPECT_EQ(result, "ωω"sv);
}
{
auto string = "İi̇"_string;
auto result = MUST(string.to_lowercase());
EXPECT_EQ(result, "i̇i̇"sv);
}
// LATIN SMALL LETTER SHARP S
auto result = MUST("\u00DF"_string.to_lowercase());
EXPECT_EQ(result, "\u00DF");
// LATIN CAPITAL LETTER I WITH DOT ABOVE
result = MUST("\u0130"_string.to_lowercase());
EXPECT_EQ(result, "\u0069\u0307");
// LATIN SMALL LIGATURE FF
result = MUST("\uFB00"_string.to_lowercase());
EXPECT_EQ(result, "\uFB00");
// LATIN SMALL LIGATURE FI
result = MUST("\uFB01"_string.to_lowercase());
EXPECT_EQ(result, "\uFB01");
// LATIN SMALL LIGATURE FL
result = MUST("\uFB02"_string.to_lowercase());
EXPECT_EQ(result, "\uFB02");
// LATIN SMALL LIGATURE FFI
result = MUST("\uFB03"_string.to_lowercase());
EXPECT_EQ(result, "\uFB03");
// LATIN SMALL LIGATURE FFL
result = MUST("\uFB04"_string.to_lowercase());
EXPECT_EQ(result, "\uFB04");
// LATIN SMALL LIGATURE LONG S T
result = MUST("\uFB05"_string.to_lowercase());
EXPECT_EQ(result, "\uFB05");
// LATIN SMALL LIGATURE ST
result = MUST("\uFB06"_string.to_lowercase());
EXPECT_EQ(result, "\uFB06");
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FB7"_string.to_lowercase());
EXPECT_EQ(result, "\u1FB7");
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FC7"_string.to_lowercase());
EXPECT_EQ(result, "\u1FC7");
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FF7"_string.to_lowercase());
EXPECT_EQ(result, "\u1FF7");
}
TEST_CASE(to_uppercase)
TEST_CASE(to_lowercase_special_casing_sigma)
{
{
auto string = "Aa"_string;
auto result = MUST(string.to_uppercase());
EXPECT_EQ(result, "AA"sv);
}
{
auto string = "Ωω"_string;
auto result = MUST(string.to_uppercase());
EXPECT_EQ(result, "ΩΩ"sv);
}
{
auto string = "ʼn"_string;
auto result = MUST(string.to_uppercase());
EXPECT_EQ(result, "ʼN"sv);
}
auto result = MUST("ABCI"_string.to_lowercase());
EXPECT_EQ(result, "abci");
// Sigma preceded by A
result = MUST("A\u03A3"_string.to_lowercase());
EXPECT_EQ(result, "a\u03C2");
// Sigma preceded by FEMININE ORDINAL INDICATOR
result = MUST("\u00AA\u03A3"_string.to_lowercase());
EXPECT_EQ(result, "\u00AA\u03C2");
// Sigma preceded by ROMAN NUMERAL ONE
result = MUST("\u2160\u03A3"_string.to_lowercase());
EXPECT_EQ(result, "\u2170\u03C2");
// Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
result = MUST("\u0345\u03A3"_string.to_lowercase());
EXPECT_EQ(result, "\u0345\u03C3");
// Sigma preceded by A and FULL STOP
result = MUST("A.\u03A3"_string.to_lowercase());
EXPECT_EQ(result, "a.\u03C2");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
result = MUST("A\u180E\u03A3"_string.to_lowercase());
EXPECT_EQ(result, "a\u180E\u03C2");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by B
result = MUST("A\u180E\u03A3B"_string.to_lowercase());
EXPECT_EQ(result, "a\u180E\u03C3b");
// Sigma followed by A
result = MUST("\u03A3A"_string.to_lowercase());
EXPECT_EQ(result, "\u03C3a");
// Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR
result = MUST("A\u03A3\u180E"_string.to_lowercase());
EXPECT_EQ(result, "a\u03C2\u180E");
// Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR and B
result = MUST("A\u03A3\u180EB"_string.to_lowercase());
EXPECT_EQ(result, "a\u03C3\u180Eb");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR
result = MUST("A\u180E\u03A3\u180E"_string.to_lowercase());
EXPECT_EQ(result, "a\u180E\u03C2\u180E");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR and B
result = MUST("A\u180E\u03A3\u180EB"_string.to_lowercase());
EXPECT_EQ(result, "a\u180E\u03C3\u180Eb");
}
// Checks locale-sensitive lowercasing of inputs built around LATIN CAPITAL LETTER I.
// Every input is lowercased under "en", "az" and "tr". The "az"/"tr" expectations differ from "en":
// a bare I is expected to become U+0131, while U+0130 and I + U+0307 are expected to become a plain "i".
TEST_CASE(to_lowercase_special_casing_i)
{
// LATIN CAPITAL LETTER I
auto result = MUST("I"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "i"sv);
result = MUST("I"_string.to_lowercase("az"sv));
EXPECT_EQ(result, "\u0131"sv);
result = MUST("I"_string.to_lowercase("tr"sv));
EXPECT_EQ(result, "\u0131"sv);
// LATIN CAPITAL LETTER I WITH DOT ABOVE
result = MUST("\u0130"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "\u0069\u0307"sv);
result = MUST("\u0130"_string.to_lowercase("az"sv));
EXPECT_EQ(result, "i"sv);
result = MUST("\u0130"_string.to_lowercase("tr"sv));
EXPECT_EQ(result, "i"sv);
// LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
result = MUST("I\u0307"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "i\u0307"sv);
result = MUST("I\u0307"_string.to_lowercase("az"sv));
EXPECT_EQ(result, "i"sv);
result = MUST("I\u0307"_string.to_lowercase("tr"sv));
EXPECT_EQ(result, "i"sv);
// LATIN CAPITAL LETTER I followed by combining class 0 and COMBINING DOT ABOVE
// With a letter between I and the dot, "az"/"tr" are expected to fall back to the dotless U+0131.
result = MUST("IA\u0307"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "ia\u0307"sv);
result = MUST("IA\u0307"_string.to_lowercase("az"sv));
EXPECT_EQ(result, "\u0131a\u0307"sv);
result = MUST("IA\u0307"_string.to_lowercase("tr"sv));
EXPECT_EQ(result, "\u0131a\u0307"sv);
}
// Compares "en" and "lt" lowercasing of I, J and I WITH OGONEK.
// On their own, both locales expect the same result. When COMBINING GRAVE ACCENT (U+0300) follows,
// "lt" additionally expects a COMBINING DOT ABOVE (U+0307) inserted before the accent.
TEST_CASE(to_lowercase_special_casing_more_above)
{
// LATIN CAPITAL LETTER I
auto result = MUST("I"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "i"sv);
result = MUST("I"_string.to_lowercase("lt"sv));
EXPECT_EQ(result, "i"sv);
// LATIN CAPITAL LETTER J
result = MUST("J"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "j"sv);
result = MUST("J"_string.to_lowercase("lt"sv));
EXPECT_EQ(result, "j"sv);
// LATIN CAPITAL LETTER I WITH OGONEK
result = MUST("\u012e"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "\u012f"sv);
result = MUST("\u012e"_string.to_lowercase("lt"sv));
EXPECT_EQ(result, "\u012f"sv);
// LATIN CAPITAL LETTER I followed by COMBINING GRAVE ACCENT
result = MUST("I\u0300"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "i\u0300"sv);
result = MUST("I\u0300"_string.to_lowercase("lt"sv));
EXPECT_EQ(result, "i\u0307\u0300"sv);
// LATIN CAPITAL LETTER J followed by COMBINING GRAVE ACCENT
result = MUST("J\u0300"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "j\u0300"sv);
result = MUST("J\u0300"_string.to_lowercase("lt"sv));
EXPECT_EQ(result, "j\u0307\u0300"sv);
// LATIN CAPITAL LETTER I WITH OGONEK followed by COMBINING GRAVE ACCENT
result = MUST("\u012e\u0300"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "\u012f\u0300"sv);
result = MUST("\u012e\u0300"_string.to_lowercase("lt"sv));
EXPECT_EQ(result, "\u012f\u0307\u0300"sv);
}
// Lowercases I with and without a following COMBINING DOT ABOVE (U+0307) under "en", "az" and "tr".
// "az"/"tr" expect the dotless U+0131 for a bare I, and a plain "i" (dot consumed) when U+0307 follows.
TEST_CASE(to_lowercase_special_casing_not_before_dot)
{
// LATIN CAPITAL LETTER I
auto result = MUST("I"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "i"sv);
result = MUST("I"_string.to_lowercase("az"sv));
EXPECT_EQ(result, "\u0131"sv);
result = MUST("I"_string.to_lowercase("tr"sv));
EXPECT_EQ(result, "\u0131"sv);
// LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
result = MUST("I\u0307"_string.to_lowercase("en"sv));
EXPECT_EQ(result, "i\u0307"sv);
result = MUST("I\u0307"_string.to_lowercase("az"sv));
EXPECT_EQ(result, "i"sv);
result = MUST("I\u0307"_string.to_lowercase("tr"sv));
EXPECT_EQ(result, "i"sv);
}
// Uppercases single code points with no locale argument and checks the expected expansions:
// sharp s and the Latin ligatures expand to their component capital letters, the Greek letters expand
// to a base capital plus combining marks / capital iota, and U+0130 is expected to stay unchanged.
TEST_CASE(to_uppercase_unconditional_special_casing)
{
// LATIN SMALL LETTER SHARP S
auto result = MUST("\u00DF"_string.to_uppercase());
EXPECT_EQ(result, "\u0053\u0053");
// LATIN CAPITAL LETTER I WITH DOT ABOVE
result = MUST("\u0130"_string.to_uppercase());
EXPECT_EQ(result, "\u0130");
// LATIN SMALL LIGATURE FF
result = MUST("\uFB00"_string.to_uppercase());
EXPECT_EQ(result, "\u0046\u0046");
// LATIN SMALL LIGATURE FI
result = MUST("\uFB01"_string.to_uppercase());
EXPECT_EQ(result, "\u0046\u0049");
// LATIN SMALL LIGATURE FL
result = MUST("\uFB02"_string.to_uppercase());
EXPECT_EQ(result, "\u0046\u004C");
// LATIN SMALL LIGATURE FFI
result = MUST("\uFB03"_string.to_uppercase());
EXPECT_EQ(result, "\u0046\u0046\u0049");
// LATIN SMALL LIGATURE FFL
result = MUST("\uFB04"_string.to_uppercase());
EXPECT_EQ(result, "\u0046\u0046\u004C");
// LATIN SMALL LIGATURE LONG S T
result = MUST("\uFB05"_string.to_uppercase());
EXPECT_EQ(result, "\u0053\u0054");
// LATIN SMALL LIGATURE ST
result = MUST("\uFB06"_string.to_uppercase());
EXPECT_EQ(result, "\u0053\u0054");
// GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
result = MUST("\u0390"_string.to_uppercase());
EXPECT_EQ(result, "\u0399\u0308\u0301");
// GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
result = MUST("\u03B0"_string.to_uppercase());
EXPECT_EQ(result, "\u03A5\u0308\u0301");
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FB7"_string.to_uppercase());
EXPECT_EQ(result, "\u0391\u0342\u0399");
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FC7"_string.to_uppercase());
EXPECT_EQ(result, "\u0397\u0342\u0399");
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FF7"_string.to_uppercase());
EXPECT_EQ(result, "\u03A9\u0342\u0399");
}
// Compares "en" and "lt" uppercasing of i and j.
// Alone, both locales expect I / J. When COMBINING DOT ABOVE (U+0307) follows,
// "en" keeps the combining dot while "lt" expects it to be dropped.
TEST_CASE(to_uppercase_special_casing_soft_dotted)
{
// LATIN SMALL LETTER I
auto result = MUST("i"_string.to_uppercase("en"sv));
EXPECT_EQ(result, "I"sv);
result = MUST("i"_string.to_uppercase("lt"sv));
EXPECT_EQ(result, "I"sv);
// LATIN SMALL LETTER J
result = MUST("j"_string.to_uppercase("en"sv));
EXPECT_EQ(result, "J"sv);
result = MUST("j"_string.to_uppercase("lt"sv));
EXPECT_EQ(result, "J"sv);
// LATIN SMALL LETTER I followed by COMBINING DOT ABOVE
result = MUST("i\u0307"_string.to_uppercase("en"sv));
EXPECT_EQ(result, "I\u0307"sv);
result = MUST("i\u0307"_string.to_uppercase("lt"sv));
EXPECT_EQ(result, "I"sv);
// LATIN SMALL LETTER J followed by COMBINING DOT ABOVE
result = MUST("j\u0307"_string.to_uppercase("en"sv));
EXPECT_EQ(result, "J\u0307"sv);
result = MUST("j\u0307"_string.to_uppercase("lt"sv));
EXPECT_EQ(result, "J"sv);
}
TEST_CASE(to_titlecase)
{
{
auto string = "foo bar baz"_string;
auto result = MUST(string.to_titlecase());
EXPECT_EQ(result, "Foo Bar Baz"sv);
EXPECT_EQ(MUST(""_string.to_titlecase()), ""sv);
EXPECT_EQ(MUST(" "_string.to_titlecase()), " "sv);
EXPECT_EQ(MUST(" - "_string.to_titlecase()), " - "sv);
EXPECT_EQ(MUST("a"_string.to_titlecase()), "A"sv);
EXPECT_EQ(MUST("A"_string.to_titlecase()), "A"sv);
EXPECT_EQ(MUST(" a"_string.to_titlecase()), " A"sv);
EXPECT_EQ(MUST("a "_string.to_titlecase()), "A "sv);
EXPECT_EQ(MUST("ab"_string.to_titlecase()), "Ab"sv);
EXPECT_EQ(MUST("Ab"_string.to_titlecase()), "Ab"sv);
EXPECT_EQ(MUST("aB"_string.to_titlecase()), "Ab"sv);
EXPECT_EQ(MUST("AB"_string.to_titlecase()), "Ab"sv);
EXPECT_EQ(MUST(" ab"_string.to_titlecase()), " Ab"sv);
EXPECT_EQ(MUST("ab "_string.to_titlecase()), "Ab "sv);
EXPECT_EQ(MUST("foo bar baz"_string.to_titlecase()), "Foo Bar Baz"sv);
EXPECT_EQ(MUST("foo \n \r bar \t baz"_string.to_titlecase()), "Foo \n \r Bar \t Baz"sv);
EXPECT_EQ(MUST("f\"oo\" b'ar'"_string.to_titlecase()), "F\"Oo\" B'ar'"sv);
EXPECT_EQ(MUST("123dollars"_string.to_titlecase()), "123Dollars"sv);
}
TEST_CASE(to_casefold)
{
for (u8 code_point = 0; code_point < 0x80; ++code_point) {
auto ascii = tolower(code_point);
auto unicode = MUST(MUST(String::from_utf8({ reinterpret_cast<char const*>(&code_point), 1 })).to_casefold());
EXPECT_EQ(unicode.bytes_as_string_view().length(), 1u);
EXPECT_EQ(unicode.bytes_as_string_view()[0], ascii);
}
{
auto string = "foo \n \r bar \t baz"_string;
auto result = MUST(string.to_titlecase());
EXPECT_EQ(result, "Foo \n \r Bar \t Baz"sv);
}
{
auto string = "f\"oo\" b'ar'"_string;
auto result = MUST(string.to_titlecase());
EXPECT_EQ(result, "F\"Oo\" B'ar'"sv);
}
{
auto string = "123dollars"_string;
auto result = MUST(string.to_titlecase());
EXPECT_EQ(result, "123Dollars"sv);
// LATIN SMALL LETTER SHARP S
auto result = MUST("\u00DF"_string.to_casefold());
EXPECT_EQ(result, "\u0073\u0073"sv);
// GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
result = MUST("\u1FB3"_string.to_casefold());
EXPECT_EQ(result, "\u03B1\u03B9"sv);
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI
result = MUST("\u1FB6"_string.to_casefold());
EXPECT_EQ(result, "\u03B1\u0342"sv);
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FB7"_string.to_casefold());
EXPECT_EQ(result, "\u03B1\u0342\u03B9"sv);
}
// Titlecases single code points with no locale argument and checks the expected expansions.
// Unlike the uppercase expectations, only the first letter of each Latin expansion is capitalised
// (e.g. U+00DF -> "Ss", U+FB03 -> "Ffi"), and the Greek YPOGEGRAMMENI cases end in U+0345.
TEST_CASE(to_titlecase_unconditional_special_casing)
{
// LATIN SMALL LETTER SHARP S
auto result = MUST("\u00DF"_string.to_titlecase());
EXPECT_EQ(result, "\u0053\u0073"sv);
// LATIN CAPITAL LETTER I WITH DOT ABOVE
result = MUST("\u0130"_string.to_titlecase());
EXPECT_EQ(result, "\u0130"sv);
// LATIN SMALL LIGATURE FF
result = MUST("\uFB00"_string.to_titlecase());
EXPECT_EQ(result, "\u0046\u0066"sv);
// LATIN SMALL LIGATURE FI
result = MUST("\uFB01"_string.to_titlecase());
EXPECT_EQ(result, "\u0046\u0069"sv);
// LATIN SMALL LIGATURE FL
result = MUST("\uFB02"_string.to_titlecase());
EXPECT_EQ(result, "\u0046\u006C"sv);
// LATIN SMALL LIGATURE FFI
result = MUST("\uFB03"_string.to_titlecase());
EXPECT_EQ(result, "\u0046\u0066\u0069"sv);
// LATIN SMALL LIGATURE FFL
result = MUST("\uFB04"_string.to_titlecase());
EXPECT_EQ(result, "\u0046\u0066\u006C"sv);
// LATIN SMALL LIGATURE LONG S T
result = MUST("\uFB05"_string.to_titlecase());
EXPECT_EQ(result, "\u0053\u0074"sv);
// LATIN SMALL LIGATURE ST
result = MUST("\uFB06"_string.to_titlecase());
EXPECT_EQ(result, "\u0053\u0074"sv);
// GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
result = MUST("\u0390"_string.to_titlecase());
EXPECT_EQ(result, "\u0399\u0308\u0301"sv);
// GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
result = MUST("\u03B0"_string.to_titlecase());
EXPECT_EQ(result, "\u03A5\u0308\u0301"sv);
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FB7"_string.to_titlecase());
EXPECT_EQ(result, "\u0391\u0342\u0345"sv);
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FC7"_string.to_titlecase());
EXPECT_EQ(result, "\u0397\u0342\u0345"sv);
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST("\u1FF7"_string.to_titlecase());
EXPECT_EQ(result, "\u03A9\u0342\u0345"sv);
}
// Titlecases a lone "i" under "en", "az" and "tr".
// "en" expects a plain "I"; "az"/"tr" expect LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130).
TEST_CASE(to_titlecase_special_casing_i)
{
// LATIN SMALL LETTER I
auto result = MUST("i"_string.to_titlecase("en"sv));
EXPECT_EQ(result, "I"sv);
result = MUST("i"_string.to_titlecase("az"sv));
EXPECT_EQ(result, "\u0130"sv);
result = MUST("i"_string.to_titlecase("tr"sv));
EXPECT_EQ(result, "\u0130"sv);
}
// Benchmark: invokes __test_to_casefold() 50,000 times in a row.
// NOTE(review): __test_to_casefold is presumably the function the TEST_CASE macro generates for
// TEST_CASE(to_casefold) — confirm against the test framework's macro definition.
BENCHMARK_CASE(casefold)
{
for (size_t i = 0; i < 50'000; ++i) {
__test_to_casefold();
}
}

View File

@ -74,54 +74,6 @@ TEST_CASE(to_unicode_titlecase)
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c9u), 0x01c8u); // "lj" to "Lj"
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01ccu), 0x01cbu); // "nj" to "Nj"
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01f3u), 0x01f2u); // "dz" to "Dz"
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(""sv)), ""sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" "sv)), " "sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" - "sv)), " - "sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a"sv)), "A"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("A"sv)), "A"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" a"sv)), " A"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a "sv)), "A "sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab"sv)), "Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("Ab"sv)), "Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("aB"sv)), "Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("AB"sv)), "Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" ab"sv)), " Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab "sv)), "Ab "sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo bar baz"sv)), "Foo Bar Baz"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo \n \r bar \t baz"sv)), "Foo \n \r Bar \t Baz"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("f\"oo\" b'ar'"sv)), "F\"Oo\" B'ar'"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("123dollars"sv)), "123Dollars"sv);
}
// Checks Unicode::to_unicode_casefold_full in two parts:
// first, every ASCII code point (0x00-0x7F) must fold to a single byte equal to tolower() of it;
// then four non-ASCII code points are checked against their expected multi-code-point foldings.
TEST_CASE(to_unicode_casefold)
{
for (u8 code_point = 0; code_point < 0x80; ++code_point) {
auto ascii = tolower(code_point);
// The input view is a one-byte string formed from the address of the loop variable.
auto unicode = MUST(Unicode::to_unicode_casefold_full({ reinterpret_cast<char const*>(&code_point), 1 }));
EXPECT_EQ(unicode.bytes_as_string_view().length(), 1u);
EXPECT_EQ(unicode.bytes_as_string_view()[0], ascii);
}
// LATIN SMALL LETTER SHARP S
auto result = MUST(Unicode::to_unicode_casefold_full("\u00DF"sv));
EXPECT_EQ(result, "\u0073\u0073"sv);
// GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_casefold_full("\u1FB3"sv));
EXPECT_EQ(result, "\u03B1\u03B9"sv);
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI
result = MUST(Unicode::to_unicode_casefold_full("\u1FB6"sv));
EXPECT_EQ(result, "\u03B1\u0342"sv);
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_casefold_full("\u1FB7"sv));
EXPECT_EQ(result, "\u03B1\u0342\u03B9"sv);
}
BENCHMARK_CASE(casing)
@ -130,388 +82,9 @@ BENCHMARK_CASE(casing)
__test_to_unicode_lowercase();
__test_to_unicode_uppercase();
__test_to_unicode_titlecase();
__test_to_unicode_casefold();
}
}
// Lowercases single code points via Unicode::to_unicode_lowercase_full with no locale argument.
// All inputs except U+0130 are already lowercase and are expected back unchanged;
// U+0130 is expected to expand to "i" followed by COMBINING DOT ABOVE (U+0307).
TEST_CASE(to_unicode_lowercase_unconditional_special_casing)
{
// LATIN SMALL LETTER SHARP S
auto result = MUST(Unicode::to_unicode_lowercase_full("\u00DF"sv));
EXPECT_EQ(result, "\u00DF");
// LATIN CAPITAL LETTER I WITH DOT ABOVE
result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv));
EXPECT_EQ(result, "\u0069\u0307");
// LATIN SMALL LIGATURE FF
result = MUST(Unicode::to_unicode_lowercase_full("\uFB00"sv));
EXPECT_EQ(result, "\uFB00");
// LATIN SMALL LIGATURE FI
result = MUST(Unicode::to_unicode_lowercase_full("\uFB01"sv));
EXPECT_EQ(result, "\uFB01");
// LATIN SMALL LIGATURE FL
result = MUST(Unicode::to_unicode_lowercase_full("\uFB02"sv));
EXPECT_EQ(result, "\uFB02");
// LATIN SMALL LIGATURE FFI
result = MUST(Unicode::to_unicode_lowercase_full("\uFB03"sv));
EXPECT_EQ(result, "\uFB03");
// LATIN SMALL LIGATURE FFL
result = MUST(Unicode::to_unicode_lowercase_full("\uFB04"sv));
EXPECT_EQ(result, "\uFB04");
// LATIN SMALL LIGATURE LONG S T
result = MUST(Unicode::to_unicode_lowercase_full("\uFB05"sv));
EXPECT_EQ(result, "\uFB05");
// LATIN SMALL LIGATURE ST
result = MUST(Unicode::to_unicode_lowercase_full("\uFB06"sv));
EXPECT_EQ(result, "\uFB06");
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_lowercase_full("\u1FB7"sv));
EXPECT_EQ(result, "\u1FB7");
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_lowercase_full("\u1FC7"sv));
EXPECT_EQ(result, "\u1FC7");
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_lowercase_full("\u1FF7"sv));
EXPECT_EQ(result, "\u1FF7");
}
// Lowercases strings containing GREEK CAPITAL LETTER SIGMA (U+03A3) with no locale argument.
// Depending on the characters before and after it, the sigma is expected to become either
// U+03C2 (final sigma) or U+03C3 (regular small sigma); each case below names its context.
TEST_CASE(to_unicode_lowercase_special_casing_sigma)
{
// Baseline without any sigma: plain ASCII lowercasing.
auto result = MUST(Unicode::to_unicode_lowercase_full("ABCI"sv));
EXPECT_EQ(result, "abci");
// Sigma preceded by A
result = MUST(Unicode::to_unicode_lowercase_full("A\u03A3"sv));
EXPECT_EQ(result, "a\u03C2");
// Sigma preceded by FEMININE ORDINAL INDICATOR
result = MUST(Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv));
EXPECT_EQ(result, "\u00AA\u03C2");
// Sigma preceded by ROMAN NUMERAL ONE
result = MUST(Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv));
EXPECT_EQ(result, "\u2170\u03C2");
// Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_lowercase_full("\u0345\u03A3"sv));
EXPECT_EQ(result, "\u0345\u03C3");
// Sigma preceded by A and FULL STOP
result = MUST(Unicode::to_unicode_lowercase_full("A.\u03A3"sv));
EXPECT_EQ(result, "a.\u03C2");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv));
EXPECT_EQ(result, "a\u180E\u03C2");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by B
result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3B"sv));
EXPECT_EQ(result, "a\u180E\u03C3b");
// Sigma followed by A
result = MUST(Unicode::to_unicode_lowercase_full("\u03A3A"sv));
EXPECT_EQ(result, "\u03C3a");
// Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR
result = MUST(Unicode::to_unicode_lowercase_full("A\u03A3\u180E"sv));
EXPECT_EQ(result, "a\u03C2\u180E");
// Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR and B
result = MUST(Unicode::to_unicode_lowercase_full("A\u03A3\u180EB"sv));
EXPECT_EQ(result, "a\u03C3\u180Eb");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR
result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180E"sv));
EXPECT_EQ(result, "a\u180E\u03C2\u180E");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR and B
result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180EB"sv));
EXPECT_EQ(result, "a\u180E\u03C3\u180Eb");
}
// Checks locale-sensitive lowercasing (Unicode::to_unicode_lowercase_full) of inputs built around
// LATIN CAPITAL LETTER I under "en", "az" and "tr". The "az"/"tr" expectations differ from "en":
// a bare I becomes U+0131, while U+0130 and I + U+0307 become a plain "i".
TEST_CASE(to_unicode_lowercase_special_casing_i)
{
// LATIN CAPITAL LETTER I
auto result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv));
EXPECT_EQ(result, "i"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "az"sv));
EXPECT_EQ(result, "\u0131"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "tr"sv));
EXPECT_EQ(result, "\u0131"sv);
// LATIN CAPITAL LETTER I WITH DOT ABOVE
result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "en"sv));
EXPECT_EQ(result, "\u0069\u0307"sv);
result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "az"sv));
EXPECT_EQ(result, "i"sv);
result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "tr"sv));
EXPECT_EQ(result, "i"sv);
// LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "en"sv));
EXPECT_EQ(result, "i\u0307"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "az"sv));
EXPECT_EQ(result, "i"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "tr"sv));
EXPECT_EQ(result, "i"sv);
// LATIN CAPITAL LETTER I followed by combining class 0 and COMBINING DOT ABOVE
// With a letter between I and the dot, "az"/"tr" are expected to fall back to the dotless U+0131.
result = MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "en"sv));
EXPECT_EQ(result, "ia\u0307"sv);
result = MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "az"sv));
EXPECT_EQ(result, "\u0131a\u0307"sv);
result = MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "tr"sv));
EXPECT_EQ(result, "\u0131a\u0307"sv);
}
// Compares "en" and "lt" lowercasing (Unicode::to_unicode_lowercase_full) of I, J and I WITH OGONEK.
// On their own, both locales expect the same result. When COMBINING GRAVE ACCENT (U+0300) follows,
// "lt" additionally expects a COMBINING DOT ABOVE (U+0307) inserted before the accent.
TEST_CASE(to_unicode_lowercase_special_casing_more_above)
{
// LATIN CAPITAL LETTER I
auto result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv));
EXPECT_EQ(result, "i"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "lt"sv));
EXPECT_EQ(result, "i"sv);
// LATIN CAPITAL LETTER J
result = MUST(Unicode::to_unicode_lowercase_full("J"sv, "en"sv));
EXPECT_EQ(result, "j"sv);
result = MUST(Unicode::to_unicode_lowercase_full("J"sv, "lt"sv));
EXPECT_EQ(result, "j"sv);
// LATIN CAPITAL LETTER I WITH OGONEK
result = MUST(Unicode::to_unicode_lowercase_full("\u012e"sv, "en"sv));
EXPECT_EQ(result, "\u012f"sv);
result = MUST(Unicode::to_unicode_lowercase_full("\u012e"sv, "lt"sv));
EXPECT_EQ(result, "\u012f"sv);
// LATIN CAPITAL LETTER I followed by COMBINING GRAVE ACCENT
result = MUST(Unicode::to_unicode_lowercase_full("I\u0300"sv, "en"sv));
EXPECT_EQ(result, "i\u0300"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I\u0300"sv, "lt"sv));
EXPECT_EQ(result, "i\u0307\u0300"sv);
// LATIN CAPITAL LETTER J followed by COMBINING GRAVE ACCENT
result = MUST(Unicode::to_unicode_lowercase_full("J\u0300"sv, "en"sv));
EXPECT_EQ(result, "j\u0300"sv);
result = MUST(Unicode::to_unicode_lowercase_full("J\u0300"sv, "lt"sv));
EXPECT_EQ(result, "j\u0307\u0300"sv);
// LATIN CAPITAL LETTER I WITH OGONEK followed by COMBINING GRAVE ACCENT
result = MUST(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "en"sv));
EXPECT_EQ(result, "\u012f\u0300"sv);
result = MUST(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "lt"sv));
EXPECT_EQ(result, "\u012f\u0307\u0300"sv);
}
// Lowercases I with and without a following COMBINING DOT ABOVE (U+0307) under "en", "az" and "tr"
// via Unicode::to_unicode_lowercase_full. "az"/"tr" expect the dotless U+0131 for a bare I,
// and a plain "i" (dot consumed) when U+0307 follows.
TEST_CASE(to_unicode_lowercase_special_casing_not_before_dot)
{
// LATIN CAPITAL LETTER I
auto result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv));
EXPECT_EQ(result, "i"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "az"sv));
EXPECT_EQ(result, "\u0131"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "tr"sv));
EXPECT_EQ(result, "\u0131"sv);
// LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "en"sv));
EXPECT_EQ(result, "i\u0307"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "az"sv));
EXPECT_EQ(result, "i"sv);
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "tr"sv));
EXPECT_EQ(result, "i"sv);
}
// Uppercases single code points via Unicode::to_unicode_uppercase_full with no locale argument:
// sharp s and the Latin ligatures expand to their component capital letters, the Greek letters expand
// to a base capital plus combining marks / capital iota, and U+0130 is expected to stay unchanged.
TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
{
// LATIN SMALL LETTER SHARP S
auto result = MUST(Unicode::to_unicode_uppercase_full("\u00DF"sv));
EXPECT_EQ(result, "\u0053\u0053");
// LATIN CAPITAL LETTER I WITH DOT ABOVE
result = MUST(Unicode::to_unicode_uppercase_full("\u0130"sv));
EXPECT_EQ(result, "\u0130");
// LATIN SMALL LIGATURE FF
result = MUST(Unicode::to_unicode_uppercase_full("\uFB00"sv));
EXPECT_EQ(result, "\u0046\u0046");
// LATIN SMALL LIGATURE FI
result = MUST(Unicode::to_unicode_uppercase_full("\uFB01"sv));
EXPECT_EQ(result, "\u0046\u0049");
// LATIN SMALL LIGATURE FL
result = MUST(Unicode::to_unicode_uppercase_full("\uFB02"sv));
EXPECT_EQ(result, "\u0046\u004C");
// LATIN SMALL LIGATURE FFI
result = MUST(Unicode::to_unicode_uppercase_full("\uFB03"sv));
EXPECT_EQ(result, "\u0046\u0046\u0049");
// LATIN SMALL LIGATURE FFL
result = MUST(Unicode::to_unicode_uppercase_full("\uFB04"sv));
EXPECT_EQ(result, "\u0046\u0046\u004C");
// LATIN SMALL LIGATURE LONG S T
result = MUST(Unicode::to_unicode_uppercase_full("\uFB05"sv));
EXPECT_EQ(result, "\u0053\u0054");
// LATIN SMALL LIGATURE ST
result = MUST(Unicode::to_unicode_uppercase_full("\uFB06"sv));
EXPECT_EQ(result, "\u0053\u0054");
// GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
result = MUST(Unicode::to_unicode_uppercase_full("\u0390"sv));
EXPECT_EQ(result, "\u0399\u0308\u0301");
// GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
result = MUST(Unicode::to_unicode_uppercase_full("\u03B0"sv));
EXPECT_EQ(result, "\u03A5\u0308\u0301");
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_uppercase_full("\u1FB7"sv));
EXPECT_EQ(result, "\u0391\u0342\u0399");
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_uppercase_full("\u1FC7"sv));
EXPECT_EQ(result, "\u0397\u0342\u0399");
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_uppercase_full("\u1FF7"sv));
EXPECT_EQ(result, "\u03A9\u0342\u0399");
}
// Compares "en" and "lt" uppercasing (Unicode::to_unicode_uppercase_full) of i and j.
// Alone, both locales expect I / J. When COMBINING DOT ABOVE (U+0307) follows,
// "en" keeps the combining dot while "lt" expects it to be dropped.
TEST_CASE(to_unicode_uppercase_special_casing_soft_dotted)
{
// LATIN SMALL LETTER I
auto result = MUST(Unicode::to_unicode_uppercase_full("i"sv, "en"sv));
EXPECT_EQ(result, "I"sv);
result = MUST(Unicode::to_unicode_uppercase_full("i"sv, "lt"sv));
EXPECT_EQ(result, "I"sv);
// LATIN SMALL LETTER J
result = MUST(Unicode::to_unicode_uppercase_full("j"sv, "en"sv));
EXPECT_EQ(result, "J"sv);
result = MUST(Unicode::to_unicode_uppercase_full("j"sv, "lt"sv));
EXPECT_EQ(result, "J"sv);
// LATIN SMALL LETTER I followed by COMBINING DOT ABOVE
result = MUST(Unicode::to_unicode_uppercase_full("i\u0307"sv, "en"sv));
EXPECT_EQ(result, "I\u0307"sv);
result = MUST(Unicode::to_unicode_uppercase_full("i\u0307"sv, "lt"sv));
EXPECT_EQ(result, "I"sv);
// LATIN SMALL LETTER J followed by COMBINING DOT ABOVE
result = MUST(Unicode::to_unicode_uppercase_full("j\u0307"sv, "en"sv));
EXPECT_EQ(result, "J\u0307"sv);
result = MUST(Unicode::to_unicode_uppercase_full("j\u0307"sv, "lt"sv));
EXPECT_EQ(result, "J"sv);
}
// Titlecases single code points via Unicode::to_unicode_titlecase_full with no locale argument.
// Unlike the uppercase expectations, only the first letter of each Latin expansion is capitalised
// (e.g. U+00DF -> "Ss", U+FB03 -> "Ffi"), and the Greek YPOGEGRAMMENI cases end in U+0345.
TEST_CASE(to_unicode_titlecase_unconditional_special_casing)
{
// LATIN SMALL LETTER SHARP S
auto result = MUST(Unicode::to_unicode_titlecase_full("\u00DF"sv));
EXPECT_EQ(result, "\u0053\u0073"sv);
// LATIN CAPITAL LETTER I WITH DOT ABOVE
result = MUST(Unicode::to_unicode_titlecase_full("\u0130"sv));
EXPECT_EQ(result, "\u0130"sv);
// LATIN SMALL LIGATURE FF
result = MUST(Unicode::to_unicode_titlecase_full("\uFB00"sv));
EXPECT_EQ(result, "\u0046\u0066"sv);
// LATIN SMALL LIGATURE FI
result = MUST(Unicode::to_unicode_titlecase_full("\uFB01"sv));
EXPECT_EQ(result, "\u0046\u0069"sv);
// LATIN SMALL LIGATURE FL
result = MUST(Unicode::to_unicode_titlecase_full("\uFB02"sv));
EXPECT_EQ(result, "\u0046\u006C"sv);
// LATIN SMALL LIGATURE FFI
result = MUST(Unicode::to_unicode_titlecase_full("\uFB03"sv));
EXPECT_EQ(result, "\u0046\u0066\u0069"sv);
// LATIN SMALL LIGATURE FFL
result = MUST(Unicode::to_unicode_titlecase_full("\uFB04"sv));
EXPECT_EQ(result, "\u0046\u0066\u006C"sv);
// LATIN SMALL LIGATURE LONG S T
result = MUST(Unicode::to_unicode_titlecase_full("\uFB05"sv));
EXPECT_EQ(result, "\u0053\u0074"sv);
// LATIN SMALL LIGATURE ST
result = MUST(Unicode::to_unicode_titlecase_full("\uFB06"sv));
EXPECT_EQ(result, "\u0053\u0074"sv);
// GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
result = MUST(Unicode::to_unicode_titlecase_full("\u0390"sv));
EXPECT_EQ(result, "\u0399\u0308\u0301"sv);
// GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
result = MUST(Unicode::to_unicode_titlecase_full("\u03B0"sv));
EXPECT_EQ(result, "\u03A5\u0308\u0301"sv);
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_titlecase_full("\u1FB7"sv));
EXPECT_EQ(result, "\u0391\u0342\u0345"sv);
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_titlecase_full("\u1FC7"sv));
EXPECT_EQ(result, "\u0397\u0342\u0345"sv);
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
result = MUST(Unicode::to_unicode_titlecase_full("\u1FF7"sv));
EXPECT_EQ(result, "\u03A9\u0342\u0345"sv);
}
// Titlecases a lone "i" under "en", "az" and "tr" via Unicode::to_unicode_titlecase_full.
// "en" expects a plain "I"; "az"/"tr" expect LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130).
TEST_CASE(to_unicode_titlecase_special_casing_i)
{
// LATIN SMALL LETTER I
auto result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "en"sv));
EXPECT_EQ(result, "I"sv);
result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "az"sv));
EXPECT_EQ(result, "\u0130"sv);
result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "tr"sv));
EXPECT_EQ(result, "\u0130"sv);
}
TEST_CASE(general_category)
{
auto general_category = [](StringView name) {

View File

@ -41,34 +41,6 @@ u32 __attribute__((weak)) to_unicode_titlecase(u32 code_point)
return to_ascii_uppercase(code_point);
}
// Builds the lowercase form of `string`, optionally tailored to `locale`.
// The input is wrapped in a Utf8View and handed to Detail::build_lowercase_string, which writes into
// a local StringBuilder; any error from that call is propagated via TRY.
// Returns the builder's contents as a DeprecatedString.
ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView string, Optional<StringView> const& locale)
{
StringBuilder builder;
TRY(Detail::build_lowercase_string(Utf8View { string }, builder, locale));
return builder.to_deprecated_string();
}
// Builds the uppercase form of `string`, optionally tailored to `locale`.
// The input is wrapped in a Utf8View and handed to Detail::build_uppercase_string, which writes into
// a local StringBuilder; any error from that call is propagated via TRY.
// Returns the builder's contents as a DeprecatedString.
ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView string, Optional<StringView> const& locale)
{
StringBuilder builder;
TRY(Detail::build_uppercase_string(Utf8View { string }, builder, locale));
return builder.to_deprecated_string();
}
// Builds the titlecase form of `string`, optionally tailored to `locale`.
// `trailing_code_point_transformation` is forwarded unchanged to Detail::build_titlecase_string,
// along with a Utf8View of the input and a local StringBuilder; errors are propagated via TRY.
// Returns the builder's contents as a String (builder.to_string()).
ErrorOr<String> to_unicode_titlecase_full(StringView string, Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation)
{
StringBuilder builder;
TRY(Detail::build_titlecase_string(Utf8View { string }, builder, locale, trailing_code_point_transformation));
return builder.to_string();
}
// Builds the case-folded form of `string`; unlike the other *_full helpers it takes no locale.
// The input is wrapped in a Utf8View and handed to Detail::build_casefold_string, which writes into
// a local StringBuilder; any error from that call is propagated via TRY.
// Returns the builder's contents as a String (builder.to_string()).
ErrorOr<String> to_unicode_casefold_full(StringView string)
{
StringBuilder builder;
TRY(Detail::build_casefold_string(Utf8View { string }, builder));
return builder.to_string();
}
template<typename ViewType>
class CasefoldStringComparator {
public:

View File

@ -48,11 +48,6 @@ u32 to_unicode_lowercase(u32 code_point);
// Single-code-point case mappings: each takes one u32 code point and returns one u32 code point.
u32 to_unicode_uppercase(u32 code_point);
u32 to_unicode_titlecase(u32 code_point);
// Whole-string case conversions. The locale argument is optional and defaults to empty.
// The lowercase/uppercase variants return a DeprecatedString; titlecase and casefold return a String.
ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView, Optional<StringView> const& locale = {});
ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView, Optional<StringView> const& locale = {});
// Titlecase additionally takes a trailing-code-point transformation, defaulting to Lowercase.
ErrorOr<String> to_unicode_titlecase_full(StringView, Optional<StringView> const& locale = {}, TrailingCodePointTransformation trailing_code_point_transformation = TrailingCodePointTransformation::Lowercase);
ErrorOr<String> to_unicode_casefold_full(StringView);
// Compares two views of the same type and returns a bool.
// NOTE(review): presumably a case-insensitive equality check, going by the name — the definition is not visible here.
template<typename ViewType>
bool equals_ignoring_case(ViewType, ViewType);