LibUnicode: Handle code points that are both cased and case-ignorable

Apparently, some code points fit both categories, for example U+0345
(COMBINING GREEK YPOGEGRAMMENI). When determining whether a code point
is the final code point in a string, treat such code points as
case-ignorable rather than as cased letters.
This commit is contained in:
Timothy Flynn 2021-07-27 18:47:41 -04:00 committed by Andreas Kling
parent dff156b7c6
commit c4bfda7f7f
Notes: sideshowbarker 2024-07-18 07:57:03 +09:00
2 changed files with 5 additions and 1 deletion

View File

@ -118,6 +118,10 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
EXPECT_EQ(result, "\u2170\u03C2");
// Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
result = Unicode::to_unicode_lowercase_full("\u0345\u03A3"sv);
EXPECT_EQ(result, "\u0345\u03C3");
// Sigma preceded by A and FULL STOP
result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
EXPECT_EQ(result, "a.\u03C2");

View File

@ -95,7 +95,7 @@ static bool is_final_code_point(Utf8View const& string, size_t index, size_t byt
if (!unicode_data.has_value())
return false;
if (is_cased_letter(*unicode_data))
if (is_cased_letter(*unicode_data) && !is_case_ignorable(*unicode_data))
++cased_letter_count;
else if (!is_case_ignorable(*unicode_data))
cased_letter_count = 0;