LibUnicode: Fix Hangul syllable composition for specific cases

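This fixes `combine_hangul_code_points`, which would try to combine
an LVT syllable (one that already has a trailing consonant) with
another trailing consonant, resulting in a wrong character. Only LV
syllables may be combined with a trailing consonant.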

Also added a test for this specific case.
This commit is contained in:
matcool 2022-10-06 14:09:51 -03:00 committed by Tim Flynn
parent ce0e4b71a3
commit 104b51b912
Notes: sideshowbarker 2024-07-17 06:11:17 +09:00
2 changed files with 3 additions and 1 deletions

View File

@ -63,6 +63,7 @@ TEST_CASE(normalize_nfc)
EXPECT_EQ(normalize("\u1103\u1161\u11B0"sv, NormalizationForm::NFC), ""sv);
EXPECT_EQ(normalize("\u1100\uAC00\u11A8"sv, NormalizationForm::NFC), "\u1100\uAC01"sv);
EXPECT_EQ(normalize("\u1103\u1161\u11B0\u11B0"sv, NormalizationForm::NFC), "\u11B0");
}
TEST_CASE(normalize_nfkd)

View File

@ -111,7 +111,8 @@ static u32 combine_hangul_code_points(u32 a, u32 b)
auto const leading_vowel_index = leading_index * HANGUL_BLOCK_COUNT + vowel_index * HANGUL_TRAILING_COUNT;
return HANGUL_SYLLABLE_BASE + leading_vowel_index;
}
if (is_hangul_code_point(a) && is_hangul_trailing(b)) {
// LV characters are the first in each "T block", so use this check to avoid combining LVT with T.
if (is_hangul_code_point(a) && (a - HANGUL_SYLLABLE_BASE) % HANGUL_TRAILING_COUNT == 0 && is_hangul_trailing(b)) {
return a + b - HANGUL_TRAILING_BASE;
}
return 0;