From 104b51b9122f0cb142a07bd9a1dc2106fe817519 Mon Sep 17 00:00:00 2001 From: matcool <26722564+matcool@users.noreply.github.com> Date: Thu, 6 Oct 2022 14:09:51 -0300 Subject: [PATCH] LibUnicode: Fix Hangul syllable composition for specific cases This fixes `combine_hangul_code_points`, which would try to combine an LVT syllable with a trailing consonant, resulting in a wrong character. Also added a test for this specific case. --- Tests/LibUnicode/TestUnicodeNormalization.cpp | 1 + Userland/Libraries/LibUnicode/Normalize.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Tests/LibUnicode/TestUnicodeNormalization.cpp b/Tests/LibUnicode/TestUnicodeNormalization.cpp index 8157b6d9660..daef39d7652 100644 --- a/Tests/LibUnicode/TestUnicodeNormalization.cpp +++ b/Tests/LibUnicode/TestUnicodeNormalization.cpp @@ -63,6 +63,7 @@ TEST_CASE(normalize_nfc) EXPECT_EQ(normalize("\u1103\u1161\u11B0"sv, NormalizationForm::NFC), "닭"sv); EXPECT_EQ(normalize("\u1100\uAC00\u11A8"sv, NormalizationForm::NFC), "\u1100\uAC01"sv); + EXPECT_EQ(normalize("\u1103\u1161\u11B0\u11B0"sv, NormalizationForm::NFC), "닭\u11B0"); } TEST_CASE(normalize_nfkd) diff --git a/Userland/Libraries/LibUnicode/Normalize.cpp b/Userland/Libraries/LibUnicode/Normalize.cpp index 8f5a59f7c3c..bc1ab70221e 100644 --- a/Userland/Libraries/LibUnicode/Normalize.cpp +++ b/Userland/Libraries/LibUnicode/Normalize.cpp @@ -111,7 +111,8 @@ static u32 combine_hangul_code_points(u32 a, u32 b) auto const leading_vowel_index = leading_index * HANGUL_BLOCK_COUNT + vowel_index * HANGUL_TRAILING_COUNT; return HANGUL_SYLLABLE_BASE + leading_vowel_index; } - if (is_hangul_code_point(a) && is_hangul_trailing(b)) { + // LV characters are the first in each "T block", so use this check to avoid combining LVT with T. + if (is_hangul_code_point(a) && (a - HANGUL_SYLLABLE_BASE) % HANGUL_TRAILING_COUNT == 0 && is_hangul_trailing(b)) { return a + b - HANGUL_TRAILING_BASE; } return 0;