LibUnicode: Canonicalize the subtag "yes" to "true"

This commit is contained in:
Timothy Flynn 2021-08-30 15:51:08 -04:00 committed by Linus Groh
parent 556374a904
commit f907a7dc38
Notes: sideshowbarker 2024-07-18 04:57:39 +09:00
2 changed files with 18 additions and 0 deletions

View File

@ -302,6 +302,10 @@ TEST_CASE(canonicalize_unicode_locale_id)
test("EN-U-CCC-BBB-2K-AAA-1K-BBB"sv, "en-u-bbb-ccc-1k-bbb-2k-aaa"sv); test("EN-U-CCC-BBB-2K-AAA-1K-BBB"sv, "en-u-bbb-ccc-1k-bbb-2k-aaa"sv);
test("en-u-1k-true"sv, "en-u-1k"sv); test("en-u-1k-true"sv, "en-u-1k"sv);
test("EN-U-1K-TRUE"sv, "en-u-1k"sv); test("EN-U-1K-TRUE"sv, "en-u-1k"sv);
test("en-u-kb-yes"sv, "en-u-kb"sv);
test("EN-U-KB-YES"sv, "en-u-kb"sv);
test("en-u-ka-yes"sv, "en-u-ka-yes"sv);
test("EN-U-KA-YES"sv, "en-u-ka-yes"sv);
test("en-t-en"sv, "en-t-en"sv); test("en-t-en"sv, "en-t-en"sv);
test("EN-T-EN"sv, "en-t-en"sv); test("EN-T-EN"sv, "en-t-en"sv);
@ -315,6 +319,8 @@ TEST_CASE(canonicalize_unicode_locale_id)
test("EN-T-EN-K2-BBB-K1-AAA"sv, "en-t-en-k1-aaa-k2-bbb"sv); test("EN-T-EN-K2-BBB-K1-AAA"sv, "en-t-en-k1-aaa-k2-bbb"sv);
test("en-t-k1-true"sv, "en-t-k1-true"sv); test("en-t-k1-true"sv, "en-t-k1-true"sv);
test("EN-T-K1-TRUE"sv, "en-t-k1-true"sv); test("EN-T-K1-TRUE"sv, "en-t-k1-true"sv);
test("en-t-k1-yes"sv, "en-t-k1-yes"sv);
test("EN-T-K1-YES"sv, "en-t-k1-yes"sv);
test("en-0-aaa"sv, "en-0-aaa"sv); test("en-0-aaa"sv, "en-0-aaa"sv);
test("EN-0-AAA"sv, "en-0-aaa"sv); test("EN-0-AAA"sv, "en-0-aaa"sv);

View File

@ -480,6 +480,17 @@ Optional<LocaleID> parse_unicode_locale_id(StringView locale)
return locale_id; return locale_id;
} }
// Applies the key/value alias substitutions that are hard-coded in this file.
//
// Currently the only substitution is: when `key` is one of "kb", "kc", "kh", "kk" or "kn"
// and `value` is exactly "yes", `value` is rewritten in place to "true". For every other
// key/value combination both arguments are left untouched. `key` is only read.
//
// NOTE(review): the comparison against "yes" is an exact match; the visible caller lowercases
// the value before invoking this function.
static void perform_hard_coded_key_value_substitutions(String& key, String& value)
{
    // FIXME: In the XML export of CLDR, there are some aliases defined in the following file:
    // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/collation.xml
    //
    // There doesn't seem to be a counterpart in the JSON export. Since there aren't many such
    // aliases, until an XML parser is implemented, those aliases are implemented here.

    // Only this fixed set of keys has the "yes" alias applied.
    if (!key.is_one_of("kb"sv, "kc"sv, "kh"sv, "kk"sv, "kn"sv))
        return;

    // Any value other than the alias itself is left as-is.
    if (!(value == "yes"sv))
        return;

    value = "true"sv;
}
static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id) static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
{ {
auto canonicalize_language = [](LanguageID& language_id, bool force_lowercase) { auto canonicalize_language = [](LanguageID& language_id, bool force_lowercase) {
@ -542,6 +553,7 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
for (auto& value : raw_values) { for (auto& value : raw_values) {
value = value.to_lowercase(); value = value.to_lowercase();
perform_hard_coded_key_value_substitutions(key, value);
// Note: The spec says to remove "true" type and tfield values but that is believed to be a bug in the spec // Note: The spec says to remove "true" type and tfield values but that is believed to be a bug in the spec
// because, for tvalues, that would result in invalid syntax: // because, for tvalues, that would result in invalid syntax: