LibUnicode: Parse the CLDR's defaultContent.json locale list

The defaultContent.json file contains the list of locales which default to
their parent locale's values. In the core CLDR dataset, these locales have
their own files, but they are empty (except for identity data). For example:

https://github.com/unicode-org/cldr/blob/main/common/main/en_US.xml

In the JSON export, these files are excluded, so we currently are not
recognizing these locales just by iterating the locale files.

This is a prerequisite for upgrading to CLDR version 40. One of these
default-content locales is the popular "en-US" locale, which defaults to
"en" values. We were previously inferring the existence of this locale
from the "en-US-POSIX" locale (many implementations, including ours,
strip variants such as POSIX). However, v40 removes the "en-US-POSIX"
locale entirely, meaning that without this change, we wouldn't know that
"en-US" exists (we would default to "en").

For more detail on this and other v40 changes, see:
https://cldr.unicode.org/index/downloads/cldr-40#h.nssoo2lq3cba
This commit is contained in:
Timothy Flynn 2021-11-09 13:32:23 -05:00 committed by Andreas Kling
parent 91881be4b0
commit 357c97dfa8
Notes: sideshowbarker 2024-07-18 01:21:46 +09:00
2 changed files with 42 additions and 1 deletions

View File

@ -464,6 +464,39 @@ static void parse_numeric_keywords(String locale_numbers_path, UnicodeLocaleData
locale_data.keywords.append(key);
}
// Registers every locale listed in <core_path>/defaultContent.json under the
// data of its nearest already-parsed ancestor locale.
//
// core_path:   directory that must contain "defaultContent.json".
// locale_data: locale table to extend; each listed locale is stored in
//              locale_data.locales with the value of the ancestor it resolves to.
//
// The file must exist, open, and parse as JSON; each of these is enforced
// with VERIFY.
static void parse_default_content_locales(String core_path, UnicodeLocaleData& locale_data)
{
    LexicalPath json_path(move(core_path));
    json_path = json_path.append("defaultContent.json"sv);
    VERIFY(Core::File::exists(json_path.string()));

    auto file_or_error = Core::File::open(json_path.string(), Core::OpenMode::ReadOnly);
    VERIFY(!file_or_error.is_error());

    auto parsed_json = JsonParser(file_or_error.value()->read_all()).parse();
    VERIFY(parsed_json.has_value());

    auto const& listed_locales = parsed_json->as_object().get("defaultContent"sv);

    listed_locales.as_array().for_each([&](JsonValue const& entry) {
        auto locale = entry.as_string();

        // Drop trailing "-subtag" segments until the remainder names a locale
        // that is already present in the table.
        StringView ancestor = locale;
        while (!locale_data.locales.contains(ancestor)) {
            auto last_separator = ancestor.find_last('-');

            // No segment left to drop and still unknown: skip this entry.
            if (!last_separator.has_value())
                return;

            ancestor = ancestor.substring_view(0, last_separator.value());
        }

        // Store the listed locale with the value of the ancestor it resolved to.
        locale_data.locales.set(locale, locale_data.locales.get(ancestor).value());
    });
}
static Core::DirIterator path_to_dir_iterator(String path)
{
LexicalPath lexical_path(move(path));
@ -486,7 +519,7 @@ static void parse_all_locales(String core_path, String locale_names_path, String
auto misc_iterator = path_to_dir_iterator(move(misc_path));
auto numbers_iterator = path_to_dir_iterator(move(numbers_path));
LexicalPath core_supplemental_path(move(core_path));
LexicalPath core_supplemental_path(core_path);
core_supplemental_path = core_supplemental_path.append("supplemental"sv);
VERIFY(Core::File::is_directory(core_supplemental_path.string()));
@ -558,6 +591,8 @@ static void parse_all_locales(String core_path, String locale_names_path, String
parse_locale_currencies(numbers_path, locale_data, locale);
parse_numeric_keywords(numbers_path, locale_data, locale);
}
parse_default_content_locales(move(core_path), locale_data);
}
static String format_identifier(StringView owner, String identifier)

View File

@ -456,4 +456,10 @@ TEST_CASE(canonicalize_unicode_locale_id)
test("ZH-XIANG"sv, "hsn"sv);
test("ja-latn-hepburn-heploc"sv, "ja-Latn-alalc97"sv);
test("JA-LATN-HEPBURN-HEPLOC"sv, "ja-Latn-alalc97"sv);
// Default content.
test("en-us"sv, "en-US"sv);
test("EN-US"sv, "en-US"sv);
test("zh-Hans-CN"sv, "zh-Hans-CN"sv);
test("ZH-HANS-CN"sv, "zh-Hans-CN"sv);
}