LibJS+LibLocale: Replace Unicode keyword lookups with ICU

Note: All of the changes to the test files in this patch are now aligned
with both Chrome and Firefox.
This commit is contained in:
Timothy Flynn 2024-06-15 20:23:53 -04:00 committed by Andreas Kling
parent a1464342e1
commit 1bcc29d0d1
Notes: sideshowbarker 2024-07-17 09:56:35 +09:00
22 changed files with 400 additions and 570 deletions

View File

@ -9,15 +9,9 @@ set(CLDR_VERSION_FILE "${CLDR_PATH}/version.txt")
set(CLDR_ZIP_URL "https://github.com/unicode-org/cldr-json/releases/download/${CLDR_VERSION}/cldr-${CLDR_VERSION}-json-modern.zip") set(CLDR_ZIP_URL "https://github.com/unicode-org/cldr-json/releases/download/${CLDR_VERSION}/cldr-${CLDR_VERSION}-json-modern.zip")
set(CLDR_ZIP_PATH "${CLDR_PATH}/cldr.zip") set(CLDR_ZIP_PATH "${CLDR_PATH}/cldr.zip")
set(CLDR_BCP47_SOURCE cldr-bcp47)
set(CLDR_BCP47_PATH "${CLDR_PATH}/${CLDR_BCP47_SOURCE}")
set(CLDR_CORE_SOURCE cldr-core) set(CLDR_CORE_SOURCE cldr-core)
set(CLDR_CORE_PATH "${CLDR_PATH}/${CLDR_CORE_SOURCE}") set(CLDR_CORE_PATH "${CLDR_PATH}/${CLDR_CORE_SOURCE}")
set(CLDR_DATES_SOURCE cldr-dates-modern)
set(CLDR_DATES_PATH "${CLDR_PATH}/${CLDR_DATES_SOURCE}")
set(CLDR_NUMBERS_SOURCE cldr-numbers-modern) set(CLDR_NUMBERS_SOURCE cldr-numbers-modern)
set(CLDR_NUMBERS_PATH "${CLDR_PATH}/${CLDR_NUMBERS_SOURCE}") set(CLDR_NUMBERS_PATH "${CLDR_PATH}/${CLDR_NUMBERS_SOURCE}")
@ -26,9 +20,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
if (ENABLE_NETWORK_DOWNLOADS) if (ENABLE_NETWORK_DOWNLOADS)
download_file("${CLDR_ZIP_URL}" "${CLDR_ZIP_PATH}" SHA256 "${CLDR_SHA256}") download_file("${CLDR_ZIP_URL}" "${CLDR_ZIP_PATH}" SHA256 "${CLDR_SHA256}")
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_BCP47_SOURCE}/**" "${CLDR_BCP47_PATH}")
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_CORE_SOURCE}/**" "${CLDR_CORE_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_CORE_SOURCE}/**" "${CLDR_CORE_PATH}")
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_DATES_SOURCE}/**" "${CLDR_DATES_PATH}")
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_NUMBERS_SOURCE}/**" "${CLDR_NUMBERS_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_NUMBERS_SOURCE}/**" "${CLDR_NUMBERS_PATH}")
else() else()
message(STATUS "Skipping download of ${CLDR_ZIP_URL}, expecting the archive to have been extracted to ${CLDR_PATH}") message(STATUS "Skipping download of ${CLDR_ZIP_URL}, expecting the archive to have been extracted to ${CLDR_PATH}")
@ -43,7 +35,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
"${CLDR_VERSION_FILE}" "${CLDR_VERSION_FILE}"
"${LOCALE_DATA_HEADER}" "${LOCALE_DATA_HEADER}"
"${LOCALE_DATA_IMPLEMENTATION}" "${LOCALE_DATA_IMPLEMENTATION}"
arguments -b "${CLDR_BCP47_PATH}" -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}" -d "${CLDR_DATES_PATH}" arguments -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}"
) )
set(LOCALE_DATA_SOURCES set(LOCALE_DATA_SOURCES

View File

@ -33,26 +33,14 @@ static ByteString format_identifier(StringView owner, ByteString identifier)
return identifier; return identifier;
} }
using KeywordList = Vector<size_t>;
struct LocaleData { struct LocaleData {
size_t calendar_keywords { 0 };
size_t collation_case_keywords { 0 };
size_t collation_numeric_keywords { 0 };
size_t number_system_keywords { 0 };
size_t text_layout { 0 };
}; };
struct CLDR { struct CLDR {
UniqueStringStorage unique_strings; UniqueStringStorage unique_strings;
UniqueStorage<KeywordList> unique_keyword_lists;
HashMap<ByteString, LocaleData> locales; HashMap<ByteString, LocaleData> locales;
Vector<Alias> locale_aliases; Vector<Alias> locale_aliases;
HashMap<ByteString, Vector<ByteString>> keywords;
HashMap<ByteString, Vector<Alias>> keyword_aliases;
HashMap<ByteString, ByteString> keyword_names;
}; };
// Some parsing is expected to fail. For example, the CLDR contains language mappings // Some parsing is expected to fail. For example, the CLDR contains language mappings
@ -81,175 +69,6 @@ ErrorOr<JsonValue const*> read_json_file_with_cache(ByteString const& path)
return &parsed_json_cache.get(path).value(); return &parsed_json_cache.get(path).value();
} }
// Reads a single cldr-bcp47 JSON file and, for every Unicode extension key listed in
// `desired_keywords`, records in `cldr`:
//   - the key's "_alias" string (into `keyword_names`),
//   - the values accepted for that key (into `keywords`),
//   - alias pairs between values (into `keyword_aliases`).
// A file without a "u" object under "keyword" contributes nothing.
static ErrorOr<void> parse_unicode_extension_keywords(ByteString bcp47_path, CLDR& cldr)
{
    constexpr auto desired_keywords = Array { "ca"sv, "co"sv, "hc"sv, "kf"sv, "kn"sv, "nu"sv };

    auto bcp47_json = TRY(read_json_file(bcp47_path));
    auto const& keyword_object = bcp47_json.as_object().get_object("keyword"sv).value();

    auto unicode_object = keyword_object.get_object("u"sv);

    if (unicode_object.has_value()) {
        unicode_object->for_each_member([&](auto const& key, auto const& value) {
            if (!desired_keywords.span().contains_slow(key))
                return;

            auto const& key_object = value.as_object();

            auto const& key_name = key_object.get_byte_string("_alias"sv).value();
            cldr.keyword_names.set(key, key_name);

            auto const is_collation_key = key == "co"sv;
            auto const is_number_system_key = key == "nu"sv;

            auto& values_for_key = cldr.keywords.ensure(key);

            // FIXME: ECMA-402 requires the list of supported collation types to include "default", but
            //        that type does not appear in collation.json.
            if (is_collation_key && !values_for_key.contains_slow("default"sv))
                values_for_key.append("default"sv);

            key_object.for_each_member([&](auto const& type, auto const& properties) {
                // Members such as "_alias" are plain strings rather than value descriptions.
                if (!properties.is_object())
                    return;

                // Filter out values not permitted by ECMA-402.
                // https://tc39.es/ecma402/#sec-intl-collator-internal-slots
                // https://tc39.es/ecma402/#sec-intl.numberformat-internal-slots
                auto const is_filtered_out = (is_collation_key && type.is_one_of("search"sv, "standard"sv))
                    || (is_number_system_key && type.is_one_of("finance"sv, "native"sv, "traditio"sv));
                if (is_filtered_out)
                    return;

                auto const& type_object = properties.as_object();

                // A value carrying "_preferred" is only recorded as an alias of the preferred
                // value; it is not added to the list of values itself.
                if (auto const& preferred = type_object.get_byte_string("_preferred"sv); preferred.has_value()) {
                    cldr.keyword_aliases.ensure(key).append({ preferred.value(), type });
                    return;
                }

                // A value carrying "_alias" is recorded as an alias and also kept as a value.
                if (auto const& alias = type_object.get_byte_string("_alias"sv); alias.has_value())
                    cldr.keyword_aliases.ensure(key).append({ type, alias.value() });

                values_for_key.append(type);
            });
        });
    }

    return {};
}
// Looks through the aliases recorded for the extension key `key` and returns the `name` of the
// first entry whose `alias` field equals `calendar`. Despite the parameter's name, callers pass
// values for keys other than "ca" as well (e.g. numbering systems for "nu").
// Returns an empty Optional when the key has no aliases or none of them matches.
static Optional<ByteString> find_keyword_alias(StringView key, StringView calendar, CLDR& cldr)
{
    auto aliases_for_key = cldr.keyword_aliases.find(key);

    if (aliases_for_key != cldr.keyword_aliases.end()) {
        auto& candidates = aliases_for_key->value;

        auto match = candidates.find_if([&](auto const& candidate) { return calendar == candidate.alias; });
        if (match != candidates.end())
            return match->name;
    }

    return {};
}
// Reads "numbers.json" from the given per-locale numbers directory and stores, in
// `locale.number_system_keywords`, the index of the de-duplicated list of numbering systems the
// locale mentions. The list is built in this order: the default numbering system, every entry of
// "otherNumberingSystems", then every "defaultNumberingSystem-alt-*" entry. Each name is first
// replaced through find_keyword_alias("nu", ...) when such an alias exists.
static ErrorOr<void> parse_number_system_keywords(ByteString locale_numbers_path, CLDR& cldr, LocaleData& locale)
{
    LexicalPath numbers_path(move(locale_numbers_path));
    numbers_path = numbers_path.append("numbers.json"sv);

    auto numbers_json = TRY(read_json_file(numbers_path.string()));

    // The JSON is nested as main -> <locale directory name> -> numbers.
    auto const& main_object = numbers_json.as_object().get_object("main"sv).value();
    auto const& locale_object = main_object.get_object(numbers_path.parent().basename()).value();
    auto const& locale_numbers_object = locale_object.get_object("numbers"sv).value();

    auto const& default_numbering_system = locale_numbers_object.get_byte_string("defaultNumberingSystem"sv).value();
    auto const& other_numbering_systems = locale_numbers_object.get_object("otherNumberingSystems"sv).value();

    KeywordList number_systems {};

    auto add_number_system = [&](ByteString system_name) {
        auto preferred_name = find_keyword_alias("nu"sv, system_name, cldr);
        if (preferred_name.has_value())
            system_name = preferred_name.release_value();

        auto string_index = cldr.unique_strings.ensure(move(system_name));

        // Keep the first occurrence only, so the insertion order above is preserved.
        if (number_systems.contains_slow(string_index))
            return;
        number_systems.append(string_index);
    };

    add_number_system(default_numbering_system);

    other_numbering_systems.for_each_member([&](auto const&, JsonValue const& value) {
        add_number_system(value.as_string());
    });

    locale_numbers_object.for_each_member([&](auto const& key, JsonValue const& value) {
        if (key.starts_with("defaultNumberingSystem-alt-"sv))
            add_number_system(value.as_string());
    });

    locale.number_system_keywords = cldr.unique_keyword_lists.ensure(move(number_systems));
    return {};
}
// Walks the given per-locale dates directory, reads every "ca-*" JSON file except
// "ca-generic.json", and stores in `locale.calendar_keywords` the index of the list of calendar
// names found under main -> <locale directory name> -> dates -> calendars. Each name is first
// replaced through find_keyword_alias("ca", ...) when such an alias exists.
static ErrorOr<void> parse_calendar_keywords(ByteString locale_dates_path, CLDR& cldr, LocaleData& locale)
{
    KeywordList calendars {};

    TRY(Core::Directory::for_each_entry(locale_dates_path, Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> {
        // Only calendar files are of interest. The generic calendar is not a supported Unicode calendar key, so skip it:
        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/calendar#unicode_calendar_keys
        if (!entry.name.starts_with("ca-"sv) || entry.name == "ca-generic.json"sv)
            return IterationDecision::Continue;

        LexicalPath calendars_path(LexicalPath::join(directory.path().string(), entry.name).string());

        auto calendars_json = TRY(read_json_file(calendars_path.string()));

        auto const& main_object = calendars_json.as_object().get_object("main"sv).value();
        auto const& locale_object = main_object.get_object(calendars_path.parent().basename()).value();
        auto const& dates_object = locale_object.get_object("dates"sv).value();
        auto const& calendars_object = dates_object.get_object("calendars"sv).value();

        calendars_object.for_each_member([&](auto calendar_name, JsonValue const&) {
            auto preferred_name = find_keyword_alias("ca"sv, calendar_name, cldr);
            if (preferred_name.has_value())
                calendar_name = preferred_name.release_value();

            calendars.append(cldr.unique_strings.ensure(calendar_name));
        });

        return IterationDecision::Continue;
    }));

    locale.calendar_keywords = cldr.unique_keyword_lists.ensure(move(calendars));
    return {};
}
// Assigns the collation case-first ("kf") and numeric ("kn") keyword lists to `locale`.
//
// Both lists are built from `cldr.keywords`, sorted so that the given default value comes first
// and the remaining values follow in ascending order. The two list indices are function-local
// statics: they are computed from `cldr` on the first call only, and every later call reuses the
// same indices regardless of its arguments.
//
// NOTE(review): `cldr.keywords` is assumed to already contain "kf" and "kn" entries (the lookup
// result is dereferenced unchecked) - confirm the bcp47 data is parsed before this is called.
static void fill_in_collation_keywords(CLDR& cldr, LocaleData& locale)
{
    // FIXME: If collation data becomes available in the CLDR, parse per-locale ordering from there.
    auto create_list_with_default_first = [&](auto key, auto default_value) {
        auto& values = cldr.keywords.find(key)->value;

        quick_sort(values, [&](auto const& lhs, auto const& rhs) {
            // Test rhs first: nothing sorts before the default value, not even the default value
            // itself. This keeps the comparator irreflexive (a strict weak ordering); testing lhs
            // first would report "default < default" as true.
            if (rhs == default_value)
                return false;
            if (lhs == default_value)
                return true;
            return lhs < rhs;
        });

        KeywordList keywords;
        keywords.ensure_capacity(values.size());

        for (auto const& value : values)
            keywords.append(cldr.unique_strings.ensure(value));

        return cldr.unique_keyword_lists.ensure(move(keywords));
    };

    static auto kf_index = create_list_with_default_first("kf"sv, "upper"sv);
    static auto kn_index = create_list_with_default_first("kn"sv, "true"sv);

    locale.collation_case_keywords = kf_index;
    locale.collation_numeric_keywords = kn_index;
}
static ErrorOr<void> parse_default_content_locales(ByteString core_path, CLDR& cldr) static ErrorOr<void> parse_default_content_locales(ByteString core_path, CLDR& cldr)
{ {
LexicalPath default_content_path(move(core_path)); LexicalPath default_content_path(move(core_path));
@ -324,7 +143,7 @@ static ErrorOr<void> define_aliases_without_scripts(CLDR& cldr)
return {}; return {};
} }
static ErrorOr<void> parse_all_locales(ByteString bcp47_path, ByteString core_path, ByteString numbers_path, ByteString dates_path, CLDR& cldr) static ErrorOr<void> parse_all_locales(ByteString core_path, ByteString numbers_path, CLDR& cldr)
{ {
LexicalPath core_supplemental_path(core_path); LexicalPath core_supplemental_path(core_path);
core_supplemental_path = core_supplemental_path.append("supplemental"sv); core_supplemental_path = core_supplemental_path.append("supplemental"sv);
@ -343,28 +162,11 @@ static ErrorOr<void> parse_all_locales(ByteString bcp47_path, ByteString core_pa
return builder.to_byte_string(); return builder.to_byte_string();
}; };
TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/bcp47", bcp47_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> {
auto bcp47_path = LexicalPath::join(directory.path().string(), entry.name).string();
TRY(parse_unicode_extension_keywords(move(bcp47_path), cldr));
return IterationDecision::Continue;
}));
TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", numbers_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> { TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", numbers_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> {
auto numbers_path = LexicalPath::join(directory.path().string(), entry.name).string(); auto numbers_path = LexicalPath::join(directory.path().string(), entry.name).string();
auto language = TRY(remove_variants_from_path(numbers_path)); auto language = TRY(remove_variants_from_path(numbers_path));
auto& locale = cldr.locales.ensure(language); cldr.locales.ensure(language);
TRY(parse_number_system_keywords(numbers_path, cldr, locale));
fill_in_collation_keywords(cldr, locale);
return IterationDecision::Continue;
}));
TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", dates_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> {
auto dates_path = LexicalPath::join(directory.path().string(), entry.name).string();
auto language = TRY(remove_variants_from_path(dates_path));
auto& locale = cldr.locales.ensure(language);
TRY(parse_calendar_keywords(dates_path, cldr, locale));
return IterationDecision::Continue; return IterationDecision::Continue;
})); }));
@ -388,20 +190,7 @@ namespace Locale {
)~~~"); )~~~");
auto locales = cldr.locales.keys(); auto locales = cldr.locales.keys();
auto keywords = cldr.keywords.keys();
generate_enum(generator, format_identifier, "Locale"sv, "None"sv, locales, cldr.locale_aliases); generate_enum(generator, format_identifier, "Locale"sv, "None"sv, locales, cldr.locale_aliases);
generate_enum(generator, format_identifier, "Key"sv, {}, keywords);
for (auto& keyword : cldr.keywords) {
auto const& keyword_name = cldr.keyword_names.find(keyword.key)->value;
auto enum_name = ByteString::formatted("Keyword{}", format_identifier({}, keyword_name));
if (auto aliases = cldr.keyword_aliases.find(keyword.key); aliases != cldr.keyword_aliases.end())
generate_enum(generator, format_identifier, enum_name, {}, keyword.value, aliases->value);
else
generate_enum(generator, format_identifier, enum_name, {}, keyword.value);
}
generator.append(R"~~~( generator.append(R"~~~(
} }
@ -436,80 +225,9 @@ static ErrorOr<void> generate_unicode_locale_implementation(Core::InputBufferedF
namespace Locale { namespace Locale {
)~~~"); )~~~");
cldr.unique_strings.generate(generator);
generate_available_values(generator, "get_available_calendars"sv, cldr.keywords.find("ca"sv)->value, cldr.keyword_aliases.find("ca"sv)->value,
[](auto calendar) {
// FIXME: Remove this filter when we support all calendars.
return calendar.is_one_of("gregory"sv, "iso8601"sv);
});
generate_available_values(generator, "get_available_collation_case_orderings"sv, cldr.keywords.find("kf"sv)->value, cldr.keyword_aliases.find("kf"sv)->value);
generate_available_values(generator, "get_available_collation_numeric_orderings"sv, cldr.keywords.find("kn"sv)->value, cldr.keyword_aliases.find("kn"sv)->value);
generate_available_values(generator, "get_available_collation_types"sv, cldr.keywords.find("co"sv)->value, cldr.keyword_aliases.find("co"sv)->value,
[](auto collation) {
// FIXME: Remove this filter when we support all collation types.
return collation == "default"sv;
});
generate_available_values(generator, "get_available_hour_cycles"sv, cldr.keywords.find("hc"sv)->value);
generate_available_values(generator, "get_available_number_systems"sv, cldr.keywords.find("nu"sv)->value);
generator.append(R"~~~(
ReadonlySpan<StringView> get_available_keyword_values(StringView key)
{
auto key_value = key_from_string(key);
if (!key_value.has_value())
return {};
switch (*key_value) {
case Key::Ca:
return get_available_calendars();
case Key::Co:
return get_available_collation_types();
case Key::Hc:
return get_available_hour_cycles();
case Key::Kf:
return get_available_collation_case_orderings();
case Key::Kn:
return get_available_collation_numeric_orderings();
case Key::Nu:
return get_available_number_systems();
}
VERIFY_NOT_REACHED();
}
)~~~");
cldr.unique_keyword_lists.generate(generator, string_index_type, "s_keyword_lists"sv);
auto append_mapping = [&](auto const& keys, auto const& map, auto type, auto name, auto mapping_getter) {
generator.set("type", type);
generator.set("name", name);
generator.set("size", ByteString::number(keys.size()));
generator.append(R"~~~(
static constexpr Array<@type@, @size@> @name@ { {)~~~");
bool first = true;
for (auto const& key : keys) {
auto const& value = map.find(key)->value;
auto mapping = mapping_getter(value);
generator.append(first ? " "sv : ", "sv);
generator.append(ByteString::number(mapping));
first = false;
}
generator.append(" } };");
};
auto locales = cldr.locales.keys(); auto locales = cldr.locales.keys();
quick_sort(locales); quick_sort(locales);
append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_calendar_keywords"sv, [&](auto const& locale) { return locale.calendar_keywords; });
append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_collation_case_keywords"sv, [&](auto const& locale) { return locale.collation_case_keywords; });
append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_collation_numeric_keywords"sv, [&](auto const& locale) { return locale.collation_numeric_keywords; });
append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_number_system_keywords"sv, [&](auto const& locale) { return locale.number_system_keywords; });
auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector<Alias> const& aliases = {}) -> ErrorOr<void> { auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector<Alias> const& aliases = {}) -> ErrorOr<void> {
HashValueMap<ByteString> hashes; HashValueMap<ByteString> hashes;
TRY(hashes.try_ensure_capacity(values.size())); TRY(hashes.try_ensure_capacity(values.size()));
@ -525,106 +243,8 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
}; };
TRY(append_from_string("Locale"sv, "locale"sv, cldr.locales.keys(), cldr.locale_aliases)); TRY(append_from_string("Locale"sv, "locale"sv, cldr.locales.keys(), cldr.locale_aliases));
TRY(append_from_string("Key"sv, "key"sv, cldr.keywords.keys()));
for (auto const& keyword : cldr.keywords) {
auto const& keyword_name = cldr.keyword_names.find(keyword.key)->value;
auto enum_name = ByteString::formatted("Keyword{}", format_identifier({}, keyword_name));
auto enum_snake = ByteString::formatted("keyword_{}", keyword.key);
if (auto aliases = cldr.keyword_aliases.find(keyword.key); aliases != cldr.keyword_aliases.end())
TRY(append_from_string(enum_name, enum_snake, keyword.value, aliases->value));
else
TRY(append_from_string(enum_name, enum_snake, keyword.value));
}
generator.append(R"~~~( generator.append(R"~~~(
static ReadonlySpan<@string_index_type@> find_keyword_indices(StringView locale, StringView key)
{
auto locale_value = locale_from_string(locale);
if (!locale_value.has_value())
return {};
auto key_value = key_from_string(key);
if (!key_value.has_value())
return {};
auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
size_t keywords_index = 0;
switch (*key_value) {
case Key::Ca:
keywords_index = s_calendar_keywords.at(locale_index);
break;
case Key::Kf:
keywords_index = s_collation_case_keywords.at(locale_index);
break;
case Key::Kn:
keywords_index = s_collation_numeric_keywords.at(locale_index);
break;
case Key::Nu:
keywords_index = s_number_system_keywords.at(locale_index);
break;
default:
VERIFY_NOT_REACHED();
}
return s_keyword_lists.at(keywords_index);
}
Optional<StringView> get_preferred_keyword_value_for_locale(StringView locale, StringView key)
{
// Hour cycle keywords are region-based rather than locale-based, so they need to be handled specially.
// FIXME: Calendar keywords are also region-based, and will need to be handled here when we support non-Gregorian calendars:
// https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/calendarPreferenceData.json
if (key == "hc"sv) {
if (auto hour_cycle = default_hour_cycle(locale); hour_cycle.has_value())
return hour_cycle_to_string(*hour_cycle);
return {};
}
// FIXME: Generate locale-preferred collation data when available in the CLDR.
if (key == "co"sv) {
auto collations = get_available_collation_types();
if (collations.is_empty())
return OptionalNone {};
return Optional<StringView> { collations[0] };
}
auto keyword_indices = find_keyword_indices(locale, key);
if (keyword_indices.is_empty())
return OptionalNone {};
return Optional<StringView> { decode_string(keyword_indices[0]) };
}
Vector<StringView> get_keywords_for_locale(StringView locale, StringView key)
{
// Hour cycle keywords are region-based rather than locale-based, so they need to be handled specially.
// FIXME: Calendar keywords are also region-based, and will need to be handled here when we support non-Gregorian calendars:
// https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/calendarPreferenceData.json
if (key == "hc"sv) {
if (auto hour_cycle = default_hour_cycle(locale); hour_cycle.has_value())
return { hour_cycle_to_string(*hour_cycle) };
return {};
}
// FIXME: Generate locale-preferred collation data when available in the CLDR.
if (key == "co"sv)
return Vector<StringView> { get_available_collation_types() };
auto keyword_indices = find_keyword_indices(locale, key);
Vector<StringView> keywords;
keywords.ensure_capacity(keyword_indices.size());
for (auto keyword : keyword_indices)
keywords.unchecked_append(decode_string(keyword));
return keywords;
}
} }
)~~~"); )~~~");
@ -636,25 +256,21 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
{ {
StringView generated_header_path; StringView generated_header_path;
StringView generated_implementation_path; StringView generated_implementation_path;
StringView bcp47_path;
StringView core_path; StringView core_path;
StringView numbers_path; StringView numbers_path;
StringView dates_path;
Core::ArgsParser args_parser; Core::ArgsParser args_parser;
args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path"); args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
args_parser.add_option(bcp47_path, "Path to cldr-bcp47 directory", "bcp47-path", 'b', "bcp47-path");
args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path"); args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path"); args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
args_parser.parse(arguments); args_parser.parse(arguments);
auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write)); auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write));
auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write)); auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write));
CLDR cldr; CLDR cldr;
TRY(parse_all_locales(bcp47_path, core_path, numbers_path, dates_path, cldr)); TRY(parse_all_locales(core_path, numbers_path, cldr));
TRY(generate_unicode_locale_header(*generated_header_file, cldr)); TRY(generate_unicode_locale_header(*generated_header_file, cldr));
TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr)); TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr));

View File

@ -17,6 +17,7 @@
#include <LibJS/Runtime/Intl/Locale.h> #include <LibJS/Runtime/Intl/Locale.h>
#include <LibJS/Runtime/ValueInlines.h> #include <LibJS/Runtime/ValueInlines.h>
#include <LibLocale/Locale.h> #include <LibLocale/Locale.h>
#include <LibLocale/UnicodeKeywords.h>
namespace JS::Intl { namespace JS::Intl {
@ -415,15 +416,11 @@ LocaleResult resolve_locale(Vector<String> const& requested_locales, LocaleOptio
// b. Assert: Type(foundLocaleData) is Record. // b. Assert: Type(foundLocaleData) is Record.
// c. Let keyLocaleData be foundLocaleData.[[<key>]]. // c. Let keyLocaleData be foundLocaleData.[[<key>]].
// d. Assert: Type(keyLocaleData) is List. // d. Assert: Type(keyLocaleData) is List.
auto key_locale_data = ::Locale::get_available_keyword_values(key); auto key_locale_data = ::Locale::available_keyword_values(found_locale, key);
// e. Let value be keyLocaleData[0]. // e. Let value be keyLocaleData[0].
// f. Assert: Type(value) is either String or Null. // f. Assert: Type(value) is either String or Null.
// NOTE: ECMA-402 assumes keyLocaleData is sorted by locale preference. Our list is sorted auto value = key_locale_data[0];
// alphabetically, so we get the locale's preferred value from LibUnicode.
Optional<String> value;
if (auto preference = ::Locale::get_preferred_keyword_value_for_locale(found_locale, key); preference.has_value())
value = MUST(String::from_utf8(*preference));
// g. Let supportedExtensionAddition be "". // g. Let supportedExtensionAddition be "".
Optional<::Locale::Keyword> supported_extension_addition {}; Optional<::Locale::Keyword> supported_extension_addition {};
@ -482,7 +479,7 @@ LocaleResult resolve_locale(Vector<String> const& requested_locales, LocaleOptio
// iv. If SameValue(optionsValue, value) is false and keyLocaleData contains optionsValue, then // iv. If SameValue(optionsValue, value) is false and keyLocaleData contains optionsValue, then
if (options_value.has_value() && (options_value != value) && key_locale_data.contains_slow(*options_value)) { if (options_value.has_value() && (options_value != value) && key_locale_data.contains_slow(*options_value)) {
// 1. Let value be optionsValue. // 1. Let value be optionsValue.
value = move(options_value); value = options_value.release_value();
// 2. Let supportedExtensionAddition be "". // 2. Let supportedExtensionAddition be "".
supported_extension_addition.clear(); supported_extension_addition.clear();

View File

@ -23,6 +23,7 @@
#include <LibLocale/DateTimeFormat.h> #include <LibLocale/DateTimeFormat.h>
#include <LibLocale/Locale.h> #include <LibLocale/Locale.h>
#include <LibLocale/NumberFormat.h> #include <LibLocale/NumberFormat.h>
#include <LibLocale/UnicodeKeywords.h>
namespace JS::Intl { namespace JS::Intl {
@ -121,23 +122,22 @@ JS_DEFINE_NATIVE_FUNCTION(Intl::supported_values_of)
// 2. If key is "calendar", then // 2. If key is "calendar", then
if (key == "calendar"sv) { if (key == "calendar"sv) {
// a. Let list be ! AvailableCanonicalCalendars( ). // a. Let list be ! AvailableCanonicalCalendars( ).
list = ::Locale::get_available_calendars(); list = ::Locale::available_calendars().span();
} }
// 3. Else if key is "collation", then // 3. Else if key is "collation", then
else if (key == "collation"sv) { else if (key == "collation"sv) {
// a. Let list be ! AvailableCanonicalCollations( ). // a. Let list be ! AvailableCanonicalCollations( ).
list = ::Locale::get_available_collation_types(); list = ::Locale::available_collations().span();
} }
// 4. Else if key is "currency", then // 4. Else if key is "currency", then
else if (key == "currency"sv) { else if (key == "currency"sv) {
// a. Let list be ! AvailableCanonicalCurrencies( ). // a. Let list be ! AvailableCanonicalCurrencies( ).
static auto const currencies = ::Locale::available_currencies(); list = ::Locale::available_currencies().span();
list = currencies.span();
} }
// 5. Else if key is "numberingSystem", then // 5. Else if key is "numberingSystem", then
else if (key == "numberingSystem"sv) { else if (key == "numberingSystem"sv) {
// a. Let list be ! AvailableCanonicalNumberingSystems( ). // a. Let list be ! AvailableCanonicalNumberingSystems( ).
list = ::Locale::get_available_number_systems(); list = ::Locale::available_number_systems().span();
} }
// 6. Else if key is "timeZone", then // 6. Else if key is "timeZone", then
else if (key == "timeZone"sv) { else if (key == "timeZone"sv) {

View File

@ -10,6 +10,7 @@
#include <LibJS/Runtime/Intl/Locale.h> #include <LibJS/Runtime/Intl/Locale.h>
#include <LibLocale/DateTimeFormat.h> #include <LibLocale/DateTimeFormat.h>
#include <LibLocale/Locale.h> #include <LibLocale/Locale.h>
#include <LibLocale/UnicodeKeywords.h>
#include <LibTimeZone/TimeZone.h> #include <LibTimeZone/TimeZone.h>
namespace JS::Intl { namespace JS::Intl {
@ -38,7 +39,7 @@ Locale::Locale(Object& prototype)
} }
// 1.1.1 CreateArrayFromListOrRestricted ( list , restricted ) // 1.1.1 CreateArrayFromListOrRestricted ( list , restricted )
static NonnullGCPtr<Array> create_array_from_list_or_restricted(VM& vm, Vector<StringView> list, Optional<String> restricted) static NonnullGCPtr<Array> create_array_from_list_or_restricted(VM& vm, Vector<String> list, Optional<String> restricted)
{ {
auto& realm = *vm.current_realm(); auto& realm = *vm.current_realm();
@ -49,8 +50,8 @@ static NonnullGCPtr<Array> create_array_from_list_or_restricted(VM& vm, Vector<S
} }
// 2. Return ! CreateArrayFromList( list ). // 2. Return ! CreateArrayFromList( list ).
return Array::create_from<StringView>(realm, list, [&vm](auto value) { return Array::create_from<String>(realm, list, [&vm](auto value) {
return PrimitiveString::create(vm, MUST(String::from_utf8(value))); return PrimitiveString::create(vm, move(value));
}); });
} }
@ -67,7 +68,7 @@ NonnullGCPtr<Array> calendars_of_locale(VM& vm, Locale const& locale_object)
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
// 4. Let list be a List of 1 or more unique canonical calendar identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for date and time formatting in locale. // 4. Let list be a List of 1 or more unique canonical calendar identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for date and time formatting in locale.
auto list = ::Locale::get_keywords_for_locale(locale, "ca"sv); auto list = ::Locale::available_calendars(locale);
// 5. Return ! CreateArrayFromListOrRestricted( list, restricted ). // 5. Return ! CreateArrayFromListOrRestricted( list, restricted ).
return create_array_from_list_or_restricted(vm, move(list), move(restricted)); return create_array_from_list_or_restricted(vm, move(list), move(restricted));
@ -86,7 +87,7 @@ NonnullGCPtr<Array> collations_of_locale(VM& vm, Locale const& locale_object)
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
// 4. Let list be a List of 1 or more unique canonical collation identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, ordered as if an Array of the same values had been sorted, using %Array.prototype.sort% using undefined as comparefn, of those in common use for string comparison in locale. The values "standard" and "search" must be excluded from list. // 4. Let list be a List of 1 or more unique canonical collation identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, ordered as if an Array of the same values had been sorted, using %Array.prototype.sort% using undefined as comparefn, of those in common use for string comparison in locale. The values "standard" and "search" must be excluded from list.
auto list = ::Locale::get_keywords_for_locale(locale, "co"sv); auto list = ::Locale::available_collations(locale);
// 5. Return ! CreateArrayFromListOrRestricted( list, restricted ). // 5. Return ! CreateArrayFromListOrRestricted( list, restricted ).
return create_array_from_list_or_restricted(vm, move(list), move(restricted)); return create_array_from_list_or_restricted(vm, move(list), move(restricted));
@ -105,7 +106,7 @@ NonnullGCPtr<Array> hour_cycles_of_locale(VM& vm, Locale const& locale_object)
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
// 4. Let list be a List of 1 or more unique hour cycle identifiers, which must be lower case String values indicating either the 12-hour format ("h11", "h12") or the 24-hour format ("h23", "h24"), sorted in descending preference of those in common use for date and time formatting in locale. // 4. Let list be a List of 1 or more unique hour cycle identifiers, which must be lower case String values indicating either the 12-hour format ("h11", "h12") or the 24-hour format ("h23", "h24"), sorted in descending preference of those in common use for date and time formatting in locale.
auto list = ::Locale::get_keywords_for_locale(locale, "hc"sv); auto list = ::Locale::available_hour_cycles(locale);
// 5. Return ! CreateArrayFromListOrRestricted( list, restricted ). // 5. Return ! CreateArrayFromListOrRestricted( list, restricted ).
return create_array_from_list_or_restricted(vm, move(list), move(restricted)); return create_array_from_list_or_restricted(vm, move(list), move(restricted));
@ -124,7 +125,7 @@ NonnullGCPtr<Array> numbering_systems_of_locale(VM& vm, Locale const& locale_obj
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
// 4. Let list be a List of 1 or more unique canonical numbering system identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for formatting numeric values in locale. // 4. Let list be a List of 1 or more unique canonical numbering system identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for formatting numeric values in locale.
auto list = ::Locale::get_keywords_for_locale(locale, "nu"sv); auto list = ::Locale::available_number_systems(locale);
// 5. Return ! CreateArrayFromListOrRestricted( list, restricted ). // 5. Return ! CreateArrayFromListOrRestricted( list, restricted ).
return create_array_from_list_or_restricted(vm, move(list), move(restricted)); return create_array_from_list_or_restricted(vm, move(list), move(restricted));

View File

@ -67,14 +67,14 @@ describe("correct behavior", () => {
}); });
test("numeric option limited to known 'kn' values", () => { test("numeric option limited to known 'kn' values", () => {
["true", "foo"].forEach(numeric => { ["false", "foo"].forEach(numeric => {
const en = new Intl.Collator(`en-u-kn-${numeric}`); const en = new Intl.Collator(`en-u-kn-${numeric}`);
expect(en.resolvedOptions().numeric).toBeTrue(); expect(en.resolvedOptions().numeric).toBeFalse();
}); });
["true", "foo"].forEach(numeric => { ["false", "foo"].forEach(numeric => {
const el = new Intl.Collator(`el-u-kn-${numeric}`); const el = new Intl.Collator(`el-u-kn-${numeric}`);
expect(el.resolvedOptions().numeric).toBeTrue(); expect(el.resolvedOptions().numeric).toBeFalse();
}); });
}); });
@ -95,14 +95,14 @@ describe("correct behavior", () => {
}); });
test("caseFirst option limited to known 'kf' values", () => { test("caseFirst option limited to known 'kf' values", () => {
["upper", "foo"].forEach(caseFirst => { ["false", "foo"].forEach(caseFirst => {
const en = Intl.Collator(`en-u-kf-${caseFirst}`); const en = Intl.Collator(`en-u-kf-${caseFirst}`);
expect(en.resolvedOptions().caseFirst).toBe("upper"); expect(en.resolvedOptions().caseFirst).toBe("false");
}); });
["upper", "foo"].forEach(caseFirst => { ["false", "foo"].forEach(caseFirst => {
const el = Intl.Collator(`el-u-kf-${caseFirst}`); const el = Intl.Collator(`el-u-kf-${caseFirst}`);
expect(el.resolvedOptions().caseFirst).toBe("upper"); expect(el.resolvedOptions().caseFirst).toBe("false");
}); });
}); });
}); });

View File

@ -59,8 +59,8 @@ describe("correct behavior", () => {
}); });
test("numberingSystem option overrides locale extension", () => { test("numberingSystem option overrides locale extension", () => {
const el = Intl.DateTimeFormat("el-u-nu-latn", { numberingSystem: "grek" }); const el = Intl.DateTimeFormat("el-u-nu-latn", { numberingSystem: "adlm" });
expect(el.resolvedOptions().numberingSystem).toBe("grek"); expect(el.resolvedOptions().numberingSystem).toBe("adlm");
}); });
test("numberingSystem option limited to known 'nu' values", () => { test("numberingSystem option limited to known 'nu' values", () => {
@ -74,12 +74,12 @@ describe("correct behavior", () => {
expect(en.resolvedOptions().numberingSystem).toBe("latn"); expect(en.resolvedOptions().numberingSystem).toBe("latn");
}); });
["latn", "grek"].forEach(numberingSystem => { ["latn", "adlm"].forEach(numberingSystem => {
const el = Intl.DateTimeFormat("el", { numberingSystem: numberingSystem }); const el = Intl.DateTimeFormat("el", { numberingSystem: numberingSystem });
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem); expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
}); });
["latn", "grek"].forEach(numberingSystem => { ["latn", "adlm"].forEach(numberingSystem => {
const el = Intl.DateTimeFormat(`el-u-nu-${numberingSystem}`); const el = Intl.DateTimeFormat(`el-u-nu-${numberingSystem}`);
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem); expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
}); });

View File

@ -28,8 +28,8 @@ describe("correct behavior", () => {
}); });
test("numberingSystem option overrides locale extension", () => { test("numberingSystem option overrides locale extension", () => {
const el = new Intl.DurationFormat("el-u-nu-latn", { numberingSystem: "grek" }); const el = new Intl.DurationFormat("el-u-nu-latn", { numberingSystem: "adlm" });
expect(el.resolvedOptions().numberingSystem).toBe("grek"); expect(el.resolvedOptions().numberingSystem).toBe("adlm");
}); });
test("numberingSystem option limited to known 'nu' values", () => { test("numberingSystem option limited to known 'nu' values", () => {
@ -43,12 +43,12 @@ describe("correct behavior", () => {
expect(en.resolvedOptions().numberingSystem).toBe("latn"); expect(en.resolvedOptions().numberingSystem).toBe("latn");
}); });
["latn", "grek"].forEach(numberingSystem => { ["latn", "adlm"].forEach(numberingSystem => {
const el = new Intl.DurationFormat("el", { numberingSystem: numberingSystem }); const el = new Intl.DurationFormat("el", { numberingSystem: numberingSystem });
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem); expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
}); });
["latn", "grek"].forEach(numberingSystem => { ["latn", "adlm"].forEach(numberingSystem => {
const el = new Intl.DurationFormat(`el-u-nu-${numberingSystem}`); const el = new Intl.DurationFormat(`el-u-nu-${numberingSystem}`);
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem); expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
}); });

View File

@ -7,25 +7,29 @@ describe("errors", () => {
}); });
describe("normal behavior", () => { describe("normal behavior", () => {
test("basic functionality", () => { const testCalendars = (locale, expected) => {
expect(Array.isArray(new Intl.Locale("en").getCalendars())).toBeTrue(); const result = locale.getCalendars();
expect(new Intl.Locale("en").getCalendars()).toEqual(["gregory"]); expect(Array.isArray(result)).toBeTrue();
expect(Array.isArray(new Intl.Locale("ar").getCalendars())).toBeTrue(); for (const entry of expected) {
expect(new Intl.Locale("ar").getCalendars()).toEqual(["gregory"]); expect(result).toContain(entry);
}
};
test("basic functionality", () => {
testCalendars(new Intl.Locale("en"), ["gregory"]);
testCalendars(new Intl.Locale("ar"), ["gregory"]);
}); });
test("extension keyword overrides default data", () => { test("extension keyword overrides default data", () => {
expect(new Intl.Locale("en-u-ca-islamicc").getCalendars()).toEqual(["islamic-civil"]); testCalendars(new Intl.Locale("en-u-ca-islamicc"), ["islamic-civil"]);
expect(new Intl.Locale("en", { calendar: "dangi" }).getCalendars()).toEqual(["dangi"]); testCalendars(new Intl.Locale("en", { calendar: "dangi" }), ["dangi"]);
expect(new Intl.Locale("ar-u-ca-ethiopic-amete-alem").getCalendars()).toEqual(["ethioaa"]); testCalendars(new Intl.Locale("ar-u-ca-ethiopic-amete-alem"), ["ethioaa"]);
expect(new Intl.Locale("ar", { calendar: "hebrew" }).getCalendars()).toEqual(["hebrew"]); testCalendars(new Intl.Locale("ar", { calendar: "hebrew" }), ["hebrew"]);
// Invalid calendars also take precedence. // Invalid calendars also take precedence.
expect(new Intl.Locale("en-u-ca-ladybird").getCalendars()).toEqual(["ladybird"]); testCalendars(new Intl.Locale("en-u-ca-ladybird"), ["ladybird"]);
expect(new Intl.Locale("en", { calendar: "ladybird" }).getCalendars()).toEqual([ testCalendars(new Intl.Locale("en", { calendar: "ladybird" }), ["ladybird"]);
"ladybird",
]);
}); });
}); });

View File

@ -7,27 +7,29 @@ describe("errors", () => {
}); });
describe("normal behavior", () => { describe("normal behavior", () => {
test("basic functionality", () => { const testCollations = (locale, expected) => {
expect(Array.isArray(new Intl.Locale("en").getCollations())).toBeTrue(); const result = locale.getCollations();
expect(new Intl.Locale("en").getCollations()).toEqual(["default"]); expect(Array.isArray(result)).toBeTrue();
expect(Array.isArray(new Intl.Locale("ar").getCollations())).toBeTrue(); for (const entry of expected) {
expect(new Intl.Locale("ar").getCollations()).toEqual(["default"]); expect(result).toContain(entry);
}
};
test("basic functionality", () => {
testCollations(new Intl.Locale("en"), ["default"]);
testCollations(new Intl.Locale("ar"), ["default"]);
}); });
test("extension keyword overrides default data", () => { test("extension keyword overrides default data", () => {
expect(new Intl.Locale("en-u-co-compat").getCollations()).toEqual(["compat"]); testCollations(new Intl.Locale("en-u-co-compat"), ["compat"]);
expect(new Intl.Locale("en", { collation: "compat" }).getCollations()).toEqual(["compat"]); testCollations(new Intl.Locale("en", { collation: "compat" }), ["compat"]);
expect(new Intl.Locale("ar-u-co-reformed").getCollations()).toEqual(["reformed"]); testCollations(new Intl.Locale("ar-u-co-reformed"), ["reformed"]);
expect(new Intl.Locale("ar", { collation: "reformed" }).getCollations()).toEqual([ testCollations(new Intl.Locale("ar", { collation: "reformed" }), ["reformed"]);
"reformed",
]);
// Invalid getCollations() also take precedence. // Invalid getCollations() also take precedence.
expect(new Intl.Locale("en-u-co-ladybird").getCollations()).toEqual(["ladybird"]); testCollations(new Intl.Locale("en-u-co-ladybird"), ["ladybird"]);
expect(new Intl.Locale("en", { collation: "ladybird" }).getCollations()).toEqual([ testCollations(new Intl.Locale("en", { collation: "ladybird" }), ["ladybird"]);
"ladybird",
]);
}); });
}); });

View File

@ -7,23 +7,29 @@ describe("errors", () => {
}); });
describe("normal behavior", () => { describe("normal behavior", () => {
test("basic functionality", () => { const testHourCycles = (locale, expected) => {
expect(Array.isArray(new Intl.Locale("en").getHourCycles())).toBeTrue(); const result = locale.getHourCycles();
expect(new Intl.Locale("en").getHourCycles()).toContain("h12"); expect(Array.isArray(result)).toBeTrue();
expect(Array.isArray(new Intl.Locale("ha").getHourCycles())).toBeTrue(); for (const entry of expected) {
expect(new Intl.Locale("ha").getHourCycles()).toContain("h23"); expect(result).toContain(entry);
}
};
test("basic functionality", () => {
testHourCycles(new Intl.Locale("en"), ["h12"]);
testHourCycles(new Intl.Locale("ha"), ["h23"]);
}); });
test("extension keyword overrides default data", () => { test("extension keyword overrides default data", () => {
// The options-bag form must set `hourCycle` (not `collation`) so that it exercises the same override as the `-u-hc-` extension.
expect(new Intl.Locale("en-u-hc-h24").getHourCycles()).toEqual(["h24"]); testHourCycles(new Intl.Locale("en-u-hc-h24"), ["h24"]);
expect(new Intl.Locale("en", { hourCycle: "h24" }).getHourCycles()).toEqual(["h24"]); testHourCycles(new Intl.Locale("en", { hourCycle: "h24" }), ["h24"]);
expect(new Intl.Locale("ar-u-hc-h24").getHourCycles()).toEqual(["h24"]); testHourCycles(new Intl.Locale("ar-u-hc-h24"), ["h24"]);
expect(new Intl.Locale("ar", { hourCycle: "h24" }).getHourCycles()).toEqual(["h24"]); testHourCycles(new Intl.Locale("ar", { hourCycle: "h24" }), ["h24"]);
// Invalid hourCycles also take precedence when specified in the locale string. Unlike other // Invalid hourCycles also take precedence when specified in the locale string. Unlike other
// properties, Locale("en", { hourCycle: "ladybird" }) will explicitly throw. // properties, Locale("en", { hourCycle: "ladybird" }) will explicitly throw.
expect(new Intl.Locale("en-u-hc-ladybird").getHourCycles()).toEqual(["ladybird"]); testHourCycles(new Intl.Locale("en-u-hc-ladybird"), ["ladybird"]);
}); });
}); });

View File

@ -7,29 +7,29 @@ describe("errors", () => {
}); });
describe("normal behavior", () => { describe("normal behavior", () => {
test("basic functionality", () => { const testNumberingSystems = (locale, expected) => {
expect(Array.isArray(new Intl.Locale("en").getNumberingSystems())).toBeTrue(); const result = locale.getNumberingSystems();
expect(new Intl.Locale("en").getNumberingSystems()).toEqual(["latn"]); expect(Array.isArray(result)).toBeTrue();
expect(Array.isArray(new Intl.Locale("ar").getNumberingSystems())).toBeTrue(); for (const entry of expected) {
expect(new Intl.Locale("ar").getNumberingSystems()).toEqual(["arab", "latn"]); expect(result).toContain(entry);
}
};
test("basic functionality", () => {
testNumberingSystems(new Intl.Locale("en"), ["latn"]);
testNumberingSystems(new Intl.Locale("ar"), ["arab", "latn"]);
}); });
test("extension keyword overrides default data", () => { test("extension keyword overrides default data", () => {
expect(new Intl.Locale("en-u-nu-deva").getNumberingSystems()).toEqual(["deva"]); testNumberingSystems(new Intl.Locale("en-u-nu-deva"), ["deva"]);
expect(new Intl.Locale("en", { numberingSystem: "deva" }).getNumberingSystems()).toEqual([ testNumberingSystems(new Intl.Locale("en", { numberingSystem: "deva" }), ["deva"]);
"deva",
]);
expect(new Intl.Locale("ar-u-nu-bali").getNumberingSystems()).toEqual(["bali"]); testNumberingSystems(new Intl.Locale("ar-u-nu-bali"), ["bali"]);
expect(new Intl.Locale("ar", { numberingSystem: "bali" }).getNumberingSystems()).toEqual([ testNumberingSystems(new Intl.Locale("ar", { numberingSystem: "bali" }), ["bali"]);
"bali",
]);
// Invalid numberingSystems also take precedence. // Invalid numberingSystems also take precedence.
expect(new Intl.Locale("en-u-nu-ladybird").getNumberingSystems()).toEqual(["ladybird"]); testNumberingSystems(new Intl.Locale("en-u-nu-ladybird"), ["ladybird"]);
expect( testNumberingSystems(new Intl.Locale("en", { numberingSystem: "ladybird" }), ["ladybird"]);
new Intl.Locale("en", { numberingSystem: "ladybird" }).getNumberingSystems()
).toEqual(["ladybird"]);
}); });
}); });

View File

@ -31,8 +31,8 @@ describe("correct behavior", () => {
}); });
test("numberingSystem option overrides locale extension", () => { test("numberingSystem option overrides locale extension", () => {
const el = Intl.NumberFormat("el-u-nu-latn", { numberingSystem: "grek" }); const el = Intl.NumberFormat("el-u-nu-latn", { numberingSystem: "adlm" });
expect(el.resolvedOptions().numberingSystem).toBe("grek"); expect(el.resolvedOptions().numberingSystem).toBe("adlm");
}); });
test("numberingSystem option limited to known 'nu' values", () => { test("numberingSystem option limited to known 'nu' values", () => {
@ -46,12 +46,12 @@ describe("correct behavior", () => {
expect(en.resolvedOptions().numberingSystem).toBe("latn"); expect(en.resolvedOptions().numberingSystem).toBe("latn");
}); });
["latn", "grek"].forEach(numberingSystem => { ["latn", "adlm"].forEach(numberingSystem => {
const el = Intl.NumberFormat("el", { numberingSystem: numberingSystem }); const el = Intl.NumberFormat("el", { numberingSystem: numberingSystem });
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem); expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
}); });
["latn", "grek"].forEach(numberingSystem => { ["latn", "adlm"].forEach(numberingSystem => {
const el = Intl.NumberFormat(`el-u-nu-${numberingSystem}`); const el = Intl.NumberFormat(`el-u-nu-${numberingSystem}`);
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem); expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
}); });

View File

@ -31,8 +31,8 @@ describe("correct behavior", () => {
}); });
test("numberingSystem option overrides locale extension", () => { test("numberingSystem option overrides locale extension", () => {
const el = new Intl.RelativeTimeFormat("el-u-nu-latn", { numberingSystem: "grek" }); const el = new Intl.RelativeTimeFormat("el-u-nu-latn", { numberingSystem: "adlm" });
expect(el.resolvedOptions().numberingSystem).toBe("grek"); expect(el.resolvedOptions().numberingSystem).toBe("adlm");
}); });
test("numberingSystem option limited to known 'nu' values", () => { test("numberingSystem option limited to known 'nu' values", () => {
@ -46,12 +46,12 @@ describe("correct behavior", () => {
expect(en.resolvedOptions().numberingSystem).toBe("latn"); expect(en.resolvedOptions().numberingSystem).toBe("latn");
}); });
["latn", "grek"].forEach(numberingSystem => { ["latn", "adlm"].forEach(numberingSystem => {
const el = new Intl.RelativeTimeFormat("el", { numberingSystem: numberingSystem }); const el = new Intl.RelativeTimeFormat("el", { numberingSystem: numberingSystem });
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem); expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
}); });
["latn", "grek"].forEach(numberingSystem => { ["latn", "adlm"].forEach(numberingSystem => {
const el = new Intl.RelativeTimeFormat(`el-u-nu-${numberingSystem}`); const el = new Intl.RelativeTimeFormat(`el-u-nu-${numberingSystem}`);
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem); expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
}); });

View File

@ -19,6 +19,7 @@ set(SOURCES
NumberFormat.cpp NumberFormat.cpp
PluralRules.cpp PluralRules.cpp
RelativeTimeFormat.cpp RelativeTimeFormat.cpp
UnicodeKeywords.cpp
) )
serenity_lib(LibLocale locale) serenity_lib(LibLocale locale)

View File

@ -12,13 +12,6 @@ namespace Locale {
enum class CalendarPatternStyle : u8; enum class CalendarPatternStyle : u8;
enum class HourCycle : u8; enum class HourCycle : u8;
enum class Key : u8;
enum class KeywordCalendar : u8;
enum class KeywordCollation : u8;
enum class KeywordColCaseFirst : u8;
enum class KeywordColNumeric : u8;
enum class KeywordHours : u8;
enum class KeywordNumbers : u8;
enum class Locale : u16; enum class Locale : u16;
enum class PluralCategory : u8; enum class PluralCategory : u8;
enum class Style : u8; enum class Style : u8;

View File

@ -13,6 +13,7 @@
#include <unicode/dtptngen.h> #include <unicode/dtptngen.h>
#include <unicode/locdspnm.h> #include <unicode/locdspnm.h>
#include <unicode/numsys.h>
#include <unicode/tznames.h> #include <unicode/tznames.h>
#include <unicode/unistr.h> #include <unicode/unistr.h>
@ -74,6 +75,23 @@ icu::LocaleDisplayNames& LocaleData::dialect_display_names()
return *m_dialect_display_names; return *m_dialect_display_names;
} }
// Returns the ICU numbering system for this locale, creating and caching it on first use.
// If ICU cannot create one for the locale itself, the numbering system of the "und" locale is used instead.
icu::NumberingSystem& LocaleData::numbering_system()
{
    // Already created by an earlier call.
    if (m_numbering_system)
        return *m_numbering_system;

    UErrorCode status = U_ZERO_ERROR;
    m_numbering_system = adopt_own_if_nonnull(icu::NumberingSystem::createInstance(locale(), status));

    if (icu_failure(status)) {
        // Retry with a clean status; the fallback is required to succeed.
        status = U_ZERO_ERROR;
        m_numbering_system = adopt_own_if_nonnull(icu::NumberingSystem::createInstance("und", status));
        VERIFY(icu_success(status));
    }

    return *m_numbering_system;
}
icu::DateTimePatternGenerator& LocaleData::date_time_pattern_generator() icu::DateTimePatternGenerator& LocaleData::date_time_pattern_generator()
{ {
if (!m_date_time_pattern_generator) { if (!m_date_time_pattern_generator) {

View File

@ -22,6 +22,7 @@
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
class DateTimePatternGenerator; class DateTimePatternGenerator;
class LocaleDisplayNames; class LocaleDisplayNames;
class NumberingSystem;
class TimeZoneNames; class TimeZoneNames;
class UnicodeString; class UnicodeString;
U_NAMESPACE_END U_NAMESPACE_END
@ -39,6 +40,8 @@ public:
icu::LocaleDisplayNames& standard_display_names(); icu::LocaleDisplayNames& standard_display_names();
icu::LocaleDisplayNames& dialect_display_names(); icu::LocaleDisplayNames& dialect_display_names();
icu::NumberingSystem& numbering_system();
icu::DateTimePatternGenerator& date_time_pattern_generator(); icu::DateTimePatternGenerator& date_time_pattern_generator();
icu::TimeZoneNames& time_zone_names(); icu::TimeZoneNames& time_zone_names();
@ -54,9 +57,9 @@ private:
OwnPtr<icu::LocaleDisplayNames> m_standard_display_names; OwnPtr<icu::LocaleDisplayNames> m_standard_display_names;
OwnPtr<icu::LocaleDisplayNames> m_dialect_display_names; OwnPtr<icu::LocaleDisplayNames> m_dialect_display_names;
OwnPtr<icu::NumberingSystem> m_numbering_system;
OwnPtr<icu::DateTimePatternGenerator> m_date_time_pattern_generator; OwnPtr<icu::DateTimePatternGenerator> m_date_time_pattern_generator;
OwnPtr<icu::TimeZoneNames> m_time_zone_names; OwnPtr<icu::TimeZoneNames> m_time_zone_names;
Optional<DigitalFormat> m_digital_format; Optional<DigitalFormat> m_digital_format;
}; };

View File

@ -9,16 +9,12 @@
#include <AK/AllOf.h> #include <AK/AllOf.h>
#include <AK/GenericLexer.h> #include <AK/GenericLexer.h>
#include <AK/QuickSort.h> #include <AK/QuickSort.h>
#include <AK/ScopeGuard.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
#include <LibLocale/DateTimeFormat.h>
#include <LibLocale/ICU.h> #include <LibLocale/ICU.h>
#include <LibLocale/Locale.h> #include <LibLocale/Locale.h>
#include <LibUnicode/CharacterTypes.h>
#include <unicode/localebuilder.h> #include <unicode/localebuilder.h>
#include <unicode/locid.h> #include <unicode/locid.h>
#include <unicode/ucurr.h>
namespace Locale { namespace Locale {
@ -548,52 +544,7 @@ StringView style_to_string(Style style)
} }
} }
ReadonlySpan<StringView> __attribute__((weak)) get_available_keyword_values(StringView) { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_calendars() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_collation_case_orderings() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_collation_numeric_orderings() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_collation_types() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_hour_cycles() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_number_systems() { return {}; }
Optional<Locale> __attribute__((weak)) locale_from_string(StringView) { return {}; } Optional<Locale> __attribute__((weak)) locale_from_string(StringView) { return {}; }
Optional<Key> __attribute__((weak)) key_from_string(StringView) { return {}; }
Optional<KeywordCalendar> __attribute__((weak)) keyword_ca_from_string(StringView) { return {}; }
Optional<KeywordCollation> __attribute__((weak)) keyword_co_from_string(StringView) { return {}; }
Optional<KeywordHours> __attribute__((weak)) keyword_hc_from_string(StringView) { return {}; }
Optional<KeywordColCaseFirst> __attribute__((weak)) keyword_kf_from_string(StringView) { return {}; }
Optional<KeywordColNumeric> __attribute__((weak)) keyword_kn_from_string(StringView) { return {}; }
Optional<KeywordNumbers> __attribute__((weak)) keyword_nu_from_string(StringView) { return {}; }
Vector<StringView> __attribute__((weak)) get_keywords_for_locale(StringView, StringView) { return {}; }
Optional<StringView> __attribute__((weak)) get_preferred_keyword_value_for_locale(StringView, StringView) { return {}; }
Vector<String> available_currencies()
{
UErrorCode status = U_ZERO_ERROR;
auto* currencies = ucurr_openISOCurrencies(UCURR_ALL, &status);
ScopeGuard guard { [&]() { uenum_close(currencies); } };
if (icu_failure(status))
return {};
Vector<String> result;
while (true) {
i32 length = 0;
char const* next = uenum_next(currencies, &length, &status);
if (icu_failure(status))
return {};
if (next == nullptr)
break;
// https://unicode-org.atlassian.net/browse/ICU-21687
if (StringView currency { next, static_cast<size_t>(length) }; currency != "LSM"sv)
result.append(MUST(String::from_utf8(currency)));
}
return result;
}
static void apply_extensions_to_locale(icu::Locale& locale, icu::Locale const& locale_with_extensions) static void apply_extensions_to_locale(icu::Locale& locale, icu::Locale const& locale_with_extensions)
{ {

View File

@ -133,29 +133,10 @@ void canonicalize_unicode_extension_values(StringView key, String& value);
StringView default_locale(); StringView default_locale();
bool is_locale_available(StringView locale); bool is_locale_available(StringView locale);
ReadonlySpan<StringView> get_available_keyword_values(StringView key);
ReadonlySpan<StringView> get_available_calendars();
ReadonlySpan<StringView> get_available_collation_case_orderings();
ReadonlySpan<StringView> get_available_collation_numeric_orderings();
ReadonlySpan<StringView> get_available_collation_types();
ReadonlySpan<StringView> get_available_hour_cycles();
ReadonlySpan<StringView> get_available_number_systems();
Vector<String> available_currencies();
Style style_from_string(StringView style); Style style_from_string(StringView style);
StringView style_to_string(Style style); StringView style_to_string(Style style);
Optional<Locale> locale_from_string(StringView locale); Optional<Locale> locale_from_string(StringView locale);
Optional<Key> key_from_string(StringView key);
Optional<KeywordCalendar> keyword_ca_from_string(StringView ca);
Optional<KeywordCollation> keyword_co_from_string(StringView co);
Optional<KeywordHours> keyword_hc_from_string(StringView hc);
Optional<KeywordColCaseFirst> keyword_kf_from_string(StringView kf);
Optional<KeywordColNumeric> keyword_kn_from_string(StringView kn);
Optional<KeywordNumbers> keyword_nu_from_string(StringView nu);
Vector<StringView> get_keywords_for_locale(StringView locale, StringView key);
Optional<StringView> get_preferred_keyword_value_for_locale(StringView locale, StringView key);
Optional<String> add_likely_subtags(StringView); Optional<String> add_likely_subtags(StringView);
Optional<String> remove_likely_subtags(StringView); Optional<String> remove_likely_subtags(StringView);

View File

@ -0,0 +1,231 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#define AK_DONT_REPLACE_STD
#include <AK/QuickSort.h>
#include <AK/ScopeGuard.h>
#include <LibLocale/DateTimeFormat.h>
#include <LibLocale/ICU.h>
#include <LibLocale/UnicodeKeywords.h>
#include <unicode/calendar.h>
#include <unicode/coll.h>
#include <unicode/locid.h>
#include <unicode/numsys.h>
#include <unicode/ucurr.h>
namespace Locale {
// Collects the entries of an ICU string enumeration into a list, keeping only those for which `filter` returns true.
// `filter` is invoked with the raw, ICU-owned entry pointer. A null enumeration yields an empty list.
template<typename Filter>
static Vector<String> icu_string_enumeration_to_list(OwnPtr<icu::StringEnumeration> enumeration, Filter&& filter)
{
    if (!enumeration)
        return {};

    Vector<String> list;
    UErrorCode status = U_ZERO_ERROR;

    for (;;) {
        i32 entry_length = 0;
        auto const* entry = enumeration->next(&entry_length, status);

        // Stop at the end of the enumeration or on the first ICU error; entries gathered so far are still returned.
        if (entry == nullptr || icu_failure(status))
            break;

        if (filter(entry))
            list.append(MUST(String::from_utf8({ entry, static_cast<size_t>(entry_length) })));
    }

    return list;
}
// Convenience overload that keeps every entry of the enumeration.
static Vector<String> icu_string_enumeration_to_list(OwnPtr<icu::StringEnumeration> enumeration)
{
    auto accept_all = [](char const*) { return true; };
    return icu_string_enumeration_to_list(move(enumeration), accept_all);
}
// Returns the values available for the Unicode locale extension keyword `key`, dispatching to the matching
// available_*() helper. Keys other than "ca", "co", "hc", "kf", "kn" and "nu" are not handled and hit TODO().
Vector<String> available_keyword_values(StringView locale, StringView key)
{
    // Keywords served by the fixed, locale-independent lists.
    if (key == "kf"sv)
        return available_collation_case_orderings();
    if (key == "kn"sv)
        return available_collation_numeric_orderings();

    // Keywords served by the overloads that take the locale.
    if (key == "nu"sv)
        return available_number_systems(locale);
    if (key == "hc"sv)
        return available_hour_cycles(locale);
    if (key == "co"sv)
        return available_collations(locale);
    if (key == "ca"sv)
        return available_calendars(locale);

    TODO();
}
// Returns the sorted list of calendars reported for the "und" locale. The list is computed once and cached.
Vector<String> const& available_calendars()
{
    static auto const sorted_calendars = [] {
        auto list = available_calendars("und"sv);
        quick_sort(list);
        return list;
    }();

    return sorted_calendars;
}
// Returns the calendars ICU reports for `locale`, in the order ICU enumerates them.
// Yields an empty list if no locale data exists for `locale` or if ICU reports an error.
Vector<String> available_calendars(StringView locale)
{
    auto locale_data = LocaleData::for_locale(locale);
    if (!locale_data.has_value())
        return {};

    UErrorCode status = U_ZERO_ERROR;

    // The third argument (0) is ICU's `commonlyUsed` flag, i.e. false.
    auto enumeration = adopt_own_if_nonnull(icu::Calendar::getKeywordValuesForLocale("calendar", locale_data->locale(), 0, status));
    if (icu_failure(status))
        return {};

    auto calendars = icu_string_enumeration_to_list(move(enumeration));

    // Rename the two identifiers that ICU spells differently from the identifiers handed out to callers.
    for (auto& calendar : calendars) {
        if (calendar == "ethiopic-amete-alem"sv)
            calendar = "ethioaa"_string;
        else if (calendar == "gregorian"sv)
            calendar = "gregory"_string;
    }

    return calendars;
}
// Returns the sorted list of ISO currency codes known to ICU (excluding "LSM", see the ICU ticket below).
// The list is computed once and cached; it is empty if ICU reports an error while enumerating.
Vector<String> const& available_currencies()
{
    static auto const sorted_currencies = []() -> Vector<String> {
        UErrorCode status = U_ZERO_ERROR;

        auto* enumeration = ucurr_openISOCurrencies(UCURR_ALL, &status);
        ScopeGuard close_enumeration { [&]() { uenum_close(enumeration); } };

        if (icu_failure(status))
            return {};

        Vector<String> list;

        for (;;) {
            i32 code_length = 0;
            char const* code = uenum_next(enumeration, &code_length, &status);

            if (icu_failure(status))
                return {};
            if (code == nullptr)
                break;

            StringView currency { code, static_cast<size_t>(code_length) };

            // https://unicode-org.atlassian.net/browse/ICU-21687
            if (currency == "LSM"sv)
                continue;

            list.append(MUST(String::from_utf8(currency)));
        }

        quick_sort(list);
        return list;
    }();

    return sorted_currencies;
}
// Returns the fixed list of values for the "kf" (collation case-first) keyword.
Vector<String> const& available_collation_case_orderings()
{
    static Vector<String> const orderings { "false"_string, "lower"_string, "upper"_string };
    return orderings;
}
// Returns the fixed list of values for the "kn" (collation numeric ordering) keyword.
Vector<String> const& available_collation_numeric_orderings()
{
    static Vector<String> const numeric_orderings { "false"_string, "true"_string };
    return numeric_orderings;
}
// Returns the list of supported collations, which is currently only "default".
Vector<String> const& available_collations()
{
    // FIXME: Implement this when we fully support Intl.Collator.
    static Vector<String> const supported_collations { "default"_string };
    return supported_collations;
}
// Returns the collations available for a locale. The locale is currently ignored; this hands back a copy of the
// locale-independent list.
Vector<String> available_collations(StringView)
{
    // FIXME: Implement this when we fully support Intl.Collator.
    Vector<String> collations = available_collations();
    return collations;
}
// Returns the fixed list of values for the "hc" (hour cycle) keyword.
Vector<String> const& available_hour_cycles()
{
    static Vector<String> const hour_cycles { "h11"_string, "h12"_string, "h23"_string, "h24"_string };
    return hour_cycles;
}
// Returns every hour cycle, with the locale's default hour cycle moved to the front.
// If no default hour cycle can be determined for `locale`, the locale-independent list is returned unchanged.
Vector<String> available_hour_cycles(StringView locale)
{
    auto preferred = default_hour_cycle(locale);
    if (!preferred.has_value())
        return available_hour_cycles();

    auto preferred_name = MUST(String::from_utf8(hour_cycle_to_string(*preferred)));

    Vector<String> ordered;
    ordered.append(preferred_name);

    // Append the remaining hour cycles, skipping the one that is already at the front.
    for (auto const& candidate : available_hour_cycles()) {
        if (candidate != preferred_name)
            ordered.append(candidate);
    }

    return ordered;
}
// Returns the sorted names of the numbering systems known to ICU, excluding algorithmic ones.
// The list is computed once and cached; it is empty if ICU cannot enumerate the numbering system names.
Vector<String> const& available_number_systems()
{
    static auto number_systems = []() -> Vector<String> {
        UErrorCode status = U_ZERO_ERROR;

        auto keywords = adopt_own_if_nonnull(icu::NumberingSystem::getAvailableNames(status));
        if (icu_failure(status))
            return {};

        auto number_systems = icu_string_enumeration_to_list(move(keywords), [](char const* keyword) {
            // Each lookup gets its own status. Sharing one status across lookups would let a single failed
            // lookup leave the status in a failed state and thereby reject every numbering system after it.
            UErrorCode lookup_status = U_ZERO_ERROR;

            auto system = adopt_own_if_nonnull(icu::NumberingSystem::createInstanceByName(keyword, lookup_status));
            if (icu_failure(lookup_status) || !system)
                return false;

            return !static_cast<bool>(system->isAlgorithmic());
        });

        quick_sort(number_systems);
        return number_systems;
    }();

    return number_systems;
}
// Returns every available numbering system, with the numbering system ICU reports for `locale` placed first.
// Yields an empty list if no locale data exists for `locale`.
Vector<String> available_number_systems(StringView locale)
{
    auto locale_data = LocaleData::for_locale(locale);
    if (!locale_data.has_value())
        return {};

    auto const* preferred = locale_data->numbering_system().getName();
    auto preferred_name = MUST(String::from_utf8({ preferred, strlen(preferred) }));

    Vector<String> ordered;
    ordered.append(preferred_name);

    // Append the remaining numbering systems, skipping the one that is already at the front.
    for (auto const& candidate : available_number_systems()) {
        if (candidate != preferred_name)
            ordered.append(candidate);
    }

    return ordered;
}
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/String.h>
#include <AK/StringView.h>
#include <AK/Vector.h>
namespace Locale {
// Returns the values available for the Unicode locale extension keyword `key` ("ca", "co", "hc", "kf", "kn" or
// "nu") by dispatching to the matching function below. Other keys are not implemented.
Vector<String> available_keyword_values(StringView locale, StringView key);
// Sorted list of the calendars reported for the "und" locale; computed once and cached.
Vector<String> const& available_calendars();
// Calendars ICU reports for `locale`, with "gregorian" and "ethiopic-amete-alem" renamed to "gregory" and
// "ethioaa". Empty if no locale data is available or ICU reports an error.
Vector<String> available_calendars(StringView locale);
// Sorted list of ICU's ISO currency codes (excluding "LSM"); computed once and cached.
Vector<String> const& available_currencies();
// Fixed list of "kf" values: "false", "lower", "upper".
Vector<String> const& available_collation_case_orderings();
// Fixed list of "kn" values: "false", "true".
Vector<String> const& available_collation_numeric_orderings();
// Supported collations; currently only "default".
Vector<String> const& available_collations();
// Collations for `locale`; currently a copy of the locale-independent list.
Vector<String> available_collations(StringView locale);
// Fixed list of "hc" values: "h11", "h12", "h23", "h24".
Vector<String> const& available_hour_cycles();
// All hour cycles, with the default hour cycle of `locale` (when one can be determined) listed first.
Vector<String> available_hour_cycles(StringView locale);
// Sorted list of ICU's non-algorithmic numbering systems; computed once and cached.
Vector<String> const& available_number_systems();
// All available numbering systems, with the numbering system ICU reports for `locale` listed first.
// Empty if no locale data is available.
Vector<String> available_number_systems(StringView locale);
}