LibJS+LibLocale: Replace Unicode keyword lookups with ICU

Note: All of the changes to the test files in this patch are now aligned
with both Chrome and Firefox.
This commit is contained in:
Timothy Flynn 2024-06-15 20:23:53 -04:00 committed by Andreas Kling
parent a1464342e1
commit 1bcc29d0d1
Notes: sideshowbarker 2024-07-17 09:56:35 +09:00
22 changed files with 400 additions and 570 deletions

View File

@ -9,15 +9,9 @@ set(CLDR_VERSION_FILE "${CLDR_PATH}/version.txt")
set(CLDR_ZIP_URL "https://github.com/unicode-org/cldr-json/releases/download/${CLDR_VERSION}/cldr-${CLDR_VERSION}-json-modern.zip")
set(CLDR_ZIP_PATH "${CLDR_PATH}/cldr.zip")
set(CLDR_BCP47_SOURCE cldr-bcp47)
set(CLDR_BCP47_PATH "${CLDR_PATH}/${CLDR_BCP47_SOURCE}")
set(CLDR_CORE_SOURCE cldr-core)
set(CLDR_CORE_PATH "${CLDR_PATH}/${CLDR_CORE_SOURCE}")
set(CLDR_DATES_SOURCE cldr-dates-modern)
set(CLDR_DATES_PATH "${CLDR_PATH}/${CLDR_DATES_SOURCE}")
set(CLDR_NUMBERS_SOURCE cldr-numbers-modern)
set(CLDR_NUMBERS_PATH "${CLDR_PATH}/${CLDR_NUMBERS_SOURCE}")
@ -26,9 +20,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
if (ENABLE_NETWORK_DOWNLOADS)
download_file("${CLDR_ZIP_URL}" "${CLDR_ZIP_PATH}" SHA256 "${CLDR_SHA256}")
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_BCP47_SOURCE}/**" "${CLDR_BCP47_PATH}")
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_CORE_SOURCE}/**" "${CLDR_CORE_PATH}")
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_DATES_SOURCE}/**" "${CLDR_DATES_PATH}")
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_NUMBERS_SOURCE}/**" "${CLDR_NUMBERS_PATH}")
else()
message(STATUS "Skipping download of ${CLDR_ZIP_URL}, expecting the archive to have been extracted to ${CLDR_PATH}")
@ -43,7 +35,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
"${CLDR_VERSION_FILE}"
"${LOCALE_DATA_HEADER}"
"${LOCALE_DATA_IMPLEMENTATION}"
arguments -b "${CLDR_BCP47_PATH}" -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}" -d "${CLDR_DATES_PATH}"
arguments -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}"
)
set(LOCALE_DATA_SOURCES

View File

@ -33,26 +33,14 @@ static ByteString format_identifier(StringView owner, ByteString identifier)
return identifier;
}
using KeywordList = Vector<size_t>;
// Per-locale keyword data gathered while parsing the CLDR.
// The *_keywords members do not hold keywords directly: each one is the value returned by
// CLDR::unique_keyword_lists.ensure(), identifying a KeywordList kept in that storage.
struct LocaleData {
// Calendar keyword list for this locale.
size_t calendar_keywords { 0 };
// Collation case-ordering keyword list (built from the values of the "kf" key).
size_t collation_case_keywords { 0 };
// Collation numeric-ordering keyword list (built from the values of the "kn" key).
size_t collation_numeric_keywords { 0 };
// Numbering system keyword list; the locale's default numbering system is appended first.
size_t number_system_keywords { 0 };
// NOTE(review): nothing visible here writes this member; presumably an index for text layout data - confirm.
size_t text_layout { 0 };
};
// Aggregates everything parsed from the CLDR before the header and implementation are generated.
struct CLDR {
// String storage; ensure() hands back an index that is stored in place of the string itself.
UniqueStringStorage unique_strings;
// Storage for keyword lists (each a list of unique_strings indices); LocaleData refers to entries by index.
UniqueStorage<KeywordList> unique_keyword_lists;
// Parsed per-locale data, keyed by the identifier derived from each locale's directory path.
HashMap<ByteString, LocaleData> locales;
// Aliases emitted alongside the Locale enum.
Vector<Alias> locale_aliases;
// Unicode extension key (e.g. "ca", "nu") -> values accepted for that key.
HashMap<ByteString, Vector<ByteString>> keywords;
// Unicode extension key -> { name, alias } entries relating an aliased value to the name used in its place.
HashMap<ByteString, Vector<Alias>> keyword_aliases;
// Unicode extension key -> the "_alias" name given for that key in the BCP 47 data.
HashMap<ByteString, ByteString> keyword_names;
};
// Some parsing is expected to fail. For example, the CLDR contains language mappings
@ -81,175 +69,6 @@ ErrorOr<JsonValue const*> read_json_file_with_cache(ByteString const& path)
return &parsed_json_cache.get(path).value();
}
// Reads one BCP 47 keyword file and records, for every supported Unicode extension key found in
// its "u" section, the key's "_alias" name, the values accepted for the key, and any aliases
// between values. Files without a "u" section contribute nothing.
static ErrorOr<void> parse_unicode_extension_keywords(ByteString bcp47_path, CLDR& cldr)
{
    constexpr auto supported_keys = Array { "ca"sv, "co"sv, "hc"sv, "kf"sv, "kn"sv, "nu"sv };

    auto bcp47_json = TRY(read_json_file(bcp47_path));
    auto const& keyword_section = bcp47_json.as_object().get_object("keyword"sv).value();

    auto unicode_section = keyword_section.get_object("u"sv);
    if (!unicode_section.has_value())
        return {};

    unicode_section->for_each_member([&](auto const& key, auto const& value) {
        if (!supported_keys.span().contains_slow(key))
            return;

        auto const& key_object = value.as_object();

        auto const& key_name = key_object.get_byte_string("_alias"sv).value();
        cldr.keyword_names.set(key, key_name);

        auto& accepted_values = cldr.keywords.ensure(key);

        // FIXME: ECMA-402 requires the list of supported collation types to include "default", but
        //        that type does not appear in collation.json.
        if (key == "co" && !accepted_values.contains_slow("default"sv))
            accepted_values.append("default"sv);

        key_object.for_each_member([&](auto const& keyword, auto const& properties) {
            // Members such as "_alias" on the key itself are plain strings, not keyword entries.
            if (!properties.is_object())
                return;

            // Drop values that ECMA-402 does not permit:
            // https://tc39.es/ecma402/#sec-intl-collator-internal-slots
            if (key == "co"sv && keyword.is_one_of("search"sv, "standard"sv))
                return;
            // https://tc39.es/ecma402/#sec-intl.numberformat-internal-slots
            if (key == "nu"sv && keyword.is_one_of("finance"sv, "native"sv, "traditio"sv))
                return;

            auto const& details = properties.as_object();

            // A value with a "_preferred" replacement is only recorded as an alias of that
            // replacement; it is not added to the accepted values itself.
            auto const preferred = details.get_byte_string("_preferred"sv);
            if (preferred.has_value()) {
                cldr.keyword_aliases.ensure(key).append({ preferred.value(), keyword });
                return;
            }

            auto const alias = details.get_byte_string("_alias"sv);
            if (alias.has_value())
                cldr.keyword_aliases.ensure(key).append({ keyword, alias.value() });

            accepted_values.append(keyword);
        });
    });

    return {};
}
// Searches the aliases recorded for `key` and returns the `name` of the first entry whose `alias`
// equals `calendar`. Despite the parameter's name, the value may belong to any key (callers also
// pass numbering systems). Returns an empty Optional if the key has no aliases or nothing matches.
static Optional<ByteString> find_keyword_alias(StringView key, StringView calendar, CLDR& cldr)
{
    auto aliases_for_key = cldr.keyword_aliases.find(key);
    if (aliases_for_key == cldr.keyword_aliases.end())
        return {};

    for (auto const& candidate : aliases_for_key->value) {
        if (calendar == candidate.alias)
            return candidate.name;
    }

    return {};
}
// Collects the numbering systems used by one locale from its numbers.json and stores the resulting
// keyword list on `locale`. The default numbering system is recorded first, followed by the
// "otherNumberingSystems" entries and then any "defaultNumberingSystem-alt-*" entries; duplicates
// are dropped and aliased names are replaced through find_keyword_alias().
static ErrorOr<void> parse_number_system_keywords(ByteString locale_numbers_path, CLDR& cldr, LocaleData& locale)
{
    LexicalPath numbers_path(move(locale_numbers_path));
    numbers_path = numbers_path.append("numbers.json"sv);

    auto numbers_json = TRY(read_json_file(numbers_path.string()));

    // The per-locale object is keyed by the name of the directory containing numbers.json.
    auto const& main_section = numbers_json.as_object().get_object("main"sv).value();
    auto const& locale_section = main_section.get_object(numbers_path.parent().basename()).value();
    auto const& numbers_section = locale_section.get_object("numbers"sv).value();

    auto const& default_system = numbers_section.get_byte_string("defaultNumberingSystem"sv).value();
    auto const& other_systems = numbers_section.get_object("otherNumberingSystems"sv).value();

    KeywordList number_systems {};

    auto record_number_system = [&](ByteString system_name) {
        auto canonical_name = find_keyword_alias("nu"sv, system_name, cldr);
        if (canonical_name.has_value())
            system_name = canonical_name.release_value();

        auto string_index = cldr.unique_strings.ensure(move(system_name));
        if (number_systems.contains_slow(string_index))
            return;

        number_systems.append(move(string_index));
    };

    record_number_system(default_system);

    other_systems.for_each_member([&](auto const&, JsonValue const& value) {
        record_number_system(value.as_string());
    });

    numbers_section.for_each_member([&](auto const& key, JsonValue const& value) {
        if (key.starts_with("defaultNumberingSystem-alt-"sv))
            record_number_system(value.as_string());
    });

    locale.number_system_keywords = cldr.unique_keyword_lists.ensure(move(number_systems));
    return {};
}
// Collects the calendars available to one locale by scanning the "ca-*.json" files in
// locale_dates_path, and stores the resulting keyword list on `locale`.
// Calendar names are mapped through find_keyword_alias("ca", ...) before being recorded.
// Duplicate names are not filtered out here.
static ErrorOr<void> parse_calendar_keywords(ByteString locale_dates_path, CLDR& cldr, LocaleData& locale)
{
KeywordList keywords {};
TRY(Core::Directory::for_each_entry(locale_dates_path, Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> {
// Only the per-calendar files are of interest.
if (!entry.name.starts_with("ca-"sv))
return IterationDecision::Continue;
// The generic calendar is not a supported Unicode calendar key, so skip it:
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/calendar#unicode_calendar_keys
if (entry.name == "ca-generic.json"sv)
return IterationDecision::Continue;
auto locale_calendars_path = LexicalPath::join(directory.path().string(), entry.name).string();
LexicalPath calendars_path(move(locale_calendars_path));
auto calendars = TRY(read_json_file(calendars_path.string()));
auto const& main_object = calendars.as_object().get_object("main"sv).value();
// The per-locale object is keyed by the name of the directory containing the calendar file.
auto const& locale_object = main_object.get_object(calendars_path.parent().basename()).value();
auto const& dates_object = locale_object.get_object("dates"sv).value();
auto const& calendars_object = dates_object.get_object("calendars"sv).value();
// Only the member names (calendar identifiers) are used; the calendar data itself is ignored.
calendars_object.for_each_member([&](auto calendar_name, JsonValue const&) {
if (auto calendar_alias = find_keyword_alias("ca"sv, calendar_name, cldr); calendar_alias.has_value())
calendar_name = calendar_alias.release_value();
keywords.append(cldr.unique_strings.ensure(calendar_name));
});
return IterationDecision::Continue;
}));
locale.calendar_keywords = cldr.unique_keyword_lists.ensure(move(keywords));
return {};
}
// Assigns the collation case-ordering ("kf") and numeric-ordering ("kn") keyword lists to `locale`.
// The CLDR data parsed here has no per-locale ordering for these keys, so every locale receives
// the same two lists: the values of the key sorted alphabetically, with a fixed default value
// moved to the front. The lists are built once, on the first call, and reused afterwards.
static void fill_in_collation_keywords(CLDR& cldr, LocaleData& locale)
{
    // FIXME: If collation data becomes available in the CLDR, parse per-locale ordering from there.
    auto create_list_with_default_first = [&](auto key, auto default_value) {
        auto& values = cldr.keywords.find(key)->value;

        quick_sort(values, [&](auto const& lhs, auto const& rhs) {
            // Nothing may sort before the default value - not even the default value itself.
            // Testing rhs first keeps the comparator a strict ordering (comp(x, x) is false),
            // which sorting algorithms are entitled to rely on.
            if (rhs == default_value)
                return false;
            if (lhs == default_value)
                return true;
            return lhs < rhs;
        });

        KeywordList keywords;
        keywords.ensure_capacity(values.size());

        for (auto const& value : values)
            keywords.append(cldr.unique_strings.ensure(value));

        return cldr.unique_keyword_lists.ensure(move(keywords));
    };

    // Function-local statics: the lists are identical for every locale, so compute them only once.
    static auto kf_index = create_list_with_default_first("kf"sv, "upper"sv);
    static auto kn_index = create_list_with_default_first("kn"sv, "true"sv);

    locale.collation_case_keywords = kf_index;
    locale.collation_numeric_keywords = kn_index;
}
static ErrorOr<void> parse_default_content_locales(ByteString core_path, CLDR& cldr)
{
LexicalPath default_content_path(move(core_path));
@ -324,7 +143,7 @@ static ErrorOr<void> define_aliases_without_scripts(CLDR& cldr)
return {};
}
static ErrorOr<void> parse_all_locales(ByteString bcp47_path, ByteString core_path, ByteString numbers_path, ByteString dates_path, CLDR& cldr)
static ErrorOr<void> parse_all_locales(ByteString core_path, ByteString numbers_path, CLDR& cldr)
{
LexicalPath core_supplemental_path(core_path);
core_supplemental_path = core_supplemental_path.append("supplemental"sv);
@ -343,28 +162,11 @@ static ErrorOr<void> parse_all_locales(ByteString bcp47_path, ByteString core_pa
return builder.to_byte_string();
};
TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/bcp47", bcp47_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> {
auto bcp47_path = LexicalPath::join(directory.path().string(), entry.name).string();
TRY(parse_unicode_extension_keywords(move(bcp47_path), cldr));
return IterationDecision::Continue;
}));
TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", numbers_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> {
auto numbers_path = LexicalPath::join(directory.path().string(), entry.name).string();
auto language = TRY(remove_variants_from_path(numbers_path));
auto& locale = cldr.locales.ensure(language);
TRY(parse_number_system_keywords(numbers_path, cldr, locale));
fill_in_collation_keywords(cldr, locale);
return IterationDecision::Continue;
}));
TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", dates_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> {
auto dates_path = LexicalPath::join(directory.path().string(), entry.name).string();
auto language = TRY(remove_variants_from_path(dates_path));
auto& locale = cldr.locales.ensure(language);
TRY(parse_calendar_keywords(dates_path, cldr, locale));
cldr.locales.ensure(language);
return IterationDecision::Continue;
}));
@ -388,20 +190,7 @@ namespace Locale {
)~~~");
auto locales = cldr.locales.keys();
auto keywords = cldr.keywords.keys();
generate_enum(generator, format_identifier, "Locale"sv, "None"sv, locales, cldr.locale_aliases);
generate_enum(generator, format_identifier, "Key"sv, {}, keywords);
for (auto& keyword : cldr.keywords) {
auto const& keyword_name = cldr.keyword_names.find(keyword.key)->value;
auto enum_name = ByteString::formatted("Keyword{}", format_identifier({}, keyword_name));
if (auto aliases = cldr.keyword_aliases.find(keyword.key); aliases != cldr.keyword_aliases.end())
generate_enum(generator, format_identifier, enum_name, {}, keyword.value, aliases->value);
else
generate_enum(generator, format_identifier, enum_name, {}, keyword.value);
}
generator.append(R"~~~(
}
@ -436,80 +225,9 @@ static ErrorOr<void> generate_unicode_locale_implementation(Core::InputBufferedF
namespace Locale {
)~~~");
cldr.unique_strings.generate(generator);
generate_available_values(generator, "get_available_calendars"sv, cldr.keywords.find("ca"sv)->value, cldr.keyword_aliases.find("ca"sv)->value,
[](auto calendar) {
// FIXME: Remove this filter when we support all calendars.
return calendar.is_one_of("gregory"sv, "iso8601"sv);
});
generate_available_values(generator, "get_available_collation_case_orderings"sv, cldr.keywords.find("kf"sv)->value, cldr.keyword_aliases.find("kf"sv)->value);
generate_available_values(generator, "get_available_collation_numeric_orderings"sv, cldr.keywords.find("kn"sv)->value, cldr.keyword_aliases.find("kn"sv)->value);
generate_available_values(generator, "get_available_collation_types"sv, cldr.keywords.find("co"sv)->value, cldr.keyword_aliases.find("co"sv)->value,
[](auto collation) {
// FIXME: Remove this filter when we support all collation types.
return collation == "default"sv;
});
generate_available_values(generator, "get_available_hour_cycles"sv, cldr.keywords.find("hc"sv)->value);
generate_available_values(generator, "get_available_number_systems"sv, cldr.keywords.find("nu"sv)->value);
generator.append(R"~~~(
ReadonlySpan<StringView> get_available_keyword_values(StringView key)
{
auto key_value = key_from_string(key);
if (!key_value.has_value())
return {};
switch (*key_value) {
case Key::Ca:
return get_available_calendars();
case Key::Co:
return get_available_collation_types();
case Key::Hc:
return get_available_hour_cycles();
case Key::Kf:
return get_available_collation_case_orderings();
case Key::Kn:
return get_available_collation_numeric_orderings();
case Key::Nu:
return get_available_number_systems();
}
VERIFY_NOT_REACHED();
}
)~~~");
cldr.unique_keyword_lists.generate(generator, string_index_type, "s_keyword_lists"sv);
auto append_mapping = [&](auto const& keys, auto const& map, auto type, auto name, auto mapping_getter) {
generator.set("type", type);
generator.set("name", name);
generator.set("size", ByteString::number(keys.size()));
generator.append(R"~~~(
static constexpr Array<@type@, @size@> @name@ { {)~~~");
bool first = true;
for (auto const& key : keys) {
auto const& value = map.find(key)->value;
auto mapping = mapping_getter(value);
generator.append(first ? " "sv : ", "sv);
generator.append(ByteString::number(mapping));
first = false;
}
generator.append(" } };");
};
auto locales = cldr.locales.keys();
quick_sort(locales);
append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_calendar_keywords"sv, [&](auto const& locale) { return locale.calendar_keywords; });
append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_collation_case_keywords"sv, [&](auto const& locale) { return locale.collation_case_keywords; });
append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_collation_numeric_keywords"sv, [&](auto const& locale) { return locale.collation_numeric_keywords; });
append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_number_system_keywords"sv, [&](auto const& locale) { return locale.number_system_keywords; });
auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector<Alias> const& aliases = {}) -> ErrorOr<void> {
HashValueMap<ByteString> hashes;
TRY(hashes.try_ensure_capacity(values.size()));
@ -525,106 +243,8 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
};
TRY(append_from_string("Locale"sv, "locale"sv, cldr.locales.keys(), cldr.locale_aliases));
TRY(append_from_string("Key"sv, "key"sv, cldr.keywords.keys()));
for (auto const& keyword : cldr.keywords) {
auto const& keyword_name = cldr.keyword_names.find(keyword.key)->value;
auto enum_name = ByteString::formatted("Keyword{}", format_identifier({}, keyword_name));
auto enum_snake = ByteString::formatted("keyword_{}", keyword.key);
if (auto aliases = cldr.keyword_aliases.find(keyword.key); aliases != cldr.keyword_aliases.end())
TRY(append_from_string(enum_name, enum_snake, keyword.value, aliases->value));
else
TRY(append_from_string(enum_name, enum_snake, keyword.value));
}
generator.append(R"~~~(
static ReadonlySpan<@string_index_type@> find_keyword_indices(StringView locale, StringView key)
{
auto locale_value = locale_from_string(locale);
if (!locale_value.has_value())
return {};
auto key_value = key_from_string(key);
if (!key_value.has_value())
return {};
auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
size_t keywords_index = 0;
switch (*key_value) {
case Key::Ca:
keywords_index = s_calendar_keywords.at(locale_index);
break;
case Key::Kf:
keywords_index = s_collation_case_keywords.at(locale_index);
break;
case Key::Kn:
keywords_index = s_collation_numeric_keywords.at(locale_index);
break;
case Key::Nu:
keywords_index = s_number_system_keywords.at(locale_index);
break;
default:
VERIFY_NOT_REACHED();
}
return s_keyword_lists.at(keywords_index);
}
Optional<StringView> get_preferred_keyword_value_for_locale(StringView locale, StringView key)
{
// Hour cycle keywords are region-based rather than locale-based, so they need to be handled specially.
// FIXME: Calendar keywords are also region-based, and will need to be handled here when we support non-Gregorian calendars:
// https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/calendarPreferenceData.json
if (key == "hc"sv) {
if (auto hour_cycle = default_hour_cycle(locale); hour_cycle.has_value())
return hour_cycle_to_string(*hour_cycle);
return {};
}
// FIXME: Generate locale-preferred collation data when available in the CLDR.
if (key == "co"sv) {
auto collations = get_available_collation_types();
if (collations.is_empty())
return OptionalNone {};
return Optional<StringView> { collations[0] };
}
auto keyword_indices = find_keyword_indices(locale, key);
if (keyword_indices.is_empty())
return OptionalNone {};
return Optional<StringView> { decode_string(keyword_indices[0]) };
}
Vector<StringView> get_keywords_for_locale(StringView locale, StringView key)
{
// Hour cycle keywords are region-based rather than locale-based, so they need to be handled specially.
// FIXME: Calendar keywords are also region-based, and will need to be handled here when we support non-Gregorian calendars:
// https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/calendarPreferenceData.json
if (key == "hc"sv) {
if (auto hour_cycle = default_hour_cycle(locale); hour_cycle.has_value())
return { hour_cycle_to_string(*hour_cycle) };
return {};
}
// FIXME: Generate locale-preferred collation data when available in the CLDR.
if (key == "co"sv)
return Vector<StringView> { get_available_collation_types() };
auto keyword_indices = find_keyword_indices(locale, key);
Vector<StringView> keywords;
keywords.ensure_capacity(keyword_indices.size());
for (auto keyword : keyword_indices)
keywords.unchecked_append(decode_string(keyword));
return keywords;
}
}
)~~~");
@ -636,25 +256,21 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
{
StringView generated_header_path;
StringView generated_implementation_path;
StringView bcp47_path;
StringView core_path;
StringView numbers_path;
StringView dates_path;
Core::ArgsParser args_parser;
args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
args_parser.add_option(bcp47_path, "Path to cldr-bcp47 directory", "bcp47-path", 'b', "bcp47-path");
args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path");
args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
args_parser.parse(arguments);
auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write));
auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write));
CLDR cldr;
TRY(parse_all_locales(bcp47_path, core_path, numbers_path, dates_path, cldr));
TRY(parse_all_locales(core_path, numbers_path, cldr));
TRY(generate_unicode_locale_header(*generated_header_file, cldr));
TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr));

View File

@ -17,6 +17,7 @@
#include <LibJS/Runtime/Intl/Locale.h>
#include <LibJS/Runtime/ValueInlines.h>
#include <LibLocale/Locale.h>
#include <LibLocale/UnicodeKeywords.h>
namespace JS::Intl {
@ -415,15 +416,11 @@ LocaleResult resolve_locale(Vector<String> const& requested_locales, LocaleOptio
// b. Assert: Type(foundLocaleData) is Record.
// c. Let keyLocaleData be foundLocaleData.[[<key>]].
// d. Assert: Type(keyLocaleData) is List.
auto key_locale_data = ::Locale::get_available_keyword_values(key);
auto key_locale_data = ::Locale::available_keyword_values(found_locale, key);
// e. Let value be keyLocaleData[0].
// f. Assert: Type(value) is either String or Null.
// NOTE: ECMA-402 assumes keyLocaleData is sorted by locale preference. Our list is sorted
// alphabetically, so we get the locale's preferred value from LibLocale.
Optional<String> value;
if (auto preference = ::Locale::get_preferred_keyword_value_for_locale(found_locale, key); preference.has_value())
value = MUST(String::from_utf8(*preference));
auto value = key_locale_data[0];
// g. Let supportedExtensionAddition be "".
Optional<::Locale::Keyword> supported_extension_addition {};
@ -482,7 +479,7 @@ LocaleResult resolve_locale(Vector<String> const& requested_locales, LocaleOptio
// iv. If SameValue(optionsValue, value) is false and keyLocaleData contains optionsValue, then
if (options_value.has_value() && (options_value != value) && key_locale_data.contains_slow(*options_value)) {
// 1. Let value be optionsValue.
value = move(options_value);
value = options_value.release_value();
// 2. Let supportedExtensionAddition be "".
supported_extension_addition.clear();

View File

@ -23,6 +23,7 @@
#include <LibLocale/DateTimeFormat.h>
#include <LibLocale/Locale.h>
#include <LibLocale/NumberFormat.h>
#include <LibLocale/UnicodeKeywords.h>
namespace JS::Intl {
@ -121,23 +122,22 @@ JS_DEFINE_NATIVE_FUNCTION(Intl::supported_values_of)
// 2. If key is "calendar", then
if (key == "calendar"sv) {
// a. Let list be ! AvailableCanonicalCalendars( ).
list = ::Locale::get_available_calendars();
list = ::Locale::available_calendars().span();
}
// 3. Else if key is "collation", then
else if (key == "collation"sv) {
// a. Let list be ! AvailableCanonicalCollations( ).
list = ::Locale::get_available_collation_types();
list = ::Locale::available_collations().span();
}
// 4. Else if key is "currency", then
else if (key == "currency"sv) {
// a. Let list be ! AvailableCanonicalCurrencies( ).
static auto const currencies = ::Locale::available_currencies();
list = currencies.span();
list = ::Locale::available_currencies().span();
}
// 5. Else if key is "numberingSystem", then
else if (key == "numberingSystem"sv) {
// a. Let list be ! AvailableCanonicalNumberingSystems( ).
list = ::Locale::get_available_number_systems();
list = ::Locale::available_number_systems().span();
}
// 6. Else if key is "timeZone", then
else if (key == "timeZone"sv) {

View File

@ -10,6 +10,7 @@
#include <LibJS/Runtime/Intl/Locale.h>
#include <LibLocale/DateTimeFormat.h>
#include <LibLocale/Locale.h>
#include <LibLocale/UnicodeKeywords.h>
#include <LibTimeZone/TimeZone.h>
namespace JS::Intl {
@ -38,7 +39,7 @@ Locale::Locale(Object& prototype)
}
// 1.1.1 CreateArrayFromListOrRestricted ( list , restricted )
static NonnullGCPtr<Array> create_array_from_list_or_restricted(VM& vm, Vector<StringView> list, Optional<String> restricted)
static NonnullGCPtr<Array> create_array_from_list_or_restricted(VM& vm, Vector<String> list, Optional<String> restricted)
{
auto& realm = *vm.current_realm();
@ -49,8 +50,8 @@ static NonnullGCPtr<Array> create_array_from_list_or_restricted(VM& vm, Vector<S
}
// 2. Return ! CreateArrayFromList( list ).
return Array::create_from<StringView>(realm, list, [&vm](auto value) {
return PrimitiveString::create(vm, MUST(String::from_utf8(value)));
return Array::create_from<String>(realm, list, [&vm](auto value) {
return PrimitiveString::create(vm, move(value));
});
}
@ -67,7 +68,7 @@ NonnullGCPtr<Array> calendars_of_locale(VM& vm, Locale const& locale_object)
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
// 4. Let list be a List of 1 or more unique canonical calendar identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for date and time formatting in locale.
auto list = ::Locale::get_keywords_for_locale(locale, "ca"sv);
auto list = ::Locale::available_calendars(locale);
// 5. Return ! CreateArrayFromListOrRestricted( list, restricted ).
return create_array_from_list_or_restricted(vm, move(list), move(restricted));
@ -86,7 +87,7 @@ NonnullGCPtr<Array> collations_of_locale(VM& vm, Locale const& locale_object)
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
// 4. Let list be a List of 1 or more unique canonical collation identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, ordered as if an Array of the same values had been sorted, using %Array.prototype.sort% using undefined as comparefn, of those in common use for string comparison in locale. The values "standard" and "search" must be excluded from list.
auto list = ::Locale::get_keywords_for_locale(locale, "co"sv);
auto list = ::Locale::available_collations(locale);
// 5. Return ! CreateArrayFromListOrRestricted( list, restricted ).
return create_array_from_list_or_restricted(vm, move(list), move(restricted));
@ -105,7 +106,7 @@ NonnullGCPtr<Array> hour_cycles_of_locale(VM& vm, Locale const& locale_object)
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
// 4. Let list be a List of 1 or more unique hour cycle identifiers, which must be lower case String values indicating either the 12-hour format ("h11", "h12") or the 24-hour format ("h23", "h24"), sorted in descending preference of those in common use for date and time formatting in locale.
auto list = ::Locale::get_keywords_for_locale(locale, "hc"sv);
auto list = ::Locale::available_hour_cycles(locale);
// 5. Return ! CreateArrayFromListOrRestricted( list, restricted ).
return create_array_from_list_or_restricted(vm, move(list), move(restricted));
@ -124,7 +125,7 @@ NonnullGCPtr<Array> numbering_systems_of_locale(VM& vm, Locale const& locale_obj
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
// 4. Let list be a List of 1 or more unique canonical numbering system identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for formatting numeric values in locale.
auto list = ::Locale::get_keywords_for_locale(locale, "nu"sv);
auto list = ::Locale::available_number_systems(locale);
// 5. Return ! CreateArrayFromListOrRestricted( list, restricted ).
return create_array_from_list_or_restricted(vm, move(list), move(restricted));

View File

@ -67,14 +67,14 @@ describe("correct behavior", () => {
});
test("numeric option limited to known 'kn' values", () => {
["true", "foo"].forEach(numeric => {
["false", "foo"].forEach(numeric => {
const en = new Intl.Collator(`en-u-kn-${numeric}`);
expect(en.resolvedOptions().numeric).toBeTrue();
expect(en.resolvedOptions().numeric).toBeFalse();
});
["true", "foo"].forEach(numeric => {
["false", "foo"].forEach(numeric => {
const el = new Intl.Collator(`el-u-kn-${numeric}`);
expect(el.resolvedOptions().numeric).toBeTrue();
expect(el.resolvedOptions().numeric).toBeFalse();
});
});
@ -95,14 +95,14 @@ describe("correct behavior", () => {
});
test("caseFirst option limited to known 'kf' values", () => {
["upper", "foo"].forEach(caseFirst => {
["false", "foo"].forEach(caseFirst => {
const en = Intl.Collator(`en-u-kf-${caseFirst}`);
expect(en.resolvedOptions().caseFirst).toBe("upper");
expect(en.resolvedOptions().caseFirst).toBe("false");
});
["upper", "foo"].forEach(caseFirst => {
["false", "foo"].forEach(caseFirst => {
const el = Intl.Collator(`el-u-kf-${caseFirst}`);
expect(el.resolvedOptions().caseFirst).toBe("upper");
expect(el.resolvedOptions().caseFirst).toBe("false");
});
});
});

View File

@ -59,8 +59,8 @@ describe("correct behavior", () => {
});
test("numberingSystem option overrides locale extension", () => {
const el = Intl.DateTimeFormat("el-u-nu-latn", { numberingSystem: "grek" });
expect(el.resolvedOptions().numberingSystem).toBe("grek");
const el = Intl.DateTimeFormat("el-u-nu-latn", { numberingSystem: "adlm" });
expect(el.resolvedOptions().numberingSystem).toBe("adlm");
});
test("numberingSystem option limited to known 'nu' values", () => {
@ -74,12 +74,12 @@ describe("correct behavior", () => {
expect(en.resolvedOptions().numberingSystem).toBe("latn");
});
["latn", "grek"].forEach(numberingSystem => {
["latn", "adlm"].forEach(numberingSystem => {
const el = Intl.DateTimeFormat("el", { numberingSystem: numberingSystem });
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
});
["latn", "grek"].forEach(numberingSystem => {
["latn", "adlm"].forEach(numberingSystem => {
const el = Intl.DateTimeFormat(`el-u-nu-${numberingSystem}`);
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
});

View File

@ -28,8 +28,8 @@ describe("correct behavior", () => {
});
test("numberingSystem option overrides locale extension", () => {
const el = new Intl.DurationFormat("el-u-nu-latn", { numberingSystem: "grek" });
expect(el.resolvedOptions().numberingSystem).toBe("grek");
const el = new Intl.DurationFormat("el-u-nu-latn", { numberingSystem: "adlm" });
expect(el.resolvedOptions().numberingSystem).toBe("adlm");
});
test("numberingSystem option limited to known 'nu' values", () => {
@ -43,12 +43,12 @@ describe("correct behavior", () => {
expect(en.resolvedOptions().numberingSystem).toBe("latn");
});
["latn", "grek"].forEach(numberingSystem => {
["latn", "adlm"].forEach(numberingSystem => {
const el = new Intl.DurationFormat("el", { numberingSystem: numberingSystem });
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
});
["latn", "grek"].forEach(numberingSystem => {
["latn", "adlm"].forEach(numberingSystem => {
const el = new Intl.DurationFormat(`el-u-nu-${numberingSystem}`);
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
});

View File

@ -7,25 +7,29 @@ describe("errors", () => {
});
describe("normal behavior", () => {
test("basic functionality", () => {
expect(Array.isArray(new Intl.Locale("en").getCalendars())).toBeTrue();
expect(new Intl.Locale("en").getCalendars()).toEqual(["gregory"]);
const testCalendars = (locale, expected) => {
const result = locale.getCalendars();
expect(Array.isArray(result)).toBeTrue();
expect(Array.isArray(new Intl.Locale("ar").getCalendars())).toBeTrue();
expect(new Intl.Locale("ar").getCalendars()).toEqual(["gregory"]);
for (const entry of expected) {
expect(result).toContain(entry);
}
};
test("basic functionality", () => {
testCalendars(new Intl.Locale("en"), ["gregory"]);
testCalendars(new Intl.Locale("ar"), ["gregory"]);
});
test("extension keyword overrides default data", () => {
expect(new Intl.Locale("en-u-ca-islamicc").getCalendars()).toEqual(["islamic-civil"]);
expect(new Intl.Locale("en", { calendar: "dangi" }).getCalendars()).toEqual(["dangi"]);
testCalendars(new Intl.Locale("en-u-ca-islamicc"), ["islamic-civil"]);
testCalendars(new Intl.Locale("en", { calendar: "dangi" }), ["dangi"]);
expect(new Intl.Locale("ar-u-ca-ethiopic-amete-alem").getCalendars()).toEqual(["ethioaa"]);
expect(new Intl.Locale("ar", { calendar: "hebrew" }).getCalendars()).toEqual(["hebrew"]);
testCalendars(new Intl.Locale("ar-u-ca-ethiopic-amete-alem"), ["ethioaa"]);
testCalendars(new Intl.Locale("ar", { calendar: "hebrew" }), ["hebrew"]);
// Invalid calendars also take precedence.
expect(new Intl.Locale("en-u-ca-ladybird").getCalendars()).toEqual(["ladybird"]);
expect(new Intl.Locale("en", { calendar: "ladybird" }).getCalendars()).toEqual([
"ladybird",
]);
testCalendars(new Intl.Locale("en-u-ca-ladybird"), ["ladybird"]);
testCalendars(new Intl.Locale("en", { calendar: "ladybird" }), ["ladybird"]);
});
});

View File

@ -7,27 +7,29 @@ describe("errors", () => {
});
describe("normal behavior", () => {
test("basic functionality", () => {
expect(Array.isArray(new Intl.Locale("en").getCollations())).toBeTrue();
expect(new Intl.Locale("en").getCollations()).toEqual(["default"]);
const testCollations = (locale, expected) => {
const result = locale.getCollations();
expect(Array.isArray(result)).toBeTrue();
expect(Array.isArray(new Intl.Locale("ar").getCollations())).toBeTrue();
expect(new Intl.Locale("ar").getCollations()).toEqual(["default"]);
for (const entry of expected) {
expect(result).toContain(entry);
}
};
test("basic functionality", () => {
testCollations(new Intl.Locale("en"), ["default"]);
testCollations(new Intl.Locale("ar"), ["default"]);
});
test("extension keyword overrides default data", () => {
expect(new Intl.Locale("en-u-co-compat").getCollations()).toEqual(["compat"]);
expect(new Intl.Locale("en", { collation: "compat" }).getCollations()).toEqual(["compat"]);
testCollations(new Intl.Locale("en-u-co-compat"), ["compat"]);
testCollations(new Intl.Locale("en", { collation: "compat" }), ["compat"]);
expect(new Intl.Locale("ar-u-co-reformed").getCollations()).toEqual(["reformed"]);
expect(new Intl.Locale("ar", { collation: "reformed" }).getCollations()).toEqual([
"reformed",
]);
testCollations(new Intl.Locale("ar-u-co-reformed"), ["reformed"]);
testCollations(new Intl.Locale("ar", { collation: "reformed" }), ["reformed"]);
// Invalid collations also take precedence.
expect(new Intl.Locale("en-u-co-ladybird").getCollations()).toEqual(["ladybird"]);
expect(new Intl.Locale("en", { collation: "ladybird" }).getCollations()).toEqual([
"ladybird",
]);
testCollations(new Intl.Locale("en-u-co-ladybird"), ["ladybird"]);
testCollations(new Intl.Locale("en", { collation: "ladybird" }), ["ladybird"]);
});
});

View File

@ -7,23 +7,29 @@ describe("errors", () => {
});
describe("normal behavior", () => {
test("basic functionality", () => {
expect(Array.isArray(new Intl.Locale("en").getHourCycles())).toBeTrue();
expect(new Intl.Locale("en").getHourCycles()).toContain("h12");
const testHourCycles = (locale, expected) => {
const result = locale.getHourCycles();
expect(Array.isArray(result)).toBeTrue();
expect(Array.isArray(new Intl.Locale("ha").getHourCycles())).toBeTrue();
expect(new Intl.Locale("ha").getHourCycles()).toContain("h23");
for (const entry of expected) {
expect(result).toContain(entry);
}
};
test("basic functionality", () => {
testHourCycles(new Intl.Locale("en"), ["h12"]);
testHourCycles(new Intl.Locale("ha"), ["h23"]);
});
test("extension keyword overrides default data", () => {
expect(new Intl.Locale("en-u-hc-h24").getHourCycles()).toEqual(["h24"]);
expect(new Intl.Locale("en", { hourCycle: "h24" }).getHourCycles()).toEqual(["h24"]);
testHourCycles(new Intl.Locale("en-u-hc-h24"), ["h24"]);
// The options-bag form must set `hourCycle` (not `collation`) so that it exercises the same
// override as the `-u-hc-` extension above.
testHourCycles(new Intl.Locale("en", { hourCycle: "h24" }), ["h24"]);
expect(new Intl.Locale("ar-u-hc-h24").getHourCycles()).toEqual(["h24"]);
expect(new Intl.Locale("ar", { hourCycle: "h24" }).getHourCycles()).toEqual(["h24"]);
testHourCycles(new Intl.Locale("ar-u-hc-h24"), ["h24"]);
// Same as above: the hour cycle is supplied through the `hourCycle` option.
testHourCycles(new Intl.Locale("ar", { hourCycle: "h24" }), ["h24"]);
// Invalid hourCycles also take precedence when specified in the locale string. Unlike other
// properties, Locale("en", { hourCycle: "ladybird" }) will explicitly throw.
expect(new Intl.Locale("en-u-hc-ladybird").getHourCycles()).toEqual(["ladybird"]);
testHourCycles(new Intl.Locale("en-u-hc-ladybird"), ["ladybird"]);
});
});

View File

@ -7,29 +7,29 @@ describe("errors", () => {
});
describe("normal behavior", () => {
test("basic functionality", () => {
expect(Array.isArray(new Intl.Locale("en").getNumberingSystems())).toBeTrue();
expect(new Intl.Locale("en").getNumberingSystems()).toEqual(["latn"]);
const testNumberingSystems = (locale, expected) => {
const result = locale.getNumberingSystems();
expect(Array.isArray(result)).toBeTrue();
expect(Array.isArray(new Intl.Locale("ar").getNumberingSystems())).toBeTrue();
expect(new Intl.Locale("ar").getNumberingSystems()).toEqual(["arab", "latn"]);
for (const entry of expected) {
expect(result).toContain(entry);
}
};
test("basic functionality", () => {
testNumberingSystems(new Intl.Locale("en"), ["latn"]);
testNumberingSystems(new Intl.Locale("ar"), ["arab", "latn"]);
});
test("extension keyword overrides default data", () => {
expect(new Intl.Locale("en-u-nu-deva").getNumberingSystems()).toEqual(["deva"]);
expect(new Intl.Locale("en", { numberingSystem: "deva" }).getNumberingSystems()).toEqual([
"deva",
]);
testNumberingSystems(new Intl.Locale("en-u-nu-deva"), ["deva"]);
testNumberingSystems(new Intl.Locale("en", { numberingSystem: "deva" }), ["deva"]);
expect(new Intl.Locale("ar-u-nu-bali").getNumberingSystems()).toEqual(["bali"]);
expect(new Intl.Locale("ar", { numberingSystem: "bali" }).getNumberingSystems()).toEqual([
"bali",
]);
testNumberingSystems(new Intl.Locale("ar-u-nu-bali"), ["bali"]);
testNumberingSystems(new Intl.Locale("ar", { numberingSystem: "bali" }), ["bali"]);
// Invalid numberingSystems also take precedence.
expect(new Intl.Locale("en-u-nu-ladybird").getNumberingSystems()).toEqual(["ladybird"]);
expect(
new Intl.Locale("en", { numberingSystem: "ladybird" }).getNumberingSystems()
).toEqual(["ladybird"]);
testNumberingSystems(new Intl.Locale("en-u-nu-ladybird"), ["ladybird"]);
testNumberingSystems(new Intl.Locale("en", { numberingSystem: "ladybird" }), ["ladybird"]);
});
});

View File

@ -31,8 +31,8 @@ describe("correct behavior", () => {
});
test("numberingSystem option overrides locale extension", () => {
const el = Intl.NumberFormat("el-u-nu-latn", { numberingSystem: "grek" });
expect(el.resolvedOptions().numberingSystem).toBe("grek");
const el = Intl.NumberFormat("el-u-nu-latn", { numberingSystem: "adlm" });
expect(el.resolvedOptions().numberingSystem).toBe("adlm");
});
test("numberingSystem option limited to known 'nu' values", () => {
@ -46,12 +46,12 @@ describe("correct behavior", () => {
expect(en.resolvedOptions().numberingSystem).toBe("latn");
});
["latn", "grek"].forEach(numberingSystem => {
["latn", "adlm"].forEach(numberingSystem => {
const el = Intl.NumberFormat("el", { numberingSystem: numberingSystem });
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
});
["latn", "grek"].forEach(numberingSystem => {
["latn", "adlm"].forEach(numberingSystem => {
const el = Intl.NumberFormat(`el-u-nu-${numberingSystem}`);
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
});

View File

@ -31,8 +31,8 @@ describe("correct behavior", () => {
});
test("numberingSystem option overrides locale extension", () => {
const el = new Intl.RelativeTimeFormat("el-u-nu-latn", { numberingSystem: "grek" });
expect(el.resolvedOptions().numberingSystem).toBe("grek");
const el = new Intl.RelativeTimeFormat("el-u-nu-latn", { numberingSystem: "adlm" });
expect(el.resolvedOptions().numberingSystem).toBe("adlm");
});
test("numberingSystem option limited to known 'nu' values", () => {
@ -46,12 +46,12 @@ describe("correct behavior", () => {
expect(en.resolvedOptions().numberingSystem).toBe("latn");
});
["latn", "grek"].forEach(numberingSystem => {
["latn", "adlm"].forEach(numberingSystem => {
const el = new Intl.RelativeTimeFormat("el", { numberingSystem: numberingSystem });
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
});
["latn", "grek"].forEach(numberingSystem => {
["latn", "adlm"].forEach(numberingSystem => {
const el = new Intl.RelativeTimeFormat(`el-u-nu-${numberingSystem}`);
expect(el.resolvedOptions().numberingSystem).toBe(numberingSystem);
});

View File

@ -19,6 +19,7 @@ set(SOURCES
NumberFormat.cpp
PluralRules.cpp
RelativeTimeFormat.cpp
UnicodeKeywords.cpp
)
serenity_lib(LibLocale locale)

View File

@ -12,13 +12,6 @@ namespace Locale {
enum class CalendarPatternStyle : u8;
enum class HourCycle : u8;
enum class Key : u8;
enum class KeywordCalendar : u8;
enum class KeywordCollation : u8;
enum class KeywordColCaseFirst : u8;
enum class KeywordColNumeric : u8;
enum class KeywordHours : u8;
enum class KeywordNumbers : u8;
enum class Locale : u16;
enum class PluralCategory : u8;
enum class Style : u8;

View File

@ -13,6 +13,7 @@
#include <unicode/dtptngen.h>
#include <unicode/locdspnm.h>
#include <unicode/numsys.h>
#include <unicode/tznames.h>
#include <unicode/unistr.h>
@ -74,6 +75,23 @@ icu::LocaleDisplayNames& LocaleData::dialect_display_names()
return *m_dialect_display_names;
}
// Returns the ICU numbering system for this locale, creating and caching it on first use.
// If ICU cannot create one for the locale itself, the "und" locale is used instead; failure
// of that second attempt is treated as fatal.
icu::NumberingSystem& LocaleData::numbering_system()
{
    if (m_numbering_system)
        return *m_numbering_system;

    UErrorCode status = U_ZERO_ERROR;
    m_numbering_system = adopt_own_if_nonnull(icu::NumberingSystem::createInstance(locale(), status));

    if (icu_failure(status)) {
        // ICU refuses to do work while the status holds an error, so reset it before retrying.
        status = U_ZERO_ERROR;
        m_numbering_system = adopt_own_if_nonnull(icu::NumberingSystem::createInstance("und", status));
        VERIFY(icu_success(status));
    }

    return *m_numbering_system;
}
icu::DateTimePatternGenerator& LocaleData::date_time_pattern_generator()
{
if (!m_date_time_pattern_generator) {

View File

@ -22,6 +22,7 @@
U_NAMESPACE_BEGIN
class DateTimePatternGenerator;
class LocaleDisplayNames;
class NumberingSystem;
class TimeZoneNames;
class UnicodeString;
U_NAMESPACE_END
@ -39,6 +40,8 @@ public:
icu::LocaleDisplayNames& standard_display_names();
icu::LocaleDisplayNames& dialect_display_names();
icu::NumberingSystem& numbering_system();
icu::DateTimePatternGenerator& date_time_pattern_generator();
icu::TimeZoneNames& time_zone_names();
@ -54,9 +57,9 @@ private:
OwnPtr<icu::LocaleDisplayNames> m_standard_display_names;
OwnPtr<icu::LocaleDisplayNames> m_dialect_display_names;
OwnPtr<icu::NumberingSystem> m_numbering_system;
OwnPtr<icu::DateTimePatternGenerator> m_date_time_pattern_generator;
OwnPtr<icu::TimeZoneNames> m_time_zone_names;
Optional<DigitalFormat> m_digital_format;
};

View File

@ -9,16 +9,12 @@
#include <AK/AllOf.h>
#include <AK/GenericLexer.h>
#include <AK/QuickSort.h>
#include <AK/ScopeGuard.h>
#include <AK/StringBuilder.h>
#include <LibLocale/DateTimeFormat.h>
#include <LibLocale/ICU.h>
#include <LibLocale/Locale.h>
#include <LibUnicode/CharacterTypes.h>
#include <unicode/localebuilder.h>
#include <unicode/locid.h>
#include <unicode/ucurr.h>
namespace Locale {
@ -548,52 +544,7 @@ StringView style_to_string(Style style)
}
}
ReadonlySpan<StringView> __attribute__((weak)) get_available_keyword_values(StringView) { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_calendars() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_collation_case_orderings() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_collation_numeric_orderings() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_collation_types() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_hour_cycles() { return {}; }
ReadonlySpan<StringView> __attribute__((weak)) get_available_number_systems() { return {}; }
Optional<Locale> __attribute__((weak)) locale_from_string(StringView) { return {}; }
Optional<Key> __attribute__((weak)) key_from_string(StringView) { return {}; }
Optional<KeywordCalendar> __attribute__((weak)) keyword_ca_from_string(StringView) { return {}; }
Optional<KeywordCollation> __attribute__((weak)) keyword_co_from_string(StringView) { return {}; }
Optional<KeywordHours> __attribute__((weak)) keyword_hc_from_string(StringView) { return {}; }
Optional<KeywordColCaseFirst> __attribute__((weak)) keyword_kf_from_string(StringView) { return {}; }
Optional<KeywordColNumeric> __attribute__((weak)) keyword_kn_from_string(StringView) { return {}; }
Optional<KeywordNumbers> __attribute__((weak)) keyword_nu_from_string(StringView) { return {}; }
Vector<StringView> __attribute__((weak)) get_keywords_for_locale(StringView, StringView) { return {}; }
Optional<StringView> __attribute__((weak)) get_preferred_keyword_value_for_locale(StringView, StringView) { return {}; }
Vector<String> available_currencies()
{
UErrorCode status = U_ZERO_ERROR;
auto* currencies = ucurr_openISOCurrencies(UCURR_ALL, &status);
ScopeGuard guard { [&]() { uenum_close(currencies); } };
if (icu_failure(status))
return {};
Vector<String> result;
while (true) {
i32 length = 0;
char const* next = uenum_next(currencies, &length, &status);
if (icu_failure(status))
return {};
if (next == nullptr)
break;
// https://unicode-org.atlassian.net/browse/ICU-21687
if (StringView currency { next, static_cast<size_t>(length) }; currency != "LSM"sv)
result.append(MUST(String::from_utf8(currency)));
}
return result;
}
static void apply_extensions_to_locale(icu::Locale& locale, icu::Locale const& locale_with_extensions)
{

View File

@ -133,29 +133,10 @@ void canonicalize_unicode_extension_values(StringView key, String& value);
StringView default_locale();
bool is_locale_available(StringView locale);
ReadonlySpan<StringView> get_available_keyword_values(StringView key);
ReadonlySpan<StringView> get_available_calendars();
ReadonlySpan<StringView> get_available_collation_case_orderings();
ReadonlySpan<StringView> get_available_collation_numeric_orderings();
ReadonlySpan<StringView> get_available_collation_types();
ReadonlySpan<StringView> get_available_hour_cycles();
ReadonlySpan<StringView> get_available_number_systems();
Vector<String> available_currencies();
Style style_from_string(StringView style);
StringView style_to_string(Style style);
Optional<Locale> locale_from_string(StringView locale);
Optional<Key> key_from_string(StringView key);
Optional<KeywordCalendar> keyword_ca_from_string(StringView ca);
Optional<KeywordCollation> keyword_co_from_string(StringView co);
Optional<KeywordHours> keyword_hc_from_string(StringView hc);
Optional<KeywordColCaseFirst> keyword_kf_from_string(StringView kf);
Optional<KeywordColNumeric> keyword_kn_from_string(StringView kn);
Optional<KeywordNumbers> keyword_nu_from_string(StringView nu);
Vector<StringView> get_keywords_for_locale(StringView locale, StringView key);
Optional<StringView> get_preferred_keyword_value_for_locale(StringView locale, StringView key);
Optional<String> add_likely_subtags(StringView);
Optional<String> remove_likely_subtags(StringView);

View File

@ -0,0 +1,231 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#define AK_DONT_REPLACE_STD
#include <AK/QuickSort.h>
#include <AK/ScopeGuard.h>
#include <LibLocale/DateTimeFormat.h>
#include <LibLocale/ICU.h>
#include <LibLocale/UnicodeKeywords.h>
#include <unicode/calendar.h>
#include <unicode/coll.h>
#include <unicode/locid.h>
#include <unicode/numsys.h>
#include <unicode/ucurr.h>
namespace Locale {
// Drains an ICU string enumeration into a list of Strings, keeping only the entries for which
// `filter(char const*)` returns true. A null enumeration yields an empty list. Iteration stops
// at the end of the enumeration or at the first ICU error; entries collected so far are kept.
template<typename Filter>
static Vector<String> icu_string_enumeration_to_list(OwnPtr<icu::StringEnumeration> enumeration, Filter&& filter)
{
    Vector<String> list;
    if (!enumeration)
        return list;

    UErrorCode status = U_ZERO_ERROR;

    for (;;) {
        i32 length = 0;
        auto const* entry = enumeration->next(&length, status);

        if (entry == nullptr || icu_failure(status))
            break;

        if (filter(entry))
            list.append(MUST(String::from_utf8({ entry, static_cast<size_t>(length) })));
    }

    return list;
}
// Convenience overload: converts every entry of the enumeration, without filtering.
static Vector<String> icu_string_enumeration_to_list(OwnPtr<icu::StringEnumeration> enumeration)
{
    auto accept_all = [](char const*) { return true; };
    return icu_string_enumeration_to_list(move(enumeration), accept_all);
}
// Dispatches a Unicode extension key ("ca", "co", "hc", "kf", "kn" or "nu") to the matching
// available_* lookup. Any other key reaches TODO().
Vector<String> available_keyword_values(StringView locale, StringView key)
{
    // Keys whose value lists do not depend on the locale.
    if (key == "kf"sv)
        return available_collation_case_orderings();
    if (key == "kn"sv)
        return available_collation_numeric_orderings();

    // Keys whose value lists are resolved against the requested locale.
    if (key == "ca"sv)
        return available_calendars(locale);
    if (key == "co"sv)
        return available_collations(locale);
    if (key == "hc"sv)
        return available_hour_cycles(locale);
    if (key == "nu"sv)
        return available_number_systems(locale);

    TODO();
}
// Returns the calendars reported for the "und" locale, sorted. The list is computed once, on
// first call, and cached for later calls.
Vector<String> const& available_calendars()
{
    static auto const sorted_calendars = [] {
        auto list = available_calendars("und"sv);
        quick_sort(list);
        return list;
    }();

    return sorted_calendars;
}
// Returns the calendar keyword values ICU reports for `locale`, in ICU's order. The names
// "gregorian" and "ethiopic-amete-alem" are rewritten to "gregory" and "ethioaa". Returns an
// empty list if no locale data is available or ICU reports an error.
Vector<String> available_calendars(StringView locale)
{
    auto locale_data = LocaleData::for_locale(locale);
    if (!locale_data.has_value())
        return {};

    UErrorCode status = U_ZERO_ERROR;
    auto values = adopt_own_if_nonnull(icu::Calendar::getKeywordValuesForLocale("calendar", locale_data->locale(), 0, status));
    if (icu_failure(status))
        return {};

    auto result = icu_string_enumeration_to_list(move(values));

    for (auto& name : result) {
        if (name == "gregorian"sv) {
            name = "gregory"_string;
            continue;
        }

        if (name == "ethiopic-amete-alem"sv)
            name = "ethioaa"_string;
    }

    return result;
}
// Returns the sorted list of ISO currency codes known to ICU, computed once and cached.
// The list is empty if ICU reports an error while opening or walking the enumeration.
Vector<String> const& available_currencies()
{
    static auto const sorted_currencies = []() -> Vector<String> {
        UErrorCode status = U_ZERO_ERROR;

        auto* enumeration = ucurr_openISOCurrencies(UCURR_ALL, &status);
        ScopeGuard close_enumeration { [&]() { uenum_close(enumeration); } };

        if (icu_failure(status))
            return {};

        Vector<String> codes;

        for (;;) {
            i32 length = 0;
            char const* code = uenum_next(enumeration, &length, &status);

            if (icu_failure(status))
                return {};
            if (code == nullptr)
                break;

            StringView currency { code, static_cast<size_t>(length) };

            // https://unicode-org.atlassian.net/browse/ICU-21687
            if (currency == "LSM"sv)
                continue;

            codes.append(MUST(String::from_utf8(currency)));
        }

        quick_sort(codes);
        return codes;
    }();

    return sorted_currencies;
}
// Returns the fixed set of values for the "kf" (collation case-first) key.
Vector<String> const& available_collation_case_orderings()
{
    static Vector<String> const orderings { "false"_string, "lower"_string, "upper"_string };
    return orderings;
}
// Returns the fixed set of values for the "kn" (collation numeric ordering) key.
Vector<String> const& available_collation_numeric_orderings()
{
    static Vector<String> const numeric_orderings { "false"_string, "true"_string };
    return numeric_orderings;
}
// Returns the supported collation types. Only "default" is reported for now.
Vector<String> const& available_collations()
{
    // FIXME: Implement this when we fully support Intl.Collator.
    static Vector<String> const supported_collations { "default"_string };
    return supported_collations;
}
// Returns the collation types for a locale. The locale is currently ignored and the
// locale-independent list is returned as a copy.
Vector<String> available_collations(StringView)
{
    // FIXME: Implement this when we fully support Intl.Collator.
    auto const& collations = available_collations();
    return collations;
}
// Returns the fixed set of values for the "hc" (hour cycle) key.
Vector<String> const& available_hour_cycles()
{
    static Vector<String> const hour_cycles { "h11"_string, "h12"_string, "h23"_string, "h24"_string };
    return hour_cycles;
}
// Returns all hour cycles with the locale's default hour cycle moved to the front. If no
// default hour cycle is found for the locale, the locale-independent list is returned as is.
Vector<String> available_hour_cycles(StringView locale)
{
    auto preferred = default_hour_cycle(locale);
    if (!preferred.has_value())
        return available_hour_cycles();

    auto preferred_name = MUST(String::from_utf8(hour_cycle_to_string(*preferred)));

    Vector<String> ordered;
    ordered.append(preferred_name);

    // Append the remaining hour cycles in their canonical order, skipping the preferred one.
    for (auto const& candidate : available_hour_cycles()) {
        if (candidate == preferred_name)
            continue;
        ordered.append(candidate);
    }

    return ordered;
}
// Returns the sorted list of non-algorithmic numbering systems known to ICU, computed once
// and cached. The list is empty if ICU cannot enumerate the numbering system names.
Vector<String> const& available_number_systems()
{
    static auto number_systems = []() -> Vector<String> {
        UErrorCode status = U_ZERO_ERROR;

        auto keywords = adopt_own_if_nonnull(icu::NumberingSystem::getAvailableNames(status));
        if (icu_failure(status))
            return {};

        auto number_systems = icu_string_enumeration_to_list(move(keywords), [](char const* keyword) {
            // Use a fresh status for every lookup. ICU functions return immediately when handed
            // a status that already holds an error, so sharing one status across iterations
            // would make a single failed lookup silently drop every system that follows it.
            UErrorCode lookup_status = U_ZERO_ERROR;

            auto system = adopt_own_if_nonnull(icu::NumberingSystem::createInstanceByName(keyword, lookup_status));
            if (icu_failure(lookup_status) || !system)
                return false;

            // Only systems that are not algorithmic are reported.
            return !static_cast<bool>(system->isAlgorithmic());
        });

        quick_sort(number_systems);
        return number_systems;
    }();

    return number_systems;
}
// Returns all numbering systems with the locale's own numbering system moved to the front.
// Returns an empty list if no locale data is available for `locale`.
Vector<String> available_number_systems(StringView locale)
{
    auto locale_data = LocaleData::for_locale(locale);
    if (!locale_data.has_value())
        return {};

    auto const* preferred_name = locale_data->numbering_system().getName();
    auto preferred = MUST(String::from_utf8({ preferred_name, strlen(preferred_name) }));

    Vector<String> ordered;
    ordered.append(preferred);

    // Append the remaining systems in their sorted order, skipping the preferred one.
    for (auto const& candidate : available_number_systems()) {
        if (candidate == preferred)
            continue;
        ordered.append(candidate);
    }

    return ordered;
}
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/String.h>
#include <AK/StringView.h>
#include <AK/Vector.h>
namespace Locale {
// Returns the available values for the Unicode extension `key` ("ca", "co", "hc", "kf", "kn"
// or "nu"), using `locale` for the locale-sensitive keys. Other keys are not implemented.
Vector<String> available_keyword_values(StringView locale, StringView key);
// Sorted calendars for the "und" locale; computed once and cached.
Vector<String> const& available_calendars();
// Calendars ICU reports for `locale`, with "gregorian" and "ethiopic-amete-alem" renamed to
// "gregory" and "ethioaa". Empty if no locale data is available or ICU reports an error.
Vector<String> available_calendars(StringView locale);
// Sorted ISO currency codes from ICU (excluding "LSM"); empty if ICU reports an error.
Vector<String> const& available_currencies();
// Fixed list: "false", "lower", "upper".
Vector<String> const& available_collation_case_orderings();
// Fixed list: "false", "true".
Vector<String> const& available_collation_numeric_orderings();
// Currently only "default"; the locale overload ignores its argument.
Vector<String> const& available_collations();
Vector<String> available_collations(StringView locale);
// Fixed list: "h11", "h12", "h23", "h24".
Vector<String> const& available_hour_cycles();
// Same values with the locale's default hour cycle first; the fixed list if no default is found.
Vector<String> available_hour_cycles(StringView locale);
// Sorted names of ICU's non-algorithmic numbering systems; computed once and cached.
Vector<String> const& available_number_systems();
// Same values with the locale's own numbering system first; empty if no locale data is available.
Vector<String> available_number_systems(StringView locale);
}