From 5e2ee4447e41b9d194e034a32d4fc928118be6f1 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Fri, 14 Jun 2024 14:15:28 -0400 Subject: [PATCH] LibJS+LibLocale: Replace plural rules selection with ICU This uses ICU for all of the Intl.PluralRules prototypes, which lets us remove all data from our plural rules generator. Plural rules depend directly on internal data from the number formatter, so rather than creating a separate Locale::PluralRules class (which will make accessing that data awkward), this adds plural rules APIs to the existing Locale::NumberFormat. --- Meta/CMake/locale_data.cmake | 17 - .../CodeGenerators/LibLocale/CMakeLists.txt | 1 - .../LibLocale/GeneratePluralRulesData.cpp | 685 ------------------ .../LibJS/Runtime/Intl/PluralRules.cpp | 163 +---- .../LibJS/Runtime/Intl/PluralRules.h | 11 +- .../Runtime/Intl/PluralRulesConstructor.cpp | 103 ++- .../Runtime/Intl/PluralRulesConstructor.h | 4 +- .../Runtime/Intl/PluralRulesPrototype.cpp | 4 +- .../PluralRules.prototype.selectRange.js | 7 - Userland/Libraries/LibLocale/Forward.h | 1 - Userland/Libraries/LibLocale/NumberFormat.cpp | 88 +++ Userland/Libraries/LibLocale/NumberFormat.h | 6 + Userland/Libraries/LibLocale/PluralRules.cpp | 53 +- Userland/Libraries/LibLocale/PluralRules.h | 99 +-- 14 files changed, 216 insertions(+), 1026 deletions(-) delete mode 100644 Meta/Lagom/Tools/CodeGenerators/LibLocale/GeneratePluralRulesData.cpp diff --git a/Meta/CMake/locale_data.cmake b/Meta/CMake/locale_data.cmake index 21a47b82b95..e5fd642457c 100644 --- a/Meta/CMake/locale_data.cmake +++ b/Meta/CMake/locale_data.cmake @@ -18,9 +18,6 @@ set(CLDR_CORE_PATH "${CLDR_PATH}/${CLDR_CORE_SOURCE}") set(CLDR_DATES_SOURCE cldr-dates-modern) set(CLDR_DATES_PATH "${CLDR_PATH}/${CLDR_DATES_SOURCE}") -set(CLDR_LOCALES_SOURCE cldr-localenames-modern) -set(CLDR_LOCALES_PATH "${CLDR_PATH}/${CLDR_LOCALES_SOURCE}") - set(CLDR_NUMBERS_SOURCE cldr-numbers-modern) set(CLDR_NUMBERS_PATH "${CLDR_PATH}/${CLDR_NUMBERS_SOURCE}") @@ 
-32,7 +29,6 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_BCP47_SOURCE}/**" "${CLDR_BCP47_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_CORE_SOURCE}/**" "${CLDR_CORE_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_DATES_SOURCE}/**" "${CLDR_DATES_PATH}") - extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_LOCALES_SOURCE}/**" "${CLDR_LOCALES_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_NUMBERS_SOURCE}/**" "${CLDR_NUMBERS_PATH}") else() message(STATUS "Skipping download of ${CLDR_ZIP_URL}, expecting the archive to have been extracted to ${CLDR_PATH}") @@ -41,9 +37,6 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) set(LOCALE_DATA_HEADER LocaleData.h) set(LOCALE_DATA_IMPLEMENTATION LocaleData.cpp) - set(PLURAL_RULES_DATA_HEADER PluralRulesData.h) - set(PLURAL_RULES_DATA_IMPLEMENTATION PluralRulesData.cpp) - invoke_generator( "LocaleData" Lagom::GenerateLocaleData @@ -52,19 +45,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) "${LOCALE_DATA_IMPLEMENTATION}" arguments -b "${CLDR_BCP47_PATH}" -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}" -d "${CLDR_DATES_PATH}" ) - invoke_generator( - "PluralRulesData" - Lagom::GeneratePluralRulesData - "${CLDR_VERSION_FILE}" - "${PLURAL_RULES_DATA_HEADER}" - "${PLURAL_RULES_DATA_IMPLEMENTATION}" - arguments -r "${CLDR_CORE_PATH}" -l "${CLDR_LOCALES_PATH}" - ) set(LOCALE_DATA_SOURCES ${LOCALE_DATA_HEADER} ${LOCALE_DATA_IMPLEMENTATION} - ${PLURAL_RULES_DATA_HEADER} - ${PLURAL_RULES_DATA_IMPLEMENTATION} ) endif() diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/CMakeLists.txt b/Meta/Lagom/Tools/CodeGenerators/LibLocale/CMakeLists.txt index f51f1ed909c..1a0a206154a 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/CMakeLists.txt +++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/CMakeLists.txt @@ -1,2 +1 @@ lagom_tool(GenerateLocaleData SOURCES GenerateLocaleData.cpp LIBS LibMain) -lagom_tool(GeneratePluralRulesData SOURCES 
GeneratePluralRulesData.cpp LIBS LibMain) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GeneratePluralRulesData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GeneratePluralRulesData.cpp deleted file mode 100644 index 7fa4f454073..00000000000 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GeneratePluralRulesData.cpp +++ /dev/null @@ -1,685 +0,0 @@ -/* - * Copyright (c) 2022, Tim Flynn - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common. -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static ByteString format_identifier(StringView owner, ByteString identifier) -{ - identifier = identifier.replace("-"sv, "_"sv, ReplaceMode::All); - - if (all_of(identifier, is_ascii_digit)) - return ByteString::formatted("{}_{}", owner[0], identifier); - if (is_ascii_lower_alpha(identifier[0])) - return ByteString::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1)); - return identifier; -} - -struct Relation { - using Range = Array; - using Comparator = Variant; - - enum class Type { - Equality, - Inequality, - }; - - ByteString const& modulus_variable_name() const - { - VERIFY(modulus.has_value()); - - if (!cached_modulus_variable_name.has_value()) - cached_modulus_variable_name = ByteString::formatted("mod_{}_{}", symbol, *modulus); - - return *cached_modulus_variable_name; - } - - ByteString const& exponential_variable_name() const - { - if (!cached_exponential_variable_name.has_value()) - cached_exponential_variable_name = ByteString::formatted("exp_{}", symbol); - - return *cached_exponential_variable_name; - } - - void generate_relation(SourceGenerator& generator) const - { - auto append_variable_name = [&]() { - if (modulus.has_value()) - generator.append(modulus_variable_name()); - else if (symbol == 'e' || symbol == 'c') - generator.append(exponential_variable_name()); - else - 
generator.append(ByteString::formatted("ops.{}", Locale::PluralOperands::symbol_to_variable_name(symbol))); - }; - - auto append_value = [&](u32 value) { - append_variable_name(); - generator.append(" == "sv); - generator.append(ByteString::number(value)); - }; - - auto append_range = [&](auto const& range) { - // This check avoids generating "0 <= unsigned_value", which is always true. - if (range[0] != 0 || Locale::PluralOperands::symbol_requires_floating_point_modulus(symbol)) { - generator.append(ByteString::formatted("{} <= ", range[0])); - append_variable_name(); - generator.append(" && "sv); - } - - append_variable_name(); - generator.append(ByteString::formatted(" <= {}", range[1])); - }; - - if (type == Type::Inequality) - generator.append("!"sv); - - generator.append("("sv); - - bool first = true; - for (auto const& comparator : comparators) { - generator.append(first ? "("sv : " || ("sv); - - comparator.visit( - [&](u32 value) { append_value(value); }, - [&](Range const& range) { append_range(range); }); - - generator.append(")"sv); - first = false; - } - - generator.append(")"sv); - } - - void generate_precomputed_variables(SourceGenerator& generator, HashTable& generated_variables) const - { - // FIXME: How do we handle the exponential symbols? They seem unused by ECMA-402. 
- if (symbol == 'e' || symbol == 'c') { - if (auto variable = exponential_variable_name(); !generated_variables.contains(variable)) { - generated_variables.set(variable); - generator.set("variable"sv, move(variable)); - generator.append(R"~~~( - auto @variable@ = 0;)~~~"); - } - } - - if (!modulus.has_value()) - return; - - auto variable = modulus_variable_name(); - if (generated_variables.contains(variable)) - return; - - generated_variables.set(variable); - generator.set("variable"sv, move(variable)); - generator.set("operand"sv, Locale::PluralOperands::symbol_to_variable_name(symbol)); - generator.set("modulus"sv, ByteString::number(*modulus)); - - if (Locale::PluralOperands::symbol_requires_floating_point_modulus(symbol)) { - generator.append(R"~~~( - auto @variable@ = fmod(ops.@operand@, @modulus@);)~~~"); - } else { - generator.append(R"~~~( - auto @variable@ = ops.@operand@ % @modulus@;)~~~"); - } - } - - Type type; - char symbol { 0 }; - Optional modulus; - Vector comparators; - -private: - mutable Optional cached_modulus_variable_name; - mutable Optional cached_exponential_variable_name; -}; - -struct Condition { - void generate_condition(SourceGenerator& generator) const - { - for (size_t i = 0; i < relations.size(); ++i) { - if (i > 0) - generator.append(" || "sv); - - auto const& conjunctions = relations[i]; - if (conjunctions.size() > 1) - generator.append("("sv); - - for (size_t j = 0; j < conjunctions.size(); ++j) { - if (j > 0) - generator.append(" && "sv); - conjunctions[j].generate_relation(generator); - } - - if (conjunctions.size() > 1) - generator.append(")"sv); - } - } - - void generate_precomputed_variables(SourceGenerator& generator, HashTable& generated_variables) const - { - for (auto const& conjunctions : relations) { - for (auto const& relation : conjunctions) - relation.generate_precomputed_variables(generator, generated_variables); - } - } - - Vector> relations; -}; - -struct Range { - ByteString start; - ByteString end; - ByteString 
category; -}; - -using Conditions = HashMap; -using Ranges = Vector; - -struct LocaleData { - static ByteString generated_method_name(StringView form, StringView locale) - { - return ByteString::formatted("{}_plurality_{}", form, format_identifier({}, locale)); - } - - Conditions& rules_for_form(StringView form) - { - if (form == "cardinal") - return cardinal_rules; - if (form == "ordinal") - return ordinal_rules; - VERIFY_NOT_REACHED(); - } - - Conditions cardinal_rules; - Conditions ordinal_rules; - Ranges plural_ranges; -}; - -struct CLDR { - UniqueStringStorage unique_strings; - - HashMap locales; -}; - -static Relation parse_relation(StringView relation) -{ - static constexpr auto equality_operator = " = "sv; - static constexpr auto inequality_operator = " != "sv; - static constexpr auto modulus_operator = " % "sv; - static constexpr auto range_operator = ".."sv; - static constexpr auto set_operator = ','; - - Relation parsed; - - StringView lhs; - StringView rhs; - - if (auto index = relation.find(equality_operator); index.has_value()) { - parsed.type = Relation::Type::Equality; - lhs = relation.substring_view(0, *index); - rhs = relation.substring_view(*index + equality_operator.length()); - } else if (auto index = relation.find(inequality_operator); index.has_value()) { - parsed.type = Relation::Type::Inequality; - lhs = relation.substring_view(0, *index); - rhs = relation.substring_view(*index + inequality_operator.length()); - } else { - VERIFY_NOT_REACHED(); - } - - if (auto index = lhs.find(modulus_operator); index.has_value()) { - auto symbol = lhs.substring_view(0, *index); - VERIFY(symbol.length() == 1); - - auto modulus = lhs.substring_view(*index + modulus_operator.length()).to_number(); - VERIFY(modulus.has_value()); - - parsed.symbol = symbol[0]; - parsed.modulus = move(modulus); - } else { - VERIFY(lhs.length() == 1); - parsed.symbol = lhs[0]; - } - - rhs.for_each_split_view(set_operator, SplitBehavior::Nothing, [&](auto set) { - if (auto index 
= set.find(range_operator); index.has_value()) { - auto range_begin = set.substring_view(0, *index).template to_number(); - VERIFY(range_begin.has_value()); - - auto range_end = set.substring_view(*index + range_operator.length()).template to_number(); - VERIFY(range_end.has_value()); - - parsed.comparators.empend(Array { *range_begin, *range_end }); - } else { - auto value = set.template to_number(); - VERIFY(value.has_value()); - - parsed.comparators.empend(*value); - } - }); - - return parsed; -} - -// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax -// -// A very simplified view of a plural rule is: -// -// condition.* ([@integer|@decimal] sample)+ -// -// The "sample" being series of integer or decimal values that fit the specified condition. The -// condition may be one or more binary expressions, chained together with "and" or "or" operators. -static void parse_condition(StringView category, StringView rule, Conditions& rules) -{ - static constexpr auto other_category = "other"sv; - static constexpr auto disjunction_keyword = " or "sv; - static constexpr auto conjunction_keyword = " and "sv; - - // We don't need the examples in the generated code, so we can drop them here. - auto example_index = rule.find('@'); - VERIFY(example_index.has_value()); - - auto condition = rule.substring_view(0, *example_index).trim_whitespace(); - - // Our implementation does not generate rules for the "other" category. We simply return "other" - // for values that do not match any rules. This will need to be revisited if this VERIFY fails. - if (condition.is_empty()) { - VERIFY(category == other_category); - return; - } - - auto& relation_list = rules.ensure(category); - - // The grammar for a condition (i.e. a chain of relations) is: - // - // condition = and_condition ('or' and_condition)* - // and_condition = relation ('and' relation)* - // - // This affords some simplicity in that disjunctions are never embedded within a conjunction. 
- condition.for_each_split_view(disjunction_keyword, SplitBehavior::Nothing, [&](auto disjunction) { - Vector conjunctions; - - disjunction.for_each_split_view(conjunction_keyword, SplitBehavior::Nothing, [&](auto relation) { - conjunctions.append(parse_relation(relation)); - }); - - relation_list.relations.append(move(conjunctions)); - }); -} - -static ErrorOr parse_plural_rules(ByteString core_supplemental_path, StringView file_name, CLDR& cldr) -{ - static constexpr auto form_prefix = "plurals-type-"sv; - static constexpr auto rule_prefix = "pluralRule-count-"sv; - - LexicalPath plurals_path(move(core_supplemental_path)); - plurals_path = plurals_path.append(file_name); - - auto plurals = TRY(read_json_file(plurals_path.string())); - auto const& supplemental_object = plurals.as_object().get_object("supplemental"sv).value(); - - supplemental_object.for_each_member([&](auto const& key, auto const& plurals_object) { - if (!key.starts_with(form_prefix)) - return; - - auto form = key.substring_view(form_prefix.length()); - - plurals_object.as_object().for_each_member([&](auto const& loc, auto const& rules) { - auto locale = cldr.locales.get(loc); - if (!locale.has_value()) - return; - - rules.as_object().for_each_member([&](auto const& key, auto const& condition) { - VERIFY(key.starts_with(rule_prefix)); - - auto category = key.substring_view(rule_prefix.length()); - parse_condition(category, condition.as_string(), locale->rules_for_form(form)); - }); - }); - }); - - return {}; -} - -// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Ranges -static ErrorOr parse_plural_ranges(ByteString core_supplemental_path, CLDR& cldr) -{ - static constexpr auto start_segment = "-start-"sv; - static constexpr auto end_segment = "-end-"sv; - - LexicalPath plural_ranges_path(move(core_supplemental_path)); - plural_ranges_path = plural_ranges_path.append("pluralRanges.json"sv); - - auto plural_ranges = TRY(read_json_file(plural_ranges_path.string())); - auto const& 
supplemental_object = plural_ranges.as_object().get_object("supplemental"sv).value(); - auto const& plurals_object = supplemental_object.get_object("plurals"sv).value(); - - plurals_object.for_each_member([&](auto const& loc, auto const& ranges_object) { - auto locale = cldr.locales.get(loc); - if (!locale.has_value()) - return; - - ranges_object.as_object().for_each_member([&](auto const& range, auto const& category) { - auto start_index = range.find(start_segment); - VERIFY(start_index.has_value()); - - auto end_index = range.find(end_segment); - VERIFY(end_index.has_value()); - - *start_index += start_segment.length(); - - auto start = range.substring(*start_index, *end_index - *start_index); - auto end = range.substring(*end_index + end_segment.length()); - - locale->plural_ranges.empend(move(start), move(end), category.as_string()); - }); - }); - - return {}; -} - -static ErrorOr parse_all_locales(ByteString core_path, ByteString locale_names_path, CLDR& cldr) -{ - LexicalPath core_supplemental_path(move(core_path)); - core_supplemental_path = core_supplemental_path.append("supplemental"sv); - VERIFY(FileSystem::is_directory(core_supplemental_path.string())); - - auto remove_variants_from_path = [&](ByteString path) -> ErrorOr { - auto parsed_locale = TRY(CanonicalLanguageID::parse(cldr.unique_strings, LexicalPath::basename(path))); - - StringBuilder builder; - builder.append(cldr.unique_strings.get(parsed_locale.language)); - if (auto script = cldr.unique_strings.get(parsed_locale.script); !script.is_empty()) - builder.appendff("-{}", script); - if (auto region = cldr.unique_strings.get(parsed_locale.region); !region.is_empty()) - builder.appendff("-{}", region); - - return builder.to_byte_string(); - }; - - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", locale_names_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { - auto locale_path = LexicalPath::join(directory.path().string(), 
entry.name).string(); - auto language = TRY(remove_variants_from_path(locale_path)); - - cldr.locales.ensure(language); - return IterationDecision::Continue; - })); - - TRY(parse_plural_rules(core_supplemental_path.string(), "plurals.json"sv, cldr)); - TRY(parse_plural_rules(core_supplemental_path.string(), "ordinals.json"sv, cldr)); - TRY(parse_plural_ranges(core_supplemental_path.string(), cldr)); - return {}; -} - -static ErrorOr generate_unicode_locale_header(Core::InputBufferedFile& file, CLDR&) -{ - StringBuilder builder; - SourceGenerator generator { builder }; - - generator.append(R"~~~( -#pragma once - -#include - -namespace Locale { -)~~~"); - - generator.append(R"~~~( -} -)~~~"); - - TRY(file.write_until_depleted(generator.as_string_view().bytes())); - return {}; -} - -static ErrorOr generate_unicode_locale_implementation(Core::InputBufferedFile& file, CLDR& cldr) -{ - StringBuilder builder; - SourceGenerator generator { builder }; - - auto locales = cldr.locales.keys(); - quick_sort(locales); - - generator.append(R"~~~( -#include -#include -#include -#include -#include -#include - -namespace Locale { - -using PluralCategoryFunction = PluralCategory(*)(PluralOperands); -using PluralRangeFunction = PluralCategory(*)(PluralCategory, PluralCategory); - -static PluralCategory default_category(PluralOperands) -{ - return PluralCategory::Other; -} - -static PluralCategory default_range(PluralCategory, PluralCategory end) -{ - return end; -} - -)~~~"); - - auto append_rules = [&](auto form, auto const& locale, auto const& rules) { - if (rules.is_empty()) - return; - - generator.set("method"sv, LocaleData::generated_method_name(form, locale)); - HashTable generated_variables; - - generator.append(R"~~~( -static PluralCategory @method@([[maybe_unused]] PluralOperands ops) -{)~~~"); - - for (auto [category, condition] : rules) { - condition.generate_precomputed_variables(generator, generated_variables); - - generator.append(R"~~~( - if ()~~~"); - - 
generator.set("category"sv, format_identifier({}, category)); - condition.generate_condition(generator); - - generator.append(R"~~~() - return PluralCategory::@category@;)~~~"); - } - - generator.append(R"~~~( - return PluralCategory::Other; -} -)~~~"); - }; - - auto append_ranges = [&](auto const& locale, auto const& ranges) { - if (ranges.is_empty()) - return; - - generator.set("method"sv, LocaleData::generated_method_name("range"sv, locale)); - - generator.append(R"~~~( -static PluralCategory @method@(PluralCategory start, PluralCategory end) -{)~~~"); - - for (auto const& range : ranges) { - generator.set("start"sv, format_identifier({}, range.start)); - generator.set("end"sv, format_identifier({}, range.end)); - generator.set("category"sv, format_identifier({}, range.category)); - - generator.append(R"~~~( - if (start == PluralCategory::@start@ && end == PluralCategory::@end@) - return PluralCategory::@category@;)~~~"); - } - - generator.append(R"~~~( - return end; -} -)~~~"); - }; - - auto append_lookup_table = [&](auto type, auto form, auto default_, auto data_for_locale) { - generator.set("type"sv, type); - generator.set("form"sv, form); - generator.set("default"sv, default_); - generator.set("size"sv, ByteString::number(locales.size())); - - generator.append(R"~~~( -static constexpr Array<@type@, @size@> s_@form@_functions { {)~~~"); - - for (auto const& locale : locales) { - auto& rules = data_for_locale(cldr.locales.find(locale)->value, form); - - if (rules.is_empty()) { - generator.append(R"~~~( - @default@,)~~~"); - } else { - generator.set("method"sv, LocaleData::generated_method_name(form, locale)); - generator.append(R"~~~( - @method@,)~~~"); - } - } - - generator.append(R"~~~( -} }; -)~~~"); - }; - - auto append_categories = [&](auto const& name, auto const& rules) { - generator.set("name", name); - generator.set("size", ByteString::number(rules.size() + 1)); - - generator.append(R"~~~( -static constexpr Array @name@ { { 
PluralCategory::Other)~~~"); - - for (auto [category, condition] : rules) { - generator.set("category"sv, format_identifier({}, category)); - generator.append(", PluralCategory::@category@"sv); - } - - generator.append("} };"); - }; - - for (auto const& [locale, rules] : cldr.locales) { - append_rules("cardinal"sv, locale, rules.cardinal_rules); - append_rules("ordinal"sv, locale, rules.ordinal_rules); - append_ranges(locale, rules.plural_ranges); - } - - append_lookup_table("PluralCategoryFunction"sv, "cardinal"sv, "default_category"sv, [](auto& rules, auto form) -> Conditions& { return rules.rules_for_form(form); }); - append_lookup_table("PluralCategoryFunction"sv, "ordinal"sv, "default_category"sv, [](auto& rules, auto form) -> Conditions& { return rules.rules_for_form(form); }); - append_lookup_table("PluralRangeFunction"sv, "range"sv, "default_range"sv, [](auto& rules, auto) -> Ranges& { return rules.plural_ranges; }); - - generate_mapping(generator, locales, "PluralCategory"sv, "s_cardinal_categories"sv, "s_cardinal_categories_{}"sv, format_identifier, - [&](auto const& name, auto const& locale) { - auto& rules = cldr.locales.find(locale)->value; - append_categories(name, rules.rules_for_form("cardinal"sv)); - }); - - generate_mapping(generator, locales, "PluralCategory"sv, "s_ordinal_categories"sv, "s_ordinal_categories_{}"sv, format_identifier, - [&](auto const& name, auto const& locale) { - auto& rules = cldr.locales.find(locale)->value; - append_categories(name, rules.rules_for_form("ordinal"sv)); - }); - - generator.append(R"~~~( -PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands) -{ - auto locale_value = locale_from_string(locale); - if (!locale_value.has_value()) - return PluralCategory::Other; - - auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. 
- PluralCategoryFunction decider { nullptr }; - - switch (form) { - case PluralForm::Cardinal: - decider = s_cardinal_functions[locale_index]; - break; - case PluralForm::Ordinal: - decider = s_ordinal_functions[locale_index]; - break; - } - - return decider(move(operands)); -} - -ReadonlySpan available_plural_categories(StringView locale, PluralForm form) -{ - auto locale_value = locale_from_string(locale); - if (!locale_value.has_value()) - return {}; - - auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. - - switch (form) { - case PluralForm::Cardinal: - return s_cardinal_categories[locale_index]; - case PluralForm::Ordinal: - return s_ordinal_categories[locale_index]; - } - - VERIFY_NOT_REACHED(); -} - -PluralCategory determine_plural_range(StringView locale, PluralCategory start, PluralCategory end) -{ - auto locale_value = locale_from_string(locale); - if (!locale_value.has_value()) - return PluralCategory::Other; - - auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. 
- - PluralRangeFunction decider = s_range_functions[locale_index]; - return decider(start, end); -} - -} -)~~~"); - - TRY(file.write_until_depleted(generator.as_string_view().bytes())); - return {}; -} - -ErrorOr serenity_main(Main::Arguments arguments) -{ - StringView generated_header_path; - StringView generated_implementation_path; - StringView core_path; - StringView locale_names_path; - - Core::ArgsParser args_parser; - args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path"); - args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); - args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path"); - args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path"); - args_parser.parse(arguments); - - auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write)); - auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write)); - - CLDR cldr; - TRY(parse_all_locales(core_path, locale_names_path, cldr)); - - TRY(generate_unicode_locale_header(*generated_header_file, cldr)); - TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr)); - - return 0; -} diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.cpp b/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.cpp index b9077b6fa91..3001dd6f2fb 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.cpp @@ -4,10 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include #include -#include -#include namespace JS::Intl { @@ -19,155 +16,43 @@ PluralRules::PluralRules(Object& prototype) { } -// 16.5.1 GetOperands ( s 
), https://tc39.es/ecma402/#sec-getoperands -::Locale::PluralOperands get_operands(StringView string) +// 16.5.4 ResolvePlural ( pluralRules, n ), https://tc39.es/ecma402/#sec-resolveplural +::Locale::PluralCategory resolve_plural(PluralRules const& plural_rules, Value number) { - // 1.Let n be ! ToNumber(s). - auto number = string.to_number(AK::TrimWhitespace::Yes).release_value(); - - // 2. Assert: n is finite. - VERIFY(isfinite(number)); - - // 3. Let dp be StringIndexOf(s, ".", 0). - auto decimal_point = string.find('.'); - - Variant integer_part; - StringView fraction_slice; - - // 4. If dp = -1, then - if (!decimal_point.has_value()) { - // a. Let intPart be n. - integer_part = number; - - // b. Let fracSlice be "". - } - // 5. Else, - else { - // a. Let intPart be the substring of s from 0 to dp. - integer_part = string.substring_view(0, *decimal_point); - - // b. Let fracSlice be the substring of s from dp + 1. - fraction_slice = string.substring_view(*decimal_point + 1); - } - - // 6. Let i be abs(! ToNumber(intPart)). - auto integer = integer_part.visit( - [](Empty) -> u64 { VERIFY_NOT_REACHED(); }, - [](double value) { - return static_cast(fabs(value)); - }, - [](StringView value) { - auto value_as_int = value.template to_number().value(); - return static_cast(value_as_int); - }); - - // 7. Let fracDigitCount be the length of fracSlice. - auto fraction_digit_count = fraction_slice.length(); - - // 8. Let f be ! ToNumber(fracSlice). - auto fraction = fraction_slice.is_empty() ? 0u : fraction_slice.template to_number().value(); - - // 9. Let significantFracSlice be the value of fracSlice stripped of trailing "0". - auto significant_fraction_slice = fraction_slice.trim("0"sv, TrimMode::Right); - - // 10. Let significantFracDigitCount be the length of significantFracSlice. - auto significant_fraction_digit_count = significant_fraction_slice.length(); - - // 11. Let significantFrac be ! ToNumber(significantFracSlice). 
- auto significant_fraction = significant_fraction_slice.is_empty() ? 0u : significant_fraction_slice.template to_number().value(); - - // 12. Return a new Record { [[Number]]: abs(n), [[IntegerDigits]]: i, [[FractionDigits]]: f, [[NumberOfFractionDigits]]: fracDigitCount, [[FractionDigitsWithoutTrailing]]: significantFrac, [[NumberOfFractionDigitsWithoutTrailing]]: significantFracDigitCount }. - return ::Locale::PluralOperands { - .number = fabs(number), - .integer_digits = integer, - .fraction_digits = fraction, - .number_of_fraction_digits = fraction_digit_count, - .fraction_digits_without_trailing = significant_fraction, - .number_of_fraction_digits_without_trailing = significant_fraction_digit_count, - }; -} - -// 16.5.2 PluralRuleSelect ( locale, type, n, operands ), https://tc39.es/ecma402/#sec-pluralruleselect -::Locale::PluralCategory plural_rule_select(StringView locale, ::Locale::PluralForm type, Value, ::Locale::PluralOperands operands) -{ - return ::Locale::determine_plural_category(locale, type, move(operands)); -} - -// 16.5.3 ResolvePlural ( pluralRules, n ), https://tc39.es/ecma402/#sec-resolveplural -ResolvedPlurality resolve_plural(PluralRules const& plural_rules, Value number) -{ - // 1. Assert: Type(pluralRules) is Object. - // 2. Assert: pluralRules has an [[InitializedPluralRules]] internal slot. - // 3. Assert: Type(n) is Number. - - // 4. If n is not a finite Number, then + // 1. If n is not a finite Number, then if (!number.is_finite_number()) { - // a. Return "other". - return { ::Locale::PluralCategory::Other, String {} }; + // a. Let s be ! ToString(n). + // b. Return the Record { [[PluralCategory]]: "other", [[FormattedString]]: s }. + return ::Locale::PluralCategory::Other; } - // 5. Let locale be pluralRules.[[Locale]]. - auto const& locale = plural_rules.locale(); - - // 6. Let type be pluralRules.[[Type]]. - auto type = plural_rules.type(); - - // 7. Let res be ! FormatNumericToString(pluralRules, n). 
- auto result = format_numeric_to_string(plural_rules, number); - - // 8. Let s be res.[[FormattedString]]. - auto string = move(result); - - // 9. Let operands be ! GetOperands(s). - auto operands = get_operands(string); - - // 10. Let p be ! PluralRuleSelect(locale, type, n, operands). - auto plural_category = plural_rule_select(locale, type, number, move(operands)); - - // 11. Return the Record { [[PluralCategory]]: p, [[FormattedString]]: s }. - return { plural_category, move(string) }; + // 2. Let locale be pluralRules.[[Locale]]. + // 3. Let type be pluralRules.[[Type]]. + // 4. Let res be FormatNumericToString(pluralRules, ℝ(n)). + // 5. Let s be res.[[FormattedString]]. + // 6. Let operands be GetOperands(s). + // 7. Let p be PluralRuleSelect(locale, type, n, operands). + // 8. Return the Record { [[PluralCategory]]: p, [[FormattedString]]: s }. + return plural_rules.formatter().select_plural(number.as_double()); } -// 16.5.4 PluralRuleSelectRange ( locale, type, xp, yp ), https://tc39.es/ecma402/#sec-resolveplural -::Locale::PluralCategory plural_rule_select_range(StringView locale, ::Locale::PluralForm, ::Locale::PluralCategory start, ::Locale::PluralCategory end) -{ - return ::Locale::determine_plural_range(locale, start, end); -} - -// 16.5.5 ResolvePluralRange ( pluralRules, x, y ), https://tc39.es/ecma402/#sec-resolveplural +// 16.5.6 ResolvePluralRange ( pluralRules, x, y ), https://tc39.es/ecma402/#sec-resolvepluralrange ThrowCompletionOr<::Locale::PluralCategory> resolve_plural_range(VM& vm, PluralRules const& plural_rules, Value start, Value end) { - // 1. Assert: Type(pluralRules) is Object. - // 2. Assert: pluralRules has an [[InitializedPluralRules]] internal slot. - // 3. Assert: Type(x) is Number. - // 4. Assert: Type(y) is Number. - - // 5. If x is NaN or y is NaN, throw a RangeError exception. + // 1. If x is NaN or y is NaN, throw a RangeError exception. 
if (start.is_nan()) return vm.throw_completion(ErrorType::NumberIsNaN, "start"sv); if (end.is_nan()) return vm.throw_completion(ErrorType::NumberIsNaN, "end"sv); - // 6. Let xp be ! ResolvePlural(pluralRules, x). - auto start_plurality = resolve_plural(plural_rules, start); - - // 7. Let yp be ! ResolvePlural(pluralRules, y). - auto end_plurality = resolve_plural(plural_rules, end); - - // 8. If xp.[[FormattedString]] is yp.[[FormattedString]], then - if (start_plurality.formatted_string == end_plurality.formatted_string) { - // a. Return xp.[[PluralCategory]]. - return start_plurality.plural_category; - } - - // 9. Let locale be pluralRules.[[Locale]]. - auto const& locale = plural_rules.locale(); - - // 10. Let type be pluralRules.[[Type]]. - auto type = plural_rules.type(); - - // 11. Return ! PluralRuleSelectRange(locale, type, xp.[[PluralCategory]], yp.[[PluralCategory]]). - return plural_rule_select_range(locale, type, start_plurality.plural_category, end_plurality.plural_category); + // 2. Let xp be ResolvePlural(pluralRules, x). + // 3. Let yp be ResolvePlural(pluralRules, y). + // 4. If xp.[[FormattedString]] is yp.[[FormattedString]], then + // a. Return xp.[[PluralCategory]]. + // 5. Let locale be pluralRules.[[Locale]]. + // 6. Let type be pluralRules.[[Type]]. + // 7. Return PluralRuleSelectRange(locale, type, xp.[[PluralCategory]], yp.[[PluralCategory]]). 
+ return plural_rules.formatter().select_plural_range(start.as_double(), end.as_double()); } } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.h b/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.h index d5a9058ac04..7aa54fe9c92 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.h @@ -6,7 +6,6 @@ #pragma once -#include #include #include #include @@ -32,15 +31,7 @@ private: ::Locale::PluralForm m_type { ::Locale::PluralForm::Cardinal }; // [[Type]] }; -struct ResolvedPlurality { - ::Locale::PluralCategory plural_category; // [[PluralCategory]] - String formatted_string; // [[FormattedString]] -}; - -::Locale::PluralOperands get_operands(StringView string); -::Locale::PluralCategory plural_rule_select(StringView locale, ::Locale::PluralForm type, Value number, ::Locale::PluralOperands operands); -ResolvedPlurality resolve_plural(PluralRules const&, Value number); -::Locale::PluralCategory plural_rule_select_range(StringView locale, ::Locale::PluralForm, ::Locale::PluralCategory start, ::Locale::PluralCategory end); +::Locale::PluralCategory resolve_plural(PluralRules const&, Value number); ThrowCompletionOr<::Locale::PluralCategory> resolve_plural_range(VM&, PluralRules const&, Value start, Value end); } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.cpp b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.cpp index 1f4c97290ef..b7b97c72330 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.cpp @@ -49,14 +49,58 @@ ThrowCompletionOr> PluralRulesConstructor::construct(Functi { auto& vm = this->vm(); - auto locales = vm.argument(0); - auto options = vm.argument(1); + auto locales_value = vm.argument(0); + auto options_value = vm.argument(1); - // 2. Let pluralRules be ? 
OrdinaryCreateFromConstructor(NewTarget, "%PluralRules.prototype%", « [[InitializedPluralRules]], [[Locale]], [[Type]], [[MinimumIntegerDigits]], [[MinimumFractionDigits]], [[MaximumFractionDigits]], [[MinimumSignificantDigits]], [[MaximumSignificantDigits]], [[RoundingType]], [[RoundingMode]], [[RoundingIncrement]], [[TrailingZeroDisplay]] »). + // 2. Let pluralRules be ? OrdinaryCreateFromConstructor(NewTarget, "%Intl.PluralRules.prototype%", « [[InitializedPluralRules]], [[Locale]], [[Type]], [[MinimumIntegerDigits]], [[MinimumFractionDigits]], [[MaximumFractionDigits]], [[MinimumSignificantDigits]], [[MaximumSignificantDigits]], [[RoundingType]], [[RoundingIncrement]], [[RoundingMode]], [[ComputedRoundingPriority]], [[TrailingZeroDisplay]] »). auto plural_rules = TRY(ordinary_create_from_constructor(vm, new_target, &Intrinsics::intl_plural_rules_prototype)); - // 3. Return ? InitializePluralRules(pluralRules, locales, options). - return TRY(initialize_plural_rules(vm, plural_rules, locales, options)); + // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales). + auto requested_locales = TRY(canonicalize_locale_list(vm, locales_value)); + + // 4. Set options to ? CoerceOptionsToObject(options). + auto* options = TRY(coerce_options_to_object(vm, options_value)); + + // 5. Let opt be a new Record. + LocaleOptions opt {}; + + // 6. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit"). + auto matcher = TRY(get_option(vm, *options, vm.names.localeMatcher, OptionType::String, AK::Array { "lookup"sv, "best fit"sv }, "best fit"sv)); + + // 7. Set opt.[[localeMatcher]] to matcher. + opt.locale_matcher = matcher; + + // 8. Let localeData be %Intl.PluralRules%.[[LocaleData]]. + // 9. Let r be ResolveLocale(%Intl.PluralRules%.[[AvailableLocales]], requestedLocales, opt, %Intl.PluralRules%.[[RelevantExtensionKeys]], localeData). + auto result = resolve_locale(requested_locales, opt, {}); + + // 10. 
Set pluralRules.[[Locale]] to r.[[locale]]. + plural_rules->set_locale(move(result.locale)); + + // Non-standard, the data locale is used by our NumberFormat implementation. + plural_rules->set_data_locale(move(result.data_locale)); + + // 11. Let t be ? GetOption(options, "type", string, « "cardinal", "ordinal" », "cardinal"). + auto type = TRY(get_option(vm, *options, vm.names.type, OptionType::String, AK::Array { "cardinal"sv, "ordinal"sv }, "cardinal"sv)); + + // 12. Set pluralRules.[[Type]] to t. + plural_rules->set_type(type.as_string().utf8_string_view()); + + // 13. Perform ? SetNumberFormatDigitOptions(pluralRules, options, 0, 3, "standard"). + TRY(set_number_format_digit_options(vm, plural_rules, *options, 0, 3, ::Locale::Notation::Standard)); + + // Non-standard, create an ICU number formatter for this Intl object. + auto formatter = ::Locale::NumberFormat::create( + plural_rules->locale(), + {}, + {}, + plural_rules->rounding_options()); + + formatter->create_plural_rules(plural_rules->type()); + plural_rules->set_formatter(move(formatter)); + + // 14. Return pluralRules. + return plural_rules; } // 16.2.2 Intl.PluralRules.supportedLocalesOf ( locales [ , options ] ), https://tc39.es/ecma402/#sec-intl.pluralrules.supportedlocalesof @@ -74,53 +118,4 @@ JS_DEFINE_NATIVE_FUNCTION(PluralRulesConstructor::supported_locales_of) return TRY(supported_locales(vm, requested_locales, options)); } -// 16.1.2 InitializePluralRules ( pluralRules, locales, options ), https://tc39.es/ecma402/#sec-initializepluralrules -ThrowCompletionOr> initialize_plural_rules(VM& vm, PluralRules& plural_rules, Value locales_value, Value options_value) -{ - // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales). - auto requested_locales = TRY(canonicalize_locale_list(vm, locales_value)); - - // 2. Set options to ? CoerceOptionsToObject(options). - auto* options = TRY(coerce_options_to_object(vm, options_value)); - - // 3. Let opt be a new Record. 
- LocaleOptions opt {}; - - // 4. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit"). - auto matcher = TRY(get_option(vm, *options, vm.names.localeMatcher, OptionType::String, AK::Array { "lookup"sv, "best fit"sv }, "best fit"sv)); - - // 5. Set opt.[[localeMatcher]] to matcher. - opt.locale_matcher = matcher; - - // 6. Let t be ? GetOption(options, "type", string, « "cardinal", "ordinal" », "cardinal"). - auto type = TRY(get_option(vm, *options, vm.names.type, OptionType::String, AK::Array { "cardinal"sv, "ordinal"sv }, "cardinal"sv)); - - // 7. Set pluralRules.[[Type]] to t. - plural_rules.set_type(type.as_string().utf8_string_view()); - - // 8. Perform ? SetNumberFormatDigitOptions(pluralRules, options, +0𝔽, 3𝔽, "standard"). - TRY(set_number_format_digit_options(vm, plural_rules, *options, 0, 3, ::Locale::Notation::Standard)); - - // 9. Let localeData be %PluralRules%.[[LocaleData]]. - // 10. Let r be ResolveLocale(%PluralRules%.[[AvailableLocales]], requestedLocales, opt, %PluralRules%.[[RelevantExtensionKeys]], localeData). - auto result = resolve_locale(requested_locales, opt, {}); - - // 11. Set pluralRules.[[Locale]] to r.[[locale]]. - plural_rules.set_locale(move(result.locale)); - - // Non-standard, the data locale is used by our NumberFormat implementation. - plural_rules.set_data_locale(move(result.data_locale)); - - // Non-standard, create an ICU number formatter for this Intl object. - auto formatter = ::Locale::NumberFormat::create( - plural_rules.locale(), - {}, - {}, - plural_rules.rounding_options()); - plural_rules.set_formatter(move(formatter)); - - // 12. Return pluralRules. 
- return plural_rules; -} - } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.h b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.h index dcfe99d5eef..f595d69d22c 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, Tim Flynn + * Copyright (c) 2022-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -29,6 +29,4 @@ private: JS_DECLARE_NATIVE_FUNCTION(supported_locales_of); }; -ThrowCompletionOr> initialize_plural_rules(VM&, PluralRules&, Value locales_value, Value options_value); - } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesPrototype.cpp b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesPrototype.cpp index 1ba9866a796..cfb5e68ef65 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesPrototype.cpp @@ -48,7 +48,7 @@ JS_DEFINE_NATIVE_FUNCTION(PluralRulesPrototype::select) // 4. Return ! ResolvePlural(pr, n).[[PluralCategory]]. auto plurality = resolve_plural(plural_rules, number); - return PrimitiveString::create(vm, ::Locale::plural_category_to_string(plurality.plural_category)); + return PrimitiveString::create(vm, ::Locale::plural_category_to_string(plurality)); } // 16.3.4 Intl.PluralRules.prototype.selectRange ( start, end ), https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.selectrange @@ -91,7 +91,7 @@ JS_DEFINE_NATIVE_FUNCTION(PluralRulesPrototype::resolved_options) auto options = Object::create(realm, realm.intrinsics().object_prototype()); // 4. Let pluralCategories be a List of Strings containing all possible results of PluralRuleSelect for the selected locale pr.[[Locale]]. 
- auto available_categories = ::Locale::available_plural_categories(plural_rules->locale(), plural_rules->type()); + auto available_categories = plural_rules->formatter().available_plural_categories(); auto plural_categories = Array::create_from<::Locale::PluralCategory>(realm, available_categories, [&](auto category) { return PrimitiveString::create(vm, ::Locale::plural_category_to_string(category)); diff --git a/Userland/Libraries/LibJS/Tests/builtins/Intl/PluralRules/PluralRules.prototype.selectRange.js b/Userland/Libraries/LibJS/Tests/builtins/Intl/PluralRules/PluralRules.prototype.selectRange.js index 76619fafa66..9945ff9bdec 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/Intl/PluralRules/PluralRules.prototype.selectRange.js +++ b/Userland/Libraries/LibJS/Tests/builtins/Intl/PluralRules/PluralRules.prototype.selectRange.js @@ -62,13 +62,6 @@ describe("correct behavior", () => { expect(pl.selectRange(0.14, 3.14)).toBe("other"); // other + other = other }); - test("default to end of range", () => { - // "so" specifies "one" to be the integer 1, but does not specify any ranges. 
- const so = new Intl.PluralRules("so"); - expect(so.selectRange(0, 1)).toBe("one"); - expect(so.selectRange(1, 2)).toBe("other"); - }); - test("numbers in reverse order", () => { const en = new Intl.PluralRules("en"); expect(en.selectRange(1, -Infinity)).toBe("other"); diff --git a/Userland/Libraries/LibLocale/Forward.h b/Userland/Libraries/LibLocale/Forward.h index bb64737bcec..1de393135f3 100644 --- a/Userland/Libraries/LibLocale/Forward.h +++ b/Userland/Libraries/LibLocale/Forward.h @@ -33,7 +33,6 @@ struct ListFormatPart; struct LocaleExtension; struct LocaleID; struct OtherExtension; -struct PluralOperands; struct TransformedExtension; struct TransformedField; diff --git a/Userland/Libraries/LibLocale/NumberFormat.cpp b/Userland/Libraries/LibLocale/NumberFormat.cpp index a54aece4233..faffe6309bb 100644 --- a/Userland/Libraries/LibLocale/NumberFormat.cpp +++ b/Userland/Libraries/LibLocale/NumberFormat.cpp @@ -17,6 +17,7 @@ #include #include +#include namespace Locale { @@ -416,6 +417,17 @@ static constexpr UNumberUnitWidth icu_unit_width(Style unit_display) VERIFY_NOT_REACHED(); } +static constexpr UPluralType icu_plural_type(PluralForm plural_form) +{ + switch (plural_form) { + case PluralForm::Cardinal: + return UPluralType::UPLURAL_TYPE_CARDINAL; + case PluralForm::Ordinal: + return UPluralType::UPLURAL_TYPE_ORDINAL; + } + VERIFY_NOT_REACHED(); +} + static void apply_display_options(icu::number::LocalizedNumberFormatter& formatter, DisplayOptions const& display_options) { UErrorCode status = U_ZERO_ERROR; @@ -678,6 +690,78 @@ public: return format_to_parts_impl(formatted, start, end); } + virtual void create_plural_rules(PluralForm plural_form) override + { + UErrorCode status = U_ZERO_ERROR; + VERIFY(!m_plural_rules); + + m_plural_rules = adopt_own(*icu::PluralRules::forLocale(m_locale, icu_plural_type(plural_form), status)); + VERIFY(icu_success(status)); + } + + virtual PluralCategory select_plural(double value) const override + { + UErrorCode status = 
U_ZERO_ERROR; + VERIFY(m_plural_rules); + + auto formatted = format_impl(value); + if (!formatted.has_value()) + return PluralCategory::Other; + + auto result = m_plural_rules->select(*formatted, status); + if (icu_failure(status)) + return PluralCategory::Other; + + return plural_category_from_string(icu_string_to_string(result)); + } + + virtual PluralCategory select_plural_range(double start, double end) const override + { + UErrorCode status = U_ZERO_ERROR; + VERIFY(m_plural_rules); + + auto formatted = format_range_impl(start, end); + if (!formatted.has_value()) + return PluralCategory::Other; + + auto [formatted_start, formatted_end] = formatted->getDecimalNumbers(status); + if (icu_failure(status)) + return PluralCategory::Other; + + if (formatted_start.string_view() == formatted_end.string_view()) + return select_plural(start); + + auto result = m_plural_rules->select(*formatted, status); + if (icu_failure(status)) + return PluralCategory::Other; + + return plural_category_from_string(icu_string_to_string(result)); + } + + virtual Vector available_plural_categories() const override + { + UErrorCode status = U_ZERO_ERROR; + VERIFY(m_plural_rules); + + auto keywords = adopt_own_if_nonnull(m_plural_rules->getKeywords(status)); + if (icu_failure(status)) + return {}; + + Vector result; + + while (true) { + i32 length = 0; + auto const* category = keywords->next(&length, status); + + if (icu_failure(status) || category == nullptr) + break; + + result.append(plural_category_from_string({ category, static_cast(length) })); + } + + return result; + } + private: static icu::Formattable value_to_formattable(Value const& value) { @@ -796,8 +880,12 @@ private: } icu::Locale& m_locale; + icu::number::LocalizedNumberFormatter m_formatter; mutable Optional m_range_formatter; + + OwnPtr m_plural_rules; + bool m_is_unit { false }; }; diff --git a/Userland/Libraries/LibLocale/NumberFormat.h b/Userland/Libraries/LibLocale/NumberFormat.h index ffe13134db7..2faac464dc4 100644 
--- a/Userland/Libraries/LibLocale/NumberFormat.h +++ b/Userland/Libraries/LibLocale/NumberFormat.h @@ -12,6 +12,7 @@ #include #include #include +#include namespace Locale { @@ -162,6 +163,11 @@ public: virtual String format_range(Value const&, Value const&) const = 0; virtual Vector format_range_to_parts(Value const&, Value const&) const = 0; + virtual void create_plural_rules(PluralForm) = 0; + virtual PluralCategory select_plural(double) const = 0; + virtual PluralCategory select_plural_range(double, double) const = 0; + virtual Vector available_plural_categories() const = 0; + protected: NumberFormat() = default; }; diff --git a/Userland/Libraries/LibLocale/PluralRules.cpp b/Userland/Libraries/LibLocale/PluralRules.cpp index 9c36df4f76b..3434f2597db 100644 --- a/Userland/Libraries/LibLocale/PluralRules.cpp +++ b/Userland/Libraries/LibLocale/PluralRules.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, Tim Flynn + * Copyright (c) 2022-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -24,25 +24,52 @@ StringView plural_form_to_string(PluralForm plural_form) return "cardinal"sv; case PluralForm::Ordinal: return "ordinal"sv; - default: - VERIFY_NOT_REACHED(); } + VERIFY_NOT_REACHED(); } -PluralCategory __attribute__((weak)) determine_plural_category(StringView, PluralForm, PluralOperands) +PluralCategory plural_category_from_string(StringView category) { - return PluralCategory::Other; + if (category == "other"sv) + return PluralCategory::Other; + if (category == "zero"sv) + return PluralCategory::Zero; + if (category == "one"sv) + return PluralCategory::One; + if (category == "two"sv) + return PluralCategory::Two; + if (category == "few"sv) + return PluralCategory::Few; + if (category == "many"sv) + return PluralCategory::Many; + if (category == "0"sv) + return PluralCategory::ExactlyZero; + if (category == "1"sv) + return PluralCategory::ExactlyOne; + VERIFY_NOT_REACHED(); } -ReadonlySpan __attribute__((weak)) available_plural_categories(StringView, 
PluralForm) +StringView plural_category_to_string(PluralCategory category) { - static constexpr Array categories { { PluralCategory::Other } }; - return categories.span(); -} - -PluralCategory __attribute__((weak)) determine_plural_range(StringView, PluralCategory, PluralCategory) -{ - return PluralCategory::Other; + switch (category) { + case PluralCategory::Other: + return "other"sv; + case PluralCategory::Zero: + return "zero"sv; + case PluralCategory::One: + return "one"sv; + case PluralCategory::Two: + return "two"sv; + case PluralCategory::Few: + return "few"sv; + case PluralCategory::Many: + return "many"sv; + case PluralCategory::ExactlyZero: + return "0"sv; + case PluralCategory::ExactlyOne: + return "1"sv; + } + VERIFY_NOT_REACHED(); } } diff --git a/Userland/Libraries/LibLocale/PluralRules.h b/Userland/Libraries/LibLocale/PluralRules.h index 62c948833cc..8b893b60c1c 100644 --- a/Userland/Libraries/LibLocale/PluralRules.h +++ b/Userland/Libraries/LibLocale/PluralRules.h @@ -1,15 +1,12 @@ /* - * Copyright (c) 2022, Tim Flynn + * Copyright (c) 2022-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once -#include #include -#include -#include namespace Locale { @@ -17,6 +14,8 @@ enum class PluralForm { Cardinal, Ordinal, }; +PluralForm plural_form_from_string(StringView); +StringView plural_form_to_string(PluralForm); enum class PluralCategory : u8 { Other, @@ -30,95 +29,7 @@ enum class PluralCategory : u8 { ExactlyZero, ExactlyOne, }; - -// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Operand_Meanings -struct PluralOperands { - static constexpr StringView symbol_to_variable_name(char symbol) - { - if (symbol == 'n') - return "number"sv; - if (symbol == 'i') - return "integer_digits"sv; - if (symbol == 'f') - return "fraction_digits"sv; - if (symbol == 'v') - return "number_of_fraction_digits"sv; - if (symbol == 't') - return "fraction_digits_without_trailing"sv; - if (symbol == 'w') - return 
"number_of_fraction_digits_without_trailing"sv; - VERIFY_NOT_REACHED(); - } - - static constexpr bool symbol_requires_floating_point_modulus(char symbol) - { - // From TR-35: "The modulus (% or mod) is a remainder operation as defined in Java; for - // example, where n = 4.3 the result of n mod 3 is 1.3." - // - // So, this returns whether the symbol represents a decimal value, and thus requires fmod. - return symbol == 'n'; - } - - double number { 0 }; - u64 integer_digits { 0 }; - u64 fraction_digits { 0 }; - u64 number_of_fraction_digits { 0 }; - u64 fraction_digits_without_trailing { 0 }; - u64 number_of_fraction_digits_without_trailing { 0 }; -}; - -PluralForm plural_form_from_string(StringView plural_form); -StringView plural_form_to_string(PluralForm plural_form); - -// NOTE: This must be defined inline to be callable from the code generators. -constexpr PluralCategory plural_category_from_string(StringView category) -{ - if (category == "other"sv) - return PluralCategory::Other; - if (category == "zero"sv) - return PluralCategory::Zero; - if (category == "one"sv) - return PluralCategory::One; - if (category == "two"sv) - return PluralCategory::Two; - if (category == "few"sv) - return PluralCategory::Few; - if (category == "many"sv) - return PluralCategory::Many; - if (category == "0"sv) - return PluralCategory::ExactlyZero; - if (category == "1"sv) - return PluralCategory::ExactlyOne; - VERIFY_NOT_REACHED(); -} - -// NOTE: This must be defined inline to be callable from the code generators. 
-constexpr StringView plural_category_to_string(PluralCategory category) -{ - switch (category) { - case PluralCategory::Other: - return "other"sv; - case PluralCategory::Zero: - return "zero"sv; - case PluralCategory::One: - return "one"sv; - case PluralCategory::Two: - return "two"sv; - case PluralCategory::Few: - return "few"sv; - case PluralCategory::Many: - return "many"sv; - case PluralCategory::ExactlyZero: - return "0"sv; - case PluralCategory::ExactlyOne: - return "1"sv; - } - - VERIFY_NOT_REACHED(); -} - -PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands); -ReadonlySpan available_plural_categories(StringView locale, PluralForm form); -PluralCategory determine_plural_range(StringView locale, PluralCategory start, PluralCategory end); +PluralCategory plural_category_from_string(StringView); +StringView plural_category_to_string(PluralCategory); }