diff --git a/Meta/CMake/locale_data.cmake b/Meta/CMake/locale_data.cmake index 21a47b82b95..e5fd642457c 100644 --- a/Meta/CMake/locale_data.cmake +++ b/Meta/CMake/locale_data.cmake @@ -18,9 +18,6 @@ set(CLDR_CORE_PATH "${CLDR_PATH}/${CLDR_CORE_SOURCE}") set(CLDR_DATES_SOURCE cldr-dates-modern) set(CLDR_DATES_PATH "${CLDR_PATH}/${CLDR_DATES_SOURCE}") -set(CLDR_LOCALES_SOURCE cldr-localenames-modern) -set(CLDR_LOCALES_PATH "${CLDR_PATH}/${CLDR_LOCALES_SOURCE}") - set(CLDR_NUMBERS_SOURCE cldr-numbers-modern) set(CLDR_NUMBERS_PATH "${CLDR_PATH}/${CLDR_NUMBERS_SOURCE}") @@ -32,7 +29,6 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_BCP47_SOURCE}/**" "${CLDR_BCP47_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_CORE_SOURCE}/**" "${CLDR_CORE_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_DATES_SOURCE}/**" "${CLDR_DATES_PATH}") - extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_LOCALES_SOURCE}/**" "${CLDR_LOCALES_PATH}") extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_NUMBERS_SOURCE}/**" "${CLDR_NUMBERS_PATH}") else() message(STATUS "Skipping download of ${CLDR_ZIP_URL}, expecting the archive to have been extracted to ${CLDR_PATH}") @@ -41,9 +37,6 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) set(LOCALE_DATA_HEADER LocaleData.h) set(LOCALE_DATA_IMPLEMENTATION LocaleData.cpp) - set(PLURAL_RULES_DATA_HEADER PluralRulesData.h) - set(PLURAL_RULES_DATA_IMPLEMENTATION PluralRulesData.cpp) - invoke_generator( "LocaleData" Lagom::GenerateLocaleData @@ -52,19 +45,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) "${LOCALE_DATA_IMPLEMENTATION}" arguments -b "${CLDR_BCP47_PATH}" -r "${CLDR_CORE_PATH}" -n "${CLDR_NUMBERS_PATH}" -d "${CLDR_DATES_PATH}" ) - invoke_generator( - "PluralRulesData" - Lagom::GeneratePluralRulesData - "${CLDR_VERSION_FILE}" - "${PLURAL_RULES_DATA_HEADER}" - "${PLURAL_RULES_DATA_IMPLEMENTATION}" - arguments -r "${CLDR_CORE_PATH}" -l "${CLDR_LOCALES_PATH}" - ) 
set(LOCALE_DATA_SOURCES ${LOCALE_DATA_HEADER} ${LOCALE_DATA_IMPLEMENTATION} - ${PLURAL_RULES_DATA_HEADER} - ${PLURAL_RULES_DATA_IMPLEMENTATION} ) endif() diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/CMakeLists.txt b/Meta/Lagom/Tools/CodeGenerators/LibLocale/CMakeLists.txt index f51f1ed909c..1a0a206154a 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/CMakeLists.txt +++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/CMakeLists.txt @@ -1,2 +1 @@ lagom_tool(GenerateLocaleData SOURCES GenerateLocaleData.cpp LIBS LibMain) -lagom_tool(GeneratePluralRulesData SOURCES GeneratePluralRulesData.cpp LIBS LibMain) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GeneratePluralRulesData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GeneratePluralRulesData.cpp deleted file mode 100644 index 7fa4f454073..00000000000 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GeneratePluralRulesData.cpp +++ /dev/null @@ -1,685 +0,0 @@ -/* - * Copyright (c) 2022, Tim Flynn - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common. 
-#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static ByteString format_identifier(StringView owner, ByteString identifier) -{ - identifier = identifier.replace("-"sv, "_"sv, ReplaceMode::All); - - if (all_of(identifier, is_ascii_digit)) - return ByteString::formatted("{}_{}", owner[0], identifier); - if (is_ascii_lower_alpha(identifier[0])) - return ByteString::formatted("{:c}{}", to_ascii_uppercase(identifier[0]), identifier.substring_view(1)); - return identifier; -} - -struct Relation { - using Range = Array; - using Comparator = Variant; - - enum class Type { - Equality, - Inequality, - }; - - ByteString const& modulus_variable_name() const - { - VERIFY(modulus.has_value()); - - if (!cached_modulus_variable_name.has_value()) - cached_modulus_variable_name = ByteString::formatted("mod_{}_{}", symbol, *modulus); - - return *cached_modulus_variable_name; - } - - ByteString const& exponential_variable_name() const - { - if (!cached_exponential_variable_name.has_value()) - cached_exponential_variable_name = ByteString::formatted("exp_{}", symbol); - - return *cached_exponential_variable_name; - } - - void generate_relation(SourceGenerator& generator) const - { - auto append_variable_name = [&]() { - if (modulus.has_value()) - generator.append(modulus_variable_name()); - else if (symbol == 'e' || symbol == 'c') - generator.append(exponential_variable_name()); - else - generator.append(ByteString::formatted("ops.{}", Locale::PluralOperands::symbol_to_variable_name(symbol))); - }; - - auto append_value = [&](u32 value) { - append_variable_name(); - generator.append(" == "sv); - generator.append(ByteString::number(value)); - }; - - auto append_range = [&](auto const& range) { - // This check avoids generating "0 <= unsigned_value", which is always true. 
- if (range[0] != 0 || Locale::PluralOperands::symbol_requires_floating_point_modulus(symbol)) { - generator.append(ByteString::formatted("{} <= ", range[0])); - append_variable_name(); - generator.append(" && "sv); - } - - append_variable_name(); - generator.append(ByteString::formatted(" <= {}", range[1])); - }; - - if (type == Type::Inequality) - generator.append("!"sv); - - generator.append("("sv); - - bool first = true; - for (auto const& comparator : comparators) { - generator.append(first ? "("sv : " || ("sv); - - comparator.visit( - [&](u32 value) { append_value(value); }, - [&](Range const& range) { append_range(range); }); - - generator.append(")"sv); - first = false; - } - - generator.append(")"sv); - } - - void generate_precomputed_variables(SourceGenerator& generator, HashTable& generated_variables) const - { - // FIXME: How do we handle the exponential symbols? They seem unused by ECMA-402. - if (symbol == 'e' || symbol == 'c') { - if (auto variable = exponential_variable_name(); !generated_variables.contains(variable)) { - generated_variables.set(variable); - generator.set("variable"sv, move(variable)); - generator.append(R"~~~( - auto @variable@ = 0;)~~~"); - } - } - - if (!modulus.has_value()) - return; - - auto variable = modulus_variable_name(); - if (generated_variables.contains(variable)) - return; - - generated_variables.set(variable); - generator.set("variable"sv, move(variable)); - generator.set("operand"sv, Locale::PluralOperands::symbol_to_variable_name(symbol)); - generator.set("modulus"sv, ByteString::number(*modulus)); - - if (Locale::PluralOperands::symbol_requires_floating_point_modulus(symbol)) { - generator.append(R"~~~( - auto @variable@ = fmod(ops.@operand@, @modulus@);)~~~"); - } else { - generator.append(R"~~~( - auto @variable@ = ops.@operand@ % @modulus@;)~~~"); - } - } - - Type type; - char symbol { 0 }; - Optional modulus; - Vector comparators; - -private: - mutable Optional cached_modulus_variable_name; - mutable Optional 
cached_exponential_variable_name; -}; - -struct Condition { - void generate_condition(SourceGenerator& generator) const - { - for (size_t i = 0; i < relations.size(); ++i) { - if (i > 0) - generator.append(" || "sv); - - auto const& conjunctions = relations[i]; - if (conjunctions.size() > 1) - generator.append("("sv); - - for (size_t j = 0; j < conjunctions.size(); ++j) { - if (j > 0) - generator.append(" && "sv); - conjunctions[j].generate_relation(generator); - } - - if (conjunctions.size() > 1) - generator.append(")"sv); - } - } - - void generate_precomputed_variables(SourceGenerator& generator, HashTable& generated_variables) const - { - for (auto const& conjunctions : relations) { - for (auto const& relation : conjunctions) - relation.generate_precomputed_variables(generator, generated_variables); - } - } - - Vector> relations; -}; - -struct Range { - ByteString start; - ByteString end; - ByteString category; -}; - -using Conditions = HashMap; -using Ranges = Vector; - -struct LocaleData { - static ByteString generated_method_name(StringView form, StringView locale) - { - return ByteString::formatted("{}_plurality_{}", form, format_identifier({}, locale)); - } - - Conditions& rules_for_form(StringView form) - { - if (form == "cardinal") - return cardinal_rules; - if (form == "ordinal") - return ordinal_rules; - VERIFY_NOT_REACHED(); - } - - Conditions cardinal_rules; - Conditions ordinal_rules; - Ranges plural_ranges; -}; - -struct CLDR { - UniqueStringStorage unique_strings; - - HashMap locales; -}; - -static Relation parse_relation(StringView relation) -{ - static constexpr auto equality_operator = " = "sv; - static constexpr auto inequality_operator = " != "sv; - static constexpr auto modulus_operator = " % "sv; - static constexpr auto range_operator = ".."sv; - static constexpr auto set_operator = ','; - - Relation parsed; - - StringView lhs; - StringView rhs; - - if (auto index = relation.find(equality_operator); index.has_value()) { - parsed.type = 
Relation::Type::Equality; - lhs = relation.substring_view(0, *index); - rhs = relation.substring_view(*index + equality_operator.length()); - } else if (auto index = relation.find(inequality_operator); index.has_value()) { - parsed.type = Relation::Type::Inequality; - lhs = relation.substring_view(0, *index); - rhs = relation.substring_view(*index + inequality_operator.length()); - } else { - VERIFY_NOT_REACHED(); - } - - if (auto index = lhs.find(modulus_operator); index.has_value()) { - auto symbol = lhs.substring_view(0, *index); - VERIFY(symbol.length() == 1); - - auto modulus = lhs.substring_view(*index + modulus_operator.length()).to_number(); - VERIFY(modulus.has_value()); - - parsed.symbol = symbol[0]; - parsed.modulus = move(modulus); - } else { - VERIFY(lhs.length() == 1); - parsed.symbol = lhs[0]; - } - - rhs.for_each_split_view(set_operator, SplitBehavior::Nothing, [&](auto set) { - if (auto index = set.find(range_operator); index.has_value()) { - auto range_begin = set.substring_view(0, *index).template to_number(); - VERIFY(range_begin.has_value()); - - auto range_end = set.substring_view(*index + range_operator.length()).template to_number(); - VERIFY(range_end.has_value()); - - parsed.comparators.empend(Array { *range_begin, *range_end }); - } else { - auto value = set.template to_number(); - VERIFY(value.has_value()); - - parsed.comparators.empend(*value); - } - }); - - return parsed; -} - -// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax -// -// A very simplified view of a plural rule is: -// -// condition.* ([@integer|@decimal] sample)+ -// -// The "sample" being series of integer or decimal values that fit the specified condition. The -// condition may be one or more binary expressions, chained together with "and" or "or" operators. 
-static void parse_condition(StringView category, StringView rule, Conditions& rules) -{ - static constexpr auto other_category = "other"sv; - static constexpr auto disjunction_keyword = " or "sv; - static constexpr auto conjunction_keyword = " and "sv; - - // We don't need the examples in the generated code, so we can drop them here. - auto example_index = rule.find('@'); - VERIFY(example_index.has_value()); - - auto condition = rule.substring_view(0, *example_index).trim_whitespace(); - - // Our implementation does not generate rules for the "other" category. We simply return "other" - // for values that do not match any rules. This will need to be revisited if this VERIFY fails. - if (condition.is_empty()) { - VERIFY(category == other_category); - return; - } - - auto& relation_list = rules.ensure(category); - - // The grammar for a condition (i.e. a chain of relations) is: - // - // condition = and_condition ('or' and_condition)* - // and_condition = relation ('and' relation)* - // - // This affords some simplicity in that disjunctions are never embedded within a conjunction. 
- condition.for_each_split_view(disjunction_keyword, SplitBehavior::Nothing, [&](auto disjunction) { - Vector conjunctions; - - disjunction.for_each_split_view(conjunction_keyword, SplitBehavior::Nothing, [&](auto relation) { - conjunctions.append(parse_relation(relation)); - }); - - relation_list.relations.append(move(conjunctions)); - }); -} - -static ErrorOr parse_plural_rules(ByteString core_supplemental_path, StringView file_name, CLDR& cldr) -{ - static constexpr auto form_prefix = "plurals-type-"sv; - static constexpr auto rule_prefix = "pluralRule-count-"sv; - - LexicalPath plurals_path(move(core_supplemental_path)); - plurals_path = plurals_path.append(file_name); - - auto plurals = TRY(read_json_file(plurals_path.string())); - auto const& supplemental_object = plurals.as_object().get_object("supplemental"sv).value(); - - supplemental_object.for_each_member([&](auto const& key, auto const& plurals_object) { - if (!key.starts_with(form_prefix)) - return; - - auto form = key.substring_view(form_prefix.length()); - - plurals_object.as_object().for_each_member([&](auto const& loc, auto const& rules) { - auto locale = cldr.locales.get(loc); - if (!locale.has_value()) - return; - - rules.as_object().for_each_member([&](auto const& key, auto const& condition) { - VERIFY(key.starts_with(rule_prefix)); - - auto category = key.substring_view(rule_prefix.length()); - parse_condition(category, condition.as_string(), locale->rules_for_form(form)); - }); - }); - }); - - return {}; -} - -// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Ranges -static ErrorOr parse_plural_ranges(ByteString core_supplemental_path, CLDR& cldr) -{ - static constexpr auto start_segment = "-start-"sv; - static constexpr auto end_segment = "-end-"sv; - - LexicalPath plural_ranges_path(move(core_supplemental_path)); - plural_ranges_path = plural_ranges_path.append("pluralRanges.json"sv); - - auto plural_ranges = TRY(read_json_file(plural_ranges_path.string())); - auto const& 
supplemental_object = plural_ranges.as_object().get_object("supplemental"sv).value(); - auto const& plurals_object = supplemental_object.get_object("plurals"sv).value(); - - plurals_object.for_each_member([&](auto const& loc, auto const& ranges_object) { - auto locale = cldr.locales.get(loc); - if (!locale.has_value()) - return; - - ranges_object.as_object().for_each_member([&](auto const& range, auto const& category) { - auto start_index = range.find(start_segment); - VERIFY(start_index.has_value()); - - auto end_index = range.find(end_segment); - VERIFY(end_index.has_value()); - - *start_index += start_segment.length(); - - auto start = range.substring(*start_index, *end_index - *start_index); - auto end = range.substring(*end_index + end_segment.length()); - - locale->plural_ranges.empend(move(start), move(end), category.as_string()); - }); - }); - - return {}; -} - -static ErrorOr parse_all_locales(ByteString core_path, ByteString locale_names_path, CLDR& cldr) -{ - LexicalPath core_supplemental_path(move(core_path)); - core_supplemental_path = core_supplemental_path.append("supplemental"sv); - VERIFY(FileSystem::is_directory(core_supplemental_path.string())); - - auto remove_variants_from_path = [&](ByteString path) -> ErrorOr { - auto parsed_locale = TRY(CanonicalLanguageID::parse(cldr.unique_strings, LexicalPath::basename(path))); - - StringBuilder builder; - builder.append(cldr.unique_strings.get(parsed_locale.language)); - if (auto script = cldr.unique_strings.get(parsed_locale.script); !script.is_empty()) - builder.appendff("-{}", script); - if (auto region = cldr.unique_strings.get(parsed_locale.region); !region.is_empty()) - builder.appendff("-{}", region); - - return builder.to_byte_string(); - }; - - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", locale_names_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { - auto locale_path = LexicalPath::join(directory.path().string(), 
entry.name).string(); - auto language = TRY(remove_variants_from_path(locale_path)); - - cldr.locales.ensure(language); - return IterationDecision::Continue; - })); - - TRY(parse_plural_rules(core_supplemental_path.string(), "plurals.json"sv, cldr)); - TRY(parse_plural_rules(core_supplemental_path.string(), "ordinals.json"sv, cldr)); - TRY(parse_plural_ranges(core_supplemental_path.string(), cldr)); - return {}; -} - -static ErrorOr generate_unicode_locale_header(Core::InputBufferedFile& file, CLDR&) -{ - StringBuilder builder; - SourceGenerator generator { builder }; - - generator.append(R"~~~( -#pragma once - -#include - -namespace Locale { -)~~~"); - - generator.append(R"~~~( -} -)~~~"); - - TRY(file.write_until_depleted(generator.as_string_view().bytes())); - return {}; -} - -static ErrorOr generate_unicode_locale_implementation(Core::InputBufferedFile& file, CLDR& cldr) -{ - StringBuilder builder; - SourceGenerator generator { builder }; - - auto locales = cldr.locales.keys(); - quick_sort(locales); - - generator.append(R"~~~( -#include -#include -#include -#include -#include -#include - -namespace Locale { - -using PluralCategoryFunction = PluralCategory(*)(PluralOperands); -using PluralRangeFunction = PluralCategory(*)(PluralCategory, PluralCategory); - -static PluralCategory default_category(PluralOperands) -{ - return PluralCategory::Other; -} - -static PluralCategory default_range(PluralCategory, PluralCategory end) -{ - return end; -} - -)~~~"); - - auto append_rules = [&](auto form, auto const& locale, auto const& rules) { - if (rules.is_empty()) - return; - - generator.set("method"sv, LocaleData::generated_method_name(form, locale)); - HashTable generated_variables; - - generator.append(R"~~~( -static PluralCategory @method@([[maybe_unused]] PluralOperands ops) -{)~~~"); - - for (auto [category, condition] : rules) { - condition.generate_precomputed_variables(generator, generated_variables); - - generator.append(R"~~~( - if ()~~~"); - - 
generator.set("category"sv, format_identifier({}, category)); - condition.generate_condition(generator); - - generator.append(R"~~~() - return PluralCategory::@category@;)~~~"); - } - - generator.append(R"~~~( - return PluralCategory::Other; -} -)~~~"); - }; - - auto append_ranges = [&](auto const& locale, auto const& ranges) { - if (ranges.is_empty()) - return; - - generator.set("method"sv, LocaleData::generated_method_name("range"sv, locale)); - - generator.append(R"~~~( -static PluralCategory @method@(PluralCategory start, PluralCategory end) -{)~~~"); - - for (auto const& range : ranges) { - generator.set("start"sv, format_identifier({}, range.start)); - generator.set("end"sv, format_identifier({}, range.end)); - generator.set("category"sv, format_identifier({}, range.category)); - - generator.append(R"~~~( - if (start == PluralCategory::@start@ && end == PluralCategory::@end@) - return PluralCategory::@category@;)~~~"); - } - - generator.append(R"~~~( - return end; -} -)~~~"); - }; - - auto append_lookup_table = [&](auto type, auto form, auto default_, auto data_for_locale) { - generator.set("type"sv, type); - generator.set("form"sv, form); - generator.set("default"sv, default_); - generator.set("size"sv, ByteString::number(locales.size())); - - generator.append(R"~~~( -static constexpr Array<@type@, @size@> s_@form@_functions { {)~~~"); - - for (auto const& locale : locales) { - auto& rules = data_for_locale(cldr.locales.find(locale)->value, form); - - if (rules.is_empty()) { - generator.append(R"~~~( - @default@,)~~~"); - } else { - generator.set("method"sv, LocaleData::generated_method_name(form, locale)); - generator.append(R"~~~( - @method@,)~~~"); - } - } - - generator.append(R"~~~( -} }; -)~~~"); - }; - - auto append_categories = [&](auto const& name, auto const& rules) { - generator.set("name", name); - generator.set("size", ByteString::number(rules.size() + 1)); - - generator.append(R"~~~( -static constexpr Array @name@ { { 
PluralCategory::Other)~~~"); - - for (auto [category, condition] : rules) { - generator.set("category"sv, format_identifier({}, category)); - generator.append(", PluralCategory::@category@"sv); - } - - generator.append("} };"); - }; - - for (auto const& [locale, rules] : cldr.locales) { - append_rules("cardinal"sv, locale, rules.cardinal_rules); - append_rules("ordinal"sv, locale, rules.ordinal_rules); - append_ranges(locale, rules.plural_ranges); - } - - append_lookup_table("PluralCategoryFunction"sv, "cardinal"sv, "default_category"sv, [](auto& rules, auto form) -> Conditions& { return rules.rules_for_form(form); }); - append_lookup_table("PluralCategoryFunction"sv, "ordinal"sv, "default_category"sv, [](auto& rules, auto form) -> Conditions& { return rules.rules_for_form(form); }); - append_lookup_table("PluralRangeFunction"sv, "range"sv, "default_range"sv, [](auto& rules, auto) -> Ranges& { return rules.plural_ranges; }); - - generate_mapping(generator, locales, "PluralCategory"sv, "s_cardinal_categories"sv, "s_cardinal_categories_{}"sv, format_identifier, - [&](auto const& name, auto const& locale) { - auto& rules = cldr.locales.find(locale)->value; - append_categories(name, rules.rules_for_form("cardinal"sv)); - }); - - generate_mapping(generator, locales, "PluralCategory"sv, "s_ordinal_categories"sv, "s_ordinal_categories_{}"sv, format_identifier, - [&](auto const& name, auto const& locale) { - auto& rules = cldr.locales.find(locale)->value; - append_categories(name, rules.rules_for_form("ordinal"sv)); - }); - - generator.append(R"~~~( -PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands) -{ - auto locale_value = locale_from_string(locale); - if (!locale_value.has_value()) - return PluralCategory::Other; - - auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. 
- PluralCategoryFunction decider { nullptr }; - - switch (form) { - case PluralForm::Cardinal: - decider = s_cardinal_functions[locale_index]; - break; - case PluralForm::Ordinal: - decider = s_ordinal_functions[locale_index]; - break; - } - - return decider(move(operands)); -} - -ReadonlySpan available_plural_categories(StringView locale, PluralForm form) -{ - auto locale_value = locale_from_string(locale); - if (!locale_value.has_value()) - return {}; - - auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. - - switch (form) { - case PluralForm::Cardinal: - return s_cardinal_categories[locale_index]; - case PluralForm::Ordinal: - return s_ordinal_categories[locale_index]; - } - - VERIFY_NOT_REACHED(); -} - -PluralCategory determine_plural_range(StringView locale, PluralCategory start, PluralCategory end) -{ - auto locale_value = locale_from_string(locale); - if (!locale_value.has_value()) - return PluralCategory::Other; - - auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. 
- - PluralRangeFunction decider = s_range_functions[locale_index]; - return decider(start, end); -} - -} -)~~~"); - - TRY(file.write_until_depleted(generator.as_string_view().bytes())); - return {}; -} - -ErrorOr serenity_main(Main::Arguments arguments) -{ - StringView generated_header_path; - StringView generated_implementation_path; - StringView core_path; - StringView locale_names_path; - - Core::ArgsParser args_parser; - args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path"); - args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); - args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path"); - args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path"); - args_parser.parse(arguments); - - auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write)); - auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write)); - - CLDR cldr; - TRY(parse_all_locales(core_path, locale_names_path, cldr)); - - TRY(generate_unicode_locale_header(*generated_header_file, cldr)); - TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr)); - - return 0; -} diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.cpp b/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.cpp index b9077b6fa91..3001dd6f2fb 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.cpp @@ -4,10 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include #include -#include -#include namespace JS::Intl { @@ -19,155 +16,43 @@ PluralRules::PluralRules(Object& prototype) { } -// 16.5.1 GetOperands ( s 
), https://tc39.es/ecma402/#sec-getoperands -::Locale::PluralOperands get_operands(StringView string) +// 16.5.4 ResolvePlural ( pluralRules, n ), https://tc39.es/ecma402/#sec-resolveplural +::Locale::PluralCategory resolve_plural(PluralRules const& plural_rules, Value number) { - // 1.Let n be ! ToNumber(s). - auto number = string.to_number(AK::TrimWhitespace::Yes).release_value(); - - // 2. Assert: n is finite. - VERIFY(isfinite(number)); - - // 3. Let dp be StringIndexOf(s, ".", 0). - auto decimal_point = string.find('.'); - - Variant integer_part; - StringView fraction_slice; - - // 4. If dp = -1, then - if (!decimal_point.has_value()) { - // a. Let intPart be n. - integer_part = number; - - // b. Let fracSlice be "". - } - // 5. Else, - else { - // a. Let intPart be the substring of s from 0 to dp. - integer_part = string.substring_view(0, *decimal_point); - - // b. Let fracSlice be the substring of s from dp + 1. - fraction_slice = string.substring_view(*decimal_point + 1); - } - - // 6. Let i be abs(! ToNumber(intPart)). - auto integer = integer_part.visit( - [](Empty) -> u64 { VERIFY_NOT_REACHED(); }, - [](double value) { - return static_cast(fabs(value)); - }, - [](StringView value) { - auto value_as_int = value.template to_number().value(); - return static_cast(value_as_int); - }); - - // 7. Let fracDigitCount be the length of fracSlice. - auto fraction_digit_count = fraction_slice.length(); - - // 8. Let f be ! ToNumber(fracSlice). - auto fraction = fraction_slice.is_empty() ? 0u : fraction_slice.template to_number().value(); - - // 9. Let significantFracSlice be the value of fracSlice stripped of trailing "0". - auto significant_fraction_slice = fraction_slice.trim("0"sv, TrimMode::Right); - - // 10. Let significantFracDigitCount be the length of significantFracSlice. - auto significant_fraction_digit_count = significant_fraction_slice.length(); - - // 11. Let significantFrac be ! ToNumber(significantFracSlice). 
- auto significant_fraction = significant_fraction_slice.is_empty() ? 0u : significant_fraction_slice.template to_number().value(); - - // 12. Return a new Record { [[Number]]: abs(n), [[IntegerDigits]]: i, [[FractionDigits]]: f, [[NumberOfFractionDigits]]: fracDigitCount, [[FractionDigitsWithoutTrailing]]: significantFrac, [[NumberOfFractionDigitsWithoutTrailing]]: significantFracDigitCount }. - return ::Locale::PluralOperands { - .number = fabs(number), - .integer_digits = integer, - .fraction_digits = fraction, - .number_of_fraction_digits = fraction_digit_count, - .fraction_digits_without_trailing = significant_fraction, - .number_of_fraction_digits_without_trailing = significant_fraction_digit_count, - }; -} - -// 16.5.2 PluralRuleSelect ( locale, type, n, operands ), https://tc39.es/ecma402/#sec-pluralruleselect -::Locale::PluralCategory plural_rule_select(StringView locale, ::Locale::PluralForm type, Value, ::Locale::PluralOperands operands) -{ - return ::Locale::determine_plural_category(locale, type, move(operands)); -} - -// 16.5.3 ResolvePlural ( pluralRules, n ), https://tc39.es/ecma402/#sec-resolveplural -ResolvedPlurality resolve_plural(PluralRules const& plural_rules, Value number) -{ - // 1. Assert: Type(pluralRules) is Object. - // 2. Assert: pluralRules has an [[InitializedPluralRules]] internal slot. - // 3. Assert: Type(n) is Number. - - // 4. If n is not a finite Number, then + // 1. If n is not a finite Number, then if (!number.is_finite_number()) { - // a. Return "other". - return { ::Locale::PluralCategory::Other, String {} }; + // a. Let s be ! ToString(n). + // b. Return the Record { [[PluralCategory]]: "other", [[FormattedString]]: s }. + return ::Locale::PluralCategory::Other; } - // 5. Let locale be pluralRules.[[Locale]]. - auto const& locale = plural_rules.locale(); - - // 6. Let type be pluralRules.[[Type]]. - auto type = plural_rules.type(); - - // 7. Let res be ! FormatNumericToString(pluralRules, n). 
- auto result = format_numeric_to_string(plural_rules, number); - - // 8. Let s be res.[[FormattedString]]. - auto string = move(result); - - // 9. Let operands be ! GetOperands(s). - auto operands = get_operands(string); - - // 10. Let p be ! PluralRuleSelect(locale, type, n, operands). - auto plural_category = plural_rule_select(locale, type, number, move(operands)); - - // 11. Return the Record { [[PluralCategory]]: p, [[FormattedString]]: s }. - return { plural_category, move(string) }; + // 2. Let locale be pluralRules.[[Locale]]. + // 3. Let type be pluralRules.[[Type]]. + // 4. Let res be FormatNumericToString(pluralRules, ℝ(n)). + // 5. Let s be res.[[FormattedString]]. + // 6. Let operands be GetOperands(s). + // 7. Let p be PluralRuleSelect(locale, type, n, operands). + // 8. Return the Record { [[PluralCategory]]: p, [[FormattedString]]: s }. + return plural_rules.formatter().select_plural(number.as_double()); } -// 16.5.4 PluralRuleSelectRange ( locale, type, xp, yp ), https://tc39.es/ecma402/#sec-resolveplural -::Locale::PluralCategory plural_rule_select_range(StringView locale, ::Locale::PluralForm, ::Locale::PluralCategory start, ::Locale::PluralCategory end) -{ - return ::Locale::determine_plural_range(locale, start, end); -} - -// 16.5.5 ResolvePluralRange ( pluralRules, x, y ), https://tc39.es/ecma402/#sec-resolveplural +// 16.5.6 ResolvePluralRange ( pluralRules, x, y ), https://tc39.es/ecma402/#sec-resolvepluralrange ThrowCompletionOr<::Locale::PluralCategory> resolve_plural_range(VM& vm, PluralRules const& plural_rules, Value start, Value end) { - // 1. Assert: Type(pluralRules) is Object. - // 2. Assert: pluralRules has an [[InitializedPluralRules]] internal slot. - // 3. Assert: Type(x) is Number. - // 4. Assert: Type(y) is Number. - - // 5. If x is NaN or y is NaN, throw a RangeError exception. + // 1. If x is NaN or y is NaN, throw a RangeError exception. 
if (start.is_nan()) return vm.throw_completion(ErrorType::NumberIsNaN, "start"sv); if (end.is_nan()) return vm.throw_completion(ErrorType::NumberIsNaN, "end"sv); - // 6. Let xp be ! ResolvePlural(pluralRules, x). - auto start_plurality = resolve_plural(plural_rules, start); - - // 7. Let yp be ! ResolvePlural(pluralRules, y). - auto end_plurality = resolve_plural(plural_rules, end); - - // 8. If xp.[[FormattedString]] is yp.[[FormattedString]], then - if (start_plurality.formatted_string == end_plurality.formatted_string) { - // a. Return xp.[[PluralCategory]]. - return start_plurality.plural_category; - } - - // 9. Let locale be pluralRules.[[Locale]]. - auto const& locale = plural_rules.locale(); - - // 10. Let type be pluralRules.[[Type]]. - auto type = plural_rules.type(); - - // 11. Return ! PluralRuleSelectRange(locale, type, xp.[[PluralCategory]], yp.[[PluralCategory]]). - return plural_rule_select_range(locale, type, start_plurality.plural_category, end_plurality.plural_category); + // 2. Let xp be ResolvePlural(pluralRules, x). + // 3. Let yp be ResolvePlural(pluralRules, y). + // 4. If xp.[[FormattedString]] is yp.[[FormattedString]], then + // a. Return xp.[[PluralCategory]]. + // 5. Let locale be pluralRules.[[Locale]]. + // 6. Let type be pluralRules.[[Type]]. + // 7. Return PluralRuleSelectRange(locale, type, xp.[[PluralCategory]], yp.[[PluralCategory]]). 
+ return plural_rules.formatter().select_plural_range(start.as_double(), end.as_double()); } } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.h b/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.h index d5a9058ac04..7aa54fe9c92 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRules.h @@ -6,7 +6,6 @@ #pragma once -#include #include #include #include @@ -32,15 +31,7 @@ private: ::Locale::PluralForm m_type { ::Locale::PluralForm::Cardinal }; // [[Type]] }; -struct ResolvedPlurality { - ::Locale::PluralCategory plural_category; // [[PluralCategory]] - String formatted_string; // [[FormattedString]] -}; - -::Locale::PluralOperands get_operands(StringView string); -::Locale::PluralCategory plural_rule_select(StringView locale, ::Locale::PluralForm type, Value number, ::Locale::PluralOperands operands); -ResolvedPlurality resolve_plural(PluralRules const&, Value number); -::Locale::PluralCategory plural_rule_select_range(StringView locale, ::Locale::PluralForm, ::Locale::PluralCategory start, ::Locale::PluralCategory end); +::Locale::PluralCategory resolve_plural(PluralRules const&, Value number); ThrowCompletionOr<::Locale::PluralCategory> resolve_plural_range(VM&, PluralRules const&, Value start, Value end); } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.cpp b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.cpp index 1f4c97290ef..b7b97c72330 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.cpp @@ -49,14 +49,58 @@ ThrowCompletionOr> PluralRulesConstructor::construct(Functi { auto& vm = this->vm(); - auto locales = vm.argument(0); - auto options = vm.argument(1); + auto locales_value = vm.argument(0); + auto options_value = vm.argument(1); - // 2. Let pluralRules be ? 
OrdinaryCreateFromConstructor(NewTarget, "%PluralRules.prototype%", « [[InitializedPluralRules]], [[Locale]], [[Type]], [[MinimumIntegerDigits]], [[MinimumFractionDigits]], [[MaximumFractionDigits]], [[MinimumSignificantDigits]], [[MaximumSignificantDigits]], [[RoundingType]], [[RoundingMode]], [[RoundingIncrement]], [[TrailingZeroDisplay]] »). + // 2. Let pluralRules be ? OrdinaryCreateFromConstructor(NewTarget, "%Intl.PluralRules.prototype%", « [[InitializedPluralRules]], [[Locale]], [[Type]], [[MinimumIntegerDigits]], [[MinimumFractionDigits]], [[MaximumFractionDigits]], [[MinimumSignificantDigits]], [[MaximumSignificantDigits]], [[RoundingType]], [[RoundingIncrement]], [[RoundingMode]], [[ComputedRoundingPriority]], [[TrailingZeroDisplay]] »). auto plural_rules = TRY(ordinary_create_from_constructor(vm, new_target, &Intrinsics::intl_plural_rules_prototype)); - // 3. Return ? InitializePluralRules(pluralRules, locales, options). - return TRY(initialize_plural_rules(vm, plural_rules, locales, options)); + // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales). + auto requested_locales = TRY(canonicalize_locale_list(vm, locales_value)); + + // 4. Set options to ? CoerceOptionsToObject(options). + auto* options = TRY(coerce_options_to_object(vm, options_value)); + + // 5. Let opt be a new Record. + LocaleOptions opt {}; + + // 6. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit"). + auto matcher = TRY(get_option(vm, *options, vm.names.localeMatcher, OptionType::String, AK::Array { "lookup"sv, "best fit"sv }, "best fit"sv)); + + // 7. Set opt.[[localeMatcher]] to matcher. + opt.locale_matcher = matcher; + + // 8. Let localeData be %Intl.PluralRules%.[[LocaleData]]. + // 9. Let r be ResolveLocale(%Intl.PluralRules%.[[AvailableLocales]], requestedLocales, opt, %Intl.PluralRules%.[[RelevantExtensionKeys]], localeData). + auto result = resolve_locale(requested_locales, opt, {}); + + // 10. 
Set pluralRules.[[Locale]] to r.[[locale]]. + plural_rules->set_locale(move(result.locale)); + + // Non-standard, the data locale is used by our NumberFormat implementation. + plural_rules->set_data_locale(move(result.data_locale)); + + // 11. Let t be ? GetOption(options, "type", string, « "cardinal", "ordinal" », "cardinal"). + auto type = TRY(get_option(vm, *options, vm.names.type, OptionType::String, AK::Array { "cardinal"sv, "ordinal"sv }, "cardinal"sv)); + + // 12. Set pluralRules.[[Type]] to t. + plural_rules->set_type(type.as_string().utf8_string_view()); + + // 13. Perform ? SetNumberFormatDigitOptions(pluralRules, options, 0, 3, "standard"). + TRY(set_number_format_digit_options(vm, plural_rules, *options, 0, 3, ::Locale::Notation::Standard)); + + // Non-standard, create an ICU number formatter for this Intl object. + auto formatter = ::Locale::NumberFormat::create( + plural_rules->locale(), + {}, + {}, + plural_rules->rounding_options()); + + formatter->create_plural_rules(plural_rules->type()); + plural_rules->set_formatter(move(formatter)); + + // 14. Return pluralRules. + return plural_rules; } // 16.2.2 Intl.PluralRules.supportedLocalesOf ( locales [ , options ] ), https://tc39.es/ecma402/#sec-intl.pluralrules.supportedlocalesof @@ -74,53 +118,4 @@ JS_DEFINE_NATIVE_FUNCTION(PluralRulesConstructor::supported_locales_of) return TRY(supported_locales(vm, requested_locales, options)); } -// 16.1.2 InitializePluralRules ( pluralRules, locales, options ), https://tc39.es/ecma402/#sec-initializepluralrules -ThrowCompletionOr> initialize_plural_rules(VM& vm, PluralRules& plural_rules, Value locales_value, Value options_value) -{ - // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales). - auto requested_locales = TRY(canonicalize_locale_list(vm, locales_value)); - - // 2. Set options to ? CoerceOptionsToObject(options). - auto* options = TRY(coerce_options_to_object(vm, options_value)); - - // 3. Let opt be a new Record. 
- LocaleOptions opt {}; - - // 4. Let matcher be ? GetOption(options, "localeMatcher", string, « "lookup", "best fit" », "best fit"). - auto matcher = TRY(get_option(vm, *options, vm.names.localeMatcher, OptionType::String, AK::Array { "lookup"sv, "best fit"sv }, "best fit"sv)); - - // 5. Set opt.[[localeMatcher]] to matcher. - opt.locale_matcher = matcher; - - // 6. Let t be ? GetOption(options, "type", string, « "cardinal", "ordinal" », "cardinal"). - auto type = TRY(get_option(vm, *options, vm.names.type, OptionType::String, AK::Array { "cardinal"sv, "ordinal"sv }, "cardinal"sv)); - - // 7. Set pluralRules.[[Type]] to t. - plural_rules.set_type(type.as_string().utf8_string_view()); - - // 8. Perform ? SetNumberFormatDigitOptions(pluralRules, options, +0𝔽, 3𝔽, "standard"). - TRY(set_number_format_digit_options(vm, plural_rules, *options, 0, 3, ::Locale::Notation::Standard)); - - // 9. Let localeData be %PluralRules%.[[LocaleData]]. - // 10. Let r be ResolveLocale(%PluralRules%.[[AvailableLocales]], requestedLocales, opt, %PluralRules%.[[RelevantExtensionKeys]], localeData). - auto result = resolve_locale(requested_locales, opt, {}); - - // 11. Set pluralRules.[[Locale]] to r.[[locale]]. - plural_rules.set_locale(move(result.locale)); - - // Non-standard, the data locale is used by our NumberFormat implementation. - plural_rules.set_data_locale(move(result.data_locale)); - - // Non-standard, create an ICU number formatter for this Intl object. - auto formatter = ::Locale::NumberFormat::create( - plural_rules.locale(), - {}, - {}, - plural_rules.rounding_options()); - plural_rules.set_formatter(move(formatter)); - - // 12. Return pluralRules. 
- return plural_rules; -} - } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.h b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.h index dcfe99d5eef..f595d69d22c 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesConstructor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, Tim Flynn + * Copyright (c) 2022-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -29,6 +29,4 @@ private: JS_DECLARE_NATIVE_FUNCTION(supported_locales_of); }; -ThrowCompletionOr> initialize_plural_rules(VM&, PluralRules&, Value locales_value, Value options_value); - } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesPrototype.cpp b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesPrototype.cpp index 1ba9866a796..cfb5e68ef65 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/PluralRulesPrototype.cpp @@ -48,7 +48,7 @@ JS_DEFINE_NATIVE_FUNCTION(PluralRulesPrototype::select) // 4. Return ! ResolvePlural(pr, n).[[PluralCategory]]. auto plurality = resolve_plural(plural_rules, number); - return PrimitiveString::create(vm, ::Locale::plural_category_to_string(plurality.plural_category)); + return PrimitiveString::create(vm, ::Locale::plural_category_to_string(plurality)); } // 16.3.4 Intl.PluralRules.prototype.selectRange ( start, end ), https://tc39.es/ecma402/#sec-intl.pluralrules.prototype.selectrange @@ -91,7 +91,7 @@ JS_DEFINE_NATIVE_FUNCTION(PluralRulesPrototype::resolved_options) auto options = Object::create(realm, realm.intrinsics().object_prototype()); // 4. Let pluralCategories be a List of Strings containing all possible results of PluralRuleSelect for the selected locale pr.[[Locale]]. 
- auto available_categories = ::Locale::available_plural_categories(plural_rules->locale(), plural_rules->type()); + auto available_categories = plural_rules->formatter().available_plural_categories(); auto plural_categories = Array::create_from<::Locale::PluralCategory>(realm, available_categories, [&](auto category) { return PrimitiveString::create(vm, ::Locale::plural_category_to_string(category)); diff --git a/Userland/Libraries/LibJS/Tests/builtins/Intl/PluralRules/PluralRules.prototype.selectRange.js b/Userland/Libraries/LibJS/Tests/builtins/Intl/PluralRules/PluralRules.prototype.selectRange.js index 76619fafa66..9945ff9bdec 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/Intl/PluralRules/PluralRules.prototype.selectRange.js +++ b/Userland/Libraries/LibJS/Tests/builtins/Intl/PluralRules/PluralRules.prototype.selectRange.js @@ -62,13 +62,6 @@ describe("correct behavior", () => { expect(pl.selectRange(0.14, 3.14)).toBe("other"); // other + other = other }); - test("default to end of range", () => { - // "so" specifies "one" to be the integer 1, but does not specify any ranges. 
- const so = new Intl.PluralRules("so"); - expect(so.selectRange(0, 1)).toBe("one"); - expect(so.selectRange(1, 2)).toBe("other"); - }); - test("numbers in reverse order", () => { const en = new Intl.PluralRules("en"); expect(en.selectRange(1, -Infinity)).toBe("other"); diff --git a/Userland/Libraries/LibLocale/Forward.h b/Userland/Libraries/LibLocale/Forward.h index bb64737bcec..1de393135f3 100644 --- a/Userland/Libraries/LibLocale/Forward.h +++ b/Userland/Libraries/LibLocale/Forward.h @@ -33,7 +33,6 @@ struct ListFormatPart; struct LocaleExtension; struct LocaleID; struct OtherExtension; -struct PluralOperands; struct TransformedExtension; struct TransformedField; diff --git a/Userland/Libraries/LibLocale/NumberFormat.cpp b/Userland/Libraries/LibLocale/NumberFormat.cpp index a54aece4233..faffe6309bb 100644 --- a/Userland/Libraries/LibLocale/NumberFormat.cpp +++ b/Userland/Libraries/LibLocale/NumberFormat.cpp @@ -17,6 +17,7 @@ #include #include +#include namespace Locale { @@ -416,6 +417,17 @@ static constexpr UNumberUnitWidth icu_unit_width(Style unit_display) VERIFY_NOT_REACHED(); } +static constexpr UPluralType icu_plural_type(PluralForm plural_form) +{ + switch (plural_form) { + case PluralForm::Cardinal: + return UPluralType::UPLURAL_TYPE_CARDINAL; + case PluralForm::Ordinal: + return UPluralType::UPLURAL_TYPE_ORDINAL; + } + VERIFY_NOT_REACHED(); +} + static void apply_display_options(icu::number::LocalizedNumberFormatter& formatter, DisplayOptions const& display_options) { UErrorCode status = U_ZERO_ERROR; @@ -678,6 +690,78 @@ public: return format_to_parts_impl(formatted, start, end); } + virtual void create_plural_rules(PluralForm plural_form) override + { + UErrorCode status = U_ZERO_ERROR; + VERIFY(!m_plural_rules); + + m_plural_rules = adopt_own(*icu::PluralRules::forLocale(m_locale, icu_plural_type(plural_form), status)); + VERIFY(icu_success(status)); + } + + virtual PluralCategory select_plural(double value) const override + { + UErrorCode status = 
U_ZERO_ERROR; + VERIFY(m_plural_rules); + + auto formatted = format_impl(value); + if (!formatted.has_value()) + return PluralCategory::Other; + + auto result = m_plural_rules->select(*formatted, status); + if (icu_failure(status)) + return PluralCategory::Other; + + return plural_category_from_string(icu_string_to_string(result)); + } + + virtual PluralCategory select_plural_range(double start, double end) const override + { + UErrorCode status = U_ZERO_ERROR; + VERIFY(m_plural_rules); + + auto formatted = format_range_impl(start, end); + if (!formatted.has_value()) + return PluralCategory::Other; + + auto [formatted_start, formatted_end] = formatted->getDecimalNumbers(status); + if (icu_failure(status)) + return PluralCategory::Other; + + if (formatted_start.string_view() == formatted_end.string_view()) + return select_plural(start); + + auto result = m_plural_rules->select(*formatted, status); + if (icu_failure(status)) + return PluralCategory::Other; + + return plural_category_from_string(icu_string_to_string(result)); + } + + virtual Vector available_plural_categories() const override + { + UErrorCode status = U_ZERO_ERROR; + VERIFY(m_plural_rules); + + auto keywords = adopt_own_if_nonnull(m_plural_rules->getKeywords(status)); + if (icu_failure(status)) + return {}; + + Vector result; + + while (true) { + i32 length = 0; + auto const* category = keywords->next(&length, status); + + if (icu_failure(status) || category == nullptr) + break; + + result.append(plural_category_from_string({ category, static_cast(length) })); + } + + return result; + } + private: static icu::Formattable value_to_formattable(Value const& value) { @@ -796,8 +880,12 @@ private: } icu::Locale& m_locale; + icu::number::LocalizedNumberFormatter m_formatter; mutable Optional m_range_formatter; + + OwnPtr m_plural_rules; + bool m_is_unit { false }; }; diff --git a/Userland/Libraries/LibLocale/NumberFormat.h b/Userland/Libraries/LibLocale/NumberFormat.h index ffe13134db7..2faac464dc4 100644 
--- a/Userland/Libraries/LibLocale/NumberFormat.h +++ b/Userland/Libraries/LibLocale/NumberFormat.h @@ -12,6 +12,7 @@ #include #include #include +#include namespace Locale { @@ -162,6 +163,11 @@ public: virtual String format_range(Value const&, Value const&) const = 0; virtual Vector format_range_to_parts(Value const&, Value const&) const = 0; + virtual void create_plural_rules(PluralForm) = 0; + virtual PluralCategory select_plural(double) const = 0; + virtual PluralCategory select_plural_range(double, double) const = 0; + virtual Vector available_plural_categories() const = 0; + protected: NumberFormat() = default; }; diff --git a/Userland/Libraries/LibLocale/PluralRules.cpp b/Userland/Libraries/LibLocale/PluralRules.cpp index 9c36df4f76b..3434f2597db 100644 --- a/Userland/Libraries/LibLocale/PluralRules.cpp +++ b/Userland/Libraries/LibLocale/PluralRules.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, Tim Flynn + * Copyright (c) 2022-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -24,25 +24,52 @@ StringView plural_form_to_string(PluralForm plural_form) return "cardinal"sv; case PluralForm::Ordinal: return "ordinal"sv; - default: - VERIFY_NOT_REACHED(); } + VERIFY_NOT_REACHED(); } -PluralCategory __attribute__((weak)) determine_plural_category(StringView, PluralForm, PluralOperands) +PluralCategory plural_category_from_string(StringView category) { - return PluralCategory::Other; + if (category == "other"sv) + return PluralCategory::Other; + if (category == "zero"sv) + return PluralCategory::Zero; + if (category == "one"sv) + return PluralCategory::One; + if (category == "two"sv) + return PluralCategory::Two; + if (category == "few"sv) + return PluralCategory::Few; + if (category == "many"sv) + return PluralCategory::Many; + if (category == "0"sv) + return PluralCategory::ExactlyZero; + if (category == "1"sv) + return PluralCategory::ExactlyOne; + VERIFY_NOT_REACHED(); } -ReadonlySpan __attribute__((weak)) available_plural_categories(StringView, 
PluralForm) +StringView plural_category_to_string(PluralCategory category) { - static constexpr Array categories { { PluralCategory::Other } }; - return categories.span(); -} - -PluralCategory __attribute__((weak)) determine_plural_range(StringView, PluralCategory, PluralCategory) -{ - return PluralCategory::Other; + switch (category) { + case PluralCategory::Other: + return "other"sv; + case PluralCategory::Zero: + return "zero"sv; + case PluralCategory::One: + return "one"sv; + case PluralCategory::Two: + return "two"sv; + case PluralCategory::Few: + return "few"sv; + case PluralCategory::Many: + return "many"sv; + case PluralCategory::ExactlyZero: + return "0"sv; + case PluralCategory::ExactlyOne: + return "1"sv; + } + VERIFY_NOT_REACHED(); } } diff --git a/Userland/Libraries/LibLocale/PluralRules.h b/Userland/Libraries/LibLocale/PluralRules.h index 62c948833cc..8b893b60c1c 100644 --- a/Userland/Libraries/LibLocale/PluralRules.h +++ b/Userland/Libraries/LibLocale/PluralRules.h @@ -1,15 +1,12 @@ /* - * Copyright (c) 2022, Tim Flynn + * Copyright (c) 2022-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once -#include #include -#include -#include namespace Locale { @@ -17,6 +14,8 @@ enum class PluralForm { Cardinal, Ordinal, }; +PluralForm plural_form_from_string(StringView); +StringView plural_form_to_string(PluralForm); enum class PluralCategory : u8 { Other, @@ -30,95 +29,7 @@ enum class PluralCategory : u8 { ExactlyZero, ExactlyOne, }; - -// https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Operand_Meanings -struct PluralOperands { - static constexpr StringView symbol_to_variable_name(char symbol) - { - if (symbol == 'n') - return "number"sv; - if (symbol == 'i') - return "integer_digits"sv; - if (symbol == 'f') - return "fraction_digits"sv; - if (symbol == 'v') - return "number_of_fraction_digits"sv; - if (symbol == 't') - return "fraction_digits_without_trailing"sv; - if (symbol == 'w') - return 
"number_of_fraction_digits_without_trailing"sv; - VERIFY_NOT_REACHED(); - } - - static constexpr bool symbol_requires_floating_point_modulus(char symbol) - { - // From TR-35: "The modulus (% or mod) is a remainder operation as defined in Java; for - // example, where n = 4.3 the result of n mod 3 is 1.3." - // - // So, this returns whether the symbol represents a decimal value, and thus requires fmod. - return symbol == 'n'; - } - - double number { 0 }; - u64 integer_digits { 0 }; - u64 fraction_digits { 0 }; - u64 number_of_fraction_digits { 0 }; - u64 fraction_digits_without_trailing { 0 }; - u64 number_of_fraction_digits_without_trailing { 0 }; -}; - -PluralForm plural_form_from_string(StringView plural_form); -StringView plural_form_to_string(PluralForm plural_form); - -// NOTE: This must be defined inline to be callable from the code generators. -constexpr PluralCategory plural_category_from_string(StringView category) -{ - if (category == "other"sv) - return PluralCategory::Other; - if (category == "zero"sv) - return PluralCategory::Zero; - if (category == "one"sv) - return PluralCategory::One; - if (category == "two"sv) - return PluralCategory::Two; - if (category == "few"sv) - return PluralCategory::Few; - if (category == "many"sv) - return PluralCategory::Many; - if (category == "0"sv) - return PluralCategory::ExactlyZero; - if (category == "1"sv) - return PluralCategory::ExactlyOne; - VERIFY_NOT_REACHED(); -} - -// NOTE: This must be defined inline to be callable from the code generators. 
-constexpr StringView plural_category_to_string(PluralCategory category) -{ - switch (category) { - case PluralCategory::Other: - return "other"sv; - case PluralCategory::Zero: - return "zero"sv; - case PluralCategory::One: - return "one"sv; - case PluralCategory::Two: - return "two"sv; - case PluralCategory::Few: - return "few"sv; - case PluralCategory::Many: - return "many"sv; - case PluralCategory::ExactlyZero: - return "0"sv; - case PluralCategory::ExactlyOne: - return "1"sv; - } - - VERIFY_NOT_REACHED(); -} - -PluralCategory determine_plural_category(StringView locale, PluralForm form, PluralOperands operands); -ReadonlySpan available_plural_categories(StringView locale, PluralForm form); -PluralCategory determine_plural_range(StringView locale, PluralCategory start, PluralCategory end); +PluralCategory plural_category_from_string(StringView); +StringView plural_category_to_string(PluralCategory); }