LibLocale+LibJS: Port locale parsing and processing to String

In order to prevent this commit from having to refactor almost all of
Intl, the goal here is to update the internal parsing/canonicalization
of locales within LibLocale only. Call sites which are already equipped
to handle String and OOM errors do so, however.
This commit is contained in:
Timothy Flynn 2023-01-19 10:53:20 -05:00 committed by Linus Groh
parent 618714e29a
commit ca62aeb6bd
Notes: sideshowbarker 2024-07-17 08:35:21 +09:00
11 changed files with 371 additions and 299 deletions

View File

@ -1069,6 +1069,7 @@ static ErrorOr<void> generate_unicode_locale_implementation(Core::Stream::Buffer
#include <AK/BinarySearch.h>
#include <AK/Optional.h>
#include <AK/Span.h>
#include <AK/String.h>
#include <AK/StringView.h>
#include <AK/Vector.h>
#include <LibLocale/DateTimeFormat.h>
@ -1229,24 +1230,25 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
generator.append(R"~~~(
struct CanonicalLanguageID {
LanguageID to_unicode_language_id() const
struct CanonicalLanguageID
{
ErrorOr<LanguageID> to_unicode_language_id() const
{
LanguageID language_id {};
language_id.variants.ensure_capacity(variants_size);
TRY(language_id.variants.try_ensure_capacity(variants_size));
language_id.language = decode_string(language);
language_id.language = TRY(String::from_utf8(decode_string(language)));
if (script != 0)
language_id.script = decode_string(script);
language_id.script = TRY(String::from_utf8(decode_string(script)));
if (region != 0)
language_id.region = decode_string(region);
language_id.region = TRY(String::from_utf8(decode_string(region)));
for (size_t i = 0; i < variants_size; ++i)
language_id.variants.append(decode_string(variants[i]));
language_id.variants.append(TRY(String::from_utf8(decode_string(variants[i]))));
return language_id;
}
bool matches_variants(Vector<DeprecatedString> const& other_variants) const {
bool matches_variants(Vector<String> const& other_variants) const {
if (variants_size == 0)
return true;
if (other_variants.size() != variants_size)
@ -1375,7 +1377,7 @@ static LanguageMapping const* resolve_likely_subtag(LanguageID const& language_i
if (!language_id.script.has_value())
continue;
search_key.language = "und"sv;
search_key.language = String::from_utf8("und"sv).release_value_but_fixme_should_propagate_errors();
search_key.script = *language_id.script;
break;
@ -1680,9 +1682,9 @@ Optional<CharacterOrder> character_order_for_locale(StringView locale)
void resolve_complex_language_aliases(LanguageID& language_id)
{
for (auto const& map : s_complex_alias) {
auto const& key_language = decode_string(map.key.language);
auto const& key_script = decode_string(map.key.script);
auto const& key_region = decode_string(map.key.region);
auto key_language = decode_string(map.key.language);
auto key_script = decode_string(map.key.script);
auto key_region = decode_string(map.key.region);
if ((key_language != language_id.language) && (key_language != "und"sv))
continue;
@ -1693,7 +1695,7 @@ void resolve_complex_language_aliases(LanguageID& language_id)
if (!map.key.matches_variants(language_id.variants))
continue;
auto alias = map.alias.to_unicode_language_id();
auto alias = map.alias.to_unicode_language_id().release_value_but_fixme_should_propagate_errors();
if (alias.language == "und"sv)
alias.language = move(language_id.language);
@ -1718,19 +1720,19 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id)
auto maximized = language_id;
auto const& key_script = decode_string(likely_subtag->key.script);
auto const& key_region = decode_string(likely_subtag->key.region);
auto key_script = decode_string(likely_subtag->key.script);
auto key_region = decode_string(likely_subtag->key.region);
auto const& alias_language = decode_string(likely_subtag->alias.language);
auto const& alias_script = decode_string(likely_subtag->alias.script);
auto const& alias_region = decode_string(likely_subtag->alias.region);
auto alias_language = decode_string(likely_subtag->alias.language);
auto alias_script = decode_string(likely_subtag->alias.script);
auto alias_region = decode_string(likely_subtag->alias.region);
if (maximized.language == "und"sv)
maximized.language = alias_language;
maximized.language = String::from_utf8(alias_language).release_value_but_fixme_should_propagate_errors();
if (!maximized.script.has_value() || (!key_script.is_empty() && !alias_script.is_empty()))
maximized.script = alias_script;
maximized.script = String::from_utf8(alias_script).release_value_but_fixme_should_propagate_errors();
if (!maximized.region.has_value() || (!key_region.is_empty() && !alias_region.is_empty()))
maximized.region = alias_region;
maximized.region = String::from_utf8(alias_region).release_value_but_fixme_should_propagate_errors();
return maximized;
}

View File

@ -87,20 +87,34 @@ TEST_CASE(is_type_identifier)
EXPECT(!Locale::is_type_identifier("aaaa-"sv));
}
// Element-wise comparison of two indexable containers whose element types may differ
// (e.g. a Vector<String> against a Vector<StringView>). Returns true only when both
// containers report the same size and no pair of elements at the same index compares unequal.
template<typename LHS, typename RHS>
[[nodiscard]] static bool compare_vectors(LHS const& lhs, RHS const& rhs)
{
    auto const count = lhs.size();
    if (count != rhs.size())
        return false;

    // Advance past every matching pair; stopping early means a mismatch was found.
    size_t index = 0;
    while (index < count && !(lhs[index] != rhs[index]))
        ++index;

    return index == count;
}
TEST_CASE(parse_unicode_locale_id)
{
auto fail = [](StringView locale) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
EXPECT(!locale_id.has_value());
};
auto pass = [](StringView locale, Optional<StringView> expected_language, Optional<StringView> expected_script, Optional<StringView> expected_region, Vector<DeprecatedString> expected_variants) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto pass = [](StringView locale, Optional<StringView> expected_language, Optional<StringView> expected_script, Optional<StringView> expected_region, Vector<StringView> expected_variants) {
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
VERIFY(locale_id.has_value());
EXPECT_EQ(locale_id->language_id.language, expected_language);
EXPECT_EQ(locale_id->language_id.script, expected_script);
EXPECT_EQ(locale_id->language_id.region, expected_region);
EXPECT_EQ(locale_id->language_id.variants, expected_variants);
EXPECT(compare_vectors(locale_id->language_id.variants, expected_variants));
};
fail("a"sv);
@ -120,17 +134,27 @@ TEST_CASE(parse_unicode_locale_id)
TEST_CASE(parse_unicode_locale_id_with_unicode_locale_extension)
{
// Test-local description of the expected Unicode locale extension. It stores the expected
// attributes and keywords as StringViews; the `pass` helper below compares them against the
// Locale::LocaleExtension produced by the parser.
struct LocaleExtension {
// One expected key/value keyword pair.
struct Keyword {
StringView key {};
StringView value {};
};
Vector<StringView> attributes {};
Vector<Keyword> keywords {};
};
auto fail = [](StringView locale) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
EXPECT(!locale_id.has_value());
};
auto pass = [](StringView locale, Locale::LocaleExtension const& expected_extension) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto pass = [](StringView locale, LocaleExtension const& expected_extension) {
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
VERIFY(locale_id.has_value());
EXPECT_EQ(locale_id->extensions.size(), 1u);
auto const& actual_extension = locale_id->extensions[0].get<Locale::LocaleExtension>();
VERIFY(actual_extension.attributes == expected_extension.attributes);
EXPECT(compare_vectors(actual_extension.attributes, expected_extension.attributes));
EXPECT_EQ(actual_extension.keywords.size(), expected_extension.keywords.size());
for (size_t i = 0; i < actual_extension.keywords.size(); ++i) {
@ -166,12 +190,30 @@ TEST_CASE(parse_unicode_locale_id_with_unicode_locale_extension)
TEST_CASE(parse_unicode_locale_id_with_transformed_extension)
{
// Test-local description of the expected transformed extension, expressed with StringViews.
// The `pass` helper below compares its language subtags and fields against the extension
// produced by the parser.
struct TransformedExtension {
// Expected language portion of the extension (language, script, region and variants).
struct LanguageID {
bool is_root { false };
Optional<StringView> language {};
Optional<StringView> script {};
Optional<StringView> region {};
Vector<StringView> variants {};
};
// One expected key/value field of the extension.
struct TransformedField {
StringView key {};
StringView value {};
};
// Left empty when the extension is expected to carry no language portion.
Optional<LanguageID> language {};
Vector<TransformedField> fields {};
};
auto fail = [](StringView locale) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
EXPECT(!locale_id.has_value());
};
auto pass = [](StringView locale, Locale::TransformedExtension const& expected_extension) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto pass = [](StringView locale, TransformedExtension const& expected_extension) {
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
VERIFY(locale_id.has_value());
EXPECT_EQ(locale_id->extensions.size(), 1u);
@ -182,7 +224,7 @@ TEST_CASE(parse_unicode_locale_id_with_transformed_extension)
EXPECT_EQ(actual_extension.language->language, expected_extension.language->language);
EXPECT_EQ(actual_extension.language->script, expected_extension.language->script);
EXPECT_EQ(actual_extension.language->region, expected_extension.language->region);
EXPECT_EQ(actual_extension.language->variants, expected_extension.language->variants);
EXPECT(compare_vectors(actual_extension.language->variants, expected_extension.language->variants));
}
EXPECT_EQ(actual_extension.fields.size(), expected_extension.fields.size());
@ -216,28 +258,33 @@ TEST_CASE(parse_unicode_locale_id_with_transformed_extension)
fail("en-t-k0-aa"sv);
fail("en-t-k0-aaaaaaaaa"sv);
pass("en-t-en"sv, { Locale::LanguageID { false, "en"sv }, {} });
pass("en-t-en-latn"sv, { Locale::LanguageID { false, "en"sv, "latn"sv }, {} });
pass("en-t-en-us"sv, { Locale::LanguageID { false, "en"sv, {}, "us"sv }, {} });
pass("en-t-en-latn-us"sv, { Locale::LanguageID { false, "en"sv, "latn"sv, "us"sv }, {} });
pass("en-t-en-posix"sv, { Locale::LanguageID { false, "en"sv, {}, {}, { "posix"sv } }, {} });
pass("en-t-en-latn-posix"sv, { Locale::LanguageID { false, "en"sv, "latn"sv, {}, { "posix"sv } }, {} });
pass("en-t-en-us-posix"sv, { Locale::LanguageID { false, "en"sv, {}, "us"sv, { "posix"sv } }, {} });
pass("en-t-en-latn-us-posix"sv, { Locale::LanguageID { false, "en"sv, "latn"sv, "us"sv, { "posix"sv } }, {} });
pass("en-t-en"sv, { TransformedExtension::LanguageID { false, "en"sv }, {} });
pass("en-t-en-latn"sv, { TransformedExtension::LanguageID { false, "en"sv, "latn"sv }, {} });
pass("en-t-en-us"sv, { TransformedExtension::LanguageID { false, "en"sv, {}, "us"sv }, {} });
pass("en-t-en-latn-us"sv, { TransformedExtension::LanguageID { false, "en"sv, "latn"sv, "us"sv }, {} });
pass("en-t-en-posix"sv, { TransformedExtension::LanguageID { false, "en"sv, {}, {}, { "posix"sv } }, {} });
pass("en-t-en-latn-posix"sv, { TransformedExtension::LanguageID { false, "en"sv, "latn"sv, {}, { "posix"sv } }, {} });
pass("en-t-en-us-posix"sv, { TransformedExtension::LanguageID { false, "en"sv, {}, "us"sv, { "posix"sv } }, {} });
pass("en-t-en-latn-us-posix"sv, { TransformedExtension::LanguageID { false, "en"sv, "latn"sv, "us"sv, { "posix"sv } }, {} });
pass("en-t-k0-aaa"sv, { {}, { { "k0"sv, { "aaa"sv } } } });
pass("en-t-k0-aaa-bbbb"sv, { {}, { { "k0"sv, "aaa-bbbb"sv } } });
pass("en-t-k0-aaa-k1-bbbb"sv, { {}, { { "k0"sv, { "aaa"sv } }, { "k1"sv, "bbbb"sv } } });
pass("en-t-en-k0-aaa"sv, { Locale::LanguageID { false, "en"sv }, { { "k0"sv, "aaa"sv } } });
pass("en-t-en-k0-aaa"sv, { TransformedExtension::LanguageID { false, "en"sv }, { { "k0"sv, "aaa"sv } } });
}
TEST_CASE(parse_unicode_locale_id_with_other_extension)
{
// Test-local description of an expected extension: a single-character key and its value,
// held as a StringView. Instances are handed to the `pass` helper below.
struct OtherExtension {
char key {};
StringView value {};
};
auto fail = [](StringView locale) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
EXPECT(!locale_id.has_value());
};
auto pass = [](StringView locale, Locale::OtherExtension const& expected_extension) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto pass = [](StringView locale, OtherExtension const& expected_extension) {
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
VERIFY(locale_id.has_value());
EXPECT_EQ(locale_id->extensions.size(), 1u);
@ -267,13 +314,13 @@ TEST_CASE(parse_unicode_locale_id_with_other_extension)
TEST_CASE(parse_unicode_locale_id_with_private_use_extension)
{
auto fail = [](StringView locale) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
EXPECT(!locale_id.has_value());
};
auto pass = [](StringView locale, Vector<DeprecatedString> const& expected_extension) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto pass = [](StringView locale, Vector<StringView> const& expected_extension) {
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
VERIFY(locale_id.has_value());
EXPECT_EQ(locale_id->private_use_extensions, expected_extension);
EXPECT(compare_vectors(locale_id->private_use_extensions, expected_extension));
};
fail("en-x"sv);
@ -291,10 +338,10 @@ TEST_CASE(parse_unicode_locale_id_with_private_use_extension)
TEST_CASE(canonicalize_unicode_locale_id)
{
auto test = [](StringView locale, StringView expected_canonical_locale) {
auto locale_id = Locale::parse_unicode_locale_id(locale);
auto locale_id = MUST(Locale::parse_unicode_locale_id(locale));
VERIFY(locale_id.has_value());
auto canonical_locale = Locale::canonicalize_unicode_locale_id(*locale_id);
auto canonical_locale = MUST(Locale::canonicalize_unicode_locale_id(*locale_id));
EXPECT_EQ(*canonical_locale, expected_canonical_locale);
};

View File

@ -29,7 +29,7 @@ Optional<::Locale::LocaleID> is_structurally_valid_language_tag(StringView local
quick_sort(variants);
for (size_t i = 0; i < variants.size() - 1; ++i) {
if (variants[i].equals_ignoring_case(variants[i + 1]))
if (variants[i].equals_ignoring_case(variants[i + 1]).release_value_but_fixme_should_propagate_errors())
return true;
}
@ -39,7 +39,7 @@ Optional<::Locale::LocaleID> is_structurally_valid_language_tag(StringView local
// IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise:
// locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier;
auto locale_id = ::Locale::parse_unicode_locale_id(locale);
auto locale_id = ::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors();
if (!locale_id.has_value())
return {};
@ -114,11 +114,11 @@ DeprecatedString canonicalize_unicode_locale_id(::Locale::LocaleID& locale)
// 1. Let localeId be the string locale after performing the algorithm to transform it to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers.
// 2. Let localeId be the string localeId after performing the algorithm to transform it to canonical form.
auto locale_id = ::Locale::canonicalize_unicode_locale_id(locale);
auto locale_id = ::Locale::canonicalize_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors();
VERIFY(locale_id.has_value());
// 4. Return localeId.
return locale_id.release_value();
return locale_id->to_deprecated_string();
}
// 6.3.1 IsWellFormedCurrencyCode ( currency ), https://tc39.es/ecma402/#sec-iswellformedcurrencycode
@ -301,7 +301,7 @@ static MatcherResult lookup_matcher(Vector<DeprecatedString> const& requested_lo
// 2. For each element locale of requestedLocales, do
for (auto const& locale : requested_locales) {
auto locale_id = ::Locale::parse_unicode_locale_id(locale);
auto locale_id = ::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors();
VERIFY(locale_id.has_value());
// a. Let noExtensionsLocale be the String value that is locale with any Unicode locale extension sequences removed.
@ -453,10 +453,10 @@ ThrowCompletionOr<LocaleResult> resolve_locale(Vector<DeprecatedString> const& r
// a. If keyLocaleData contains requestedValue, then
if (key_locale_data.contains_slow(requested_value)) {
// i. Let value be requestedValue.
value = move(requested_value);
value = requested_value.to_deprecated_string();
// ii. Let supportedExtensionAddition be the string-concatenation of "-", key, "-", and value.
supported_extension_addition = ::Locale::Keyword { key, move(entry.value) };
supported_extension_addition = ::Locale::Keyword { String::from_utf8(key).release_value_but_fixme_should_propagate_errors(), move(entry.value) };
}
}
// 4. Else if keyLocaleData contains "true", then
@ -465,7 +465,7 @@ ThrowCompletionOr<LocaleResult> resolve_locale(Vector<DeprecatedString> const& r
value = "true"sv;
// b. Let supportedExtensionAddition be the string-concatenation of "-" and key.
supported_extension_addition = ::Locale::Keyword { key, {} };
supported_extension_addition = ::Locale::Keyword { String::from_utf8(key).release_value_but_fixme_should_propagate_errors(), {} };
}
break;
@ -480,7 +480,9 @@ ThrowCompletionOr<LocaleResult> resolve_locale(Vector<DeprecatedString> const& r
if (options_value.has_value()) {
// 1. Let optionsValue be the string optionsValue after performing the algorithm steps to transform Unicode extension values to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions.
// 2. Let optionsValue be the string optionsValue after performing the algorithm steps to replace Unicode extension values with their canonical form per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions.
::Locale::canonicalize_unicode_extension_values(key, *options_value, true);
auto options_value_string = String::from_deprecated_string(*options_value).release_value_but_fixme_should_propagate_errors();
::Locale::canonicalize_unicode_extension_values(key, options_value_string, true).release_value_but_fixme_should_propagate_errors();
options_value = options_value_string.to_deprecated_string();
// 3. If optionsValue is the empty String, then
if (options_value->is_empty()) {
@ -508,7 +510,7 @@ ThrowCompletionOr<LocaleResult> resolve_locale(Vector<DeprecatedString> const& r
// 10. If supportedExtension is not "-u", then
if (!supported_extension.keywords.is_empty()) {
auto locale_id = ::Locale::parse_unicode_locale_id(found_locale);
auto locale_id = ::Locale::parse_unicode_locale_id(found_locale).release_value_but_fixme_should_propagate_errors();
VERIFY(locale_id.has_value());
// a. Set foundLocale to InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension).
@ -530,7 +532,7 @@ Vector<DeprecatedString> lookup_supported_locales(Vector<DeprecatedString> const
// 2. For each element locale of requestedLocales, do
for (auto const& locale : requested_locales) {
auto locale_id = ::Locale::parse_unicode_locale_id(locale);
auto locale_id = ::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors();
VERIFY(locale_id.has_value());
// a. Let noExtensionsLocale be the String value that is locale with any Unicode locale extension sequences removed.

View File

@ -106,7 +106,7 @@ ThrowCompletionOr<Value> canonical_code_for_display_names(VM& vm, DisplayNames::
// 1. If type is "language", then
if (type == DisplayNames::Type::Language) {
// a. If code does not match the unicode_language_id production, throw a RangeError exception.
if (!::Locale::parse_unicode_language_id(code).has_value())
if (!TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_language_id(code)).has_value())
return vm.throw_completion<RangeError>(ErrorType::OptionIsNotValidValue, code, "language"sv);
// b. If IsStructurallyValidLanguageTag(code) is false, throw a RangeError exception.

View File

@ -36,17 +36,17 @@ Locale::Locale(::Locale::LocaleID const& locale_id, Object& prototype)
for (auto const& keyword : extension.get<::Locale::LocaleExtension>().keywords) {
if (keyword.key == "ca"sv) {
set_calendar(keyword.value);
set_calendar(keyword.value.to_deprecated_string());
} else if (keyword.key == "co"sv) {
set_collation(keyword.value);
set_collation(keyword.value.to_deprecated_string());
} else if (keyword.key == "hc"sv) {
set_hour_cycle(keyword.value);
set_hour_cycle(keyword.value.to_deprecated_string());
} else if (keyword.key == "kf"sv) {
set_case_first(keyword.value);
set_case_first(keyword.value.to_deprecated_string());
} else if (keyword.key == "kn"sv) {
set_numeric(keyword.value.is_empty());
} else if (keyword.key == "nu"sv) {
set_numbering_system(keyword.value);
set_numbering_system(keyword.value.to_deprecated_string());
}
}
@ -81,7 +81,7 @@ Array* calendars_of_locale(VM& vm, Locale const& locale_object)
auto const& locale = locale_object.locale();
// 3. Assert: locale matches the unicode_locale_id production.
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value());
// 4. Let list be a List of 1 or more unique canonical calendar identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for date and time formatting in locale.
auto list = ::Locale::get_keywords_for_locale(locale, "ca"sv);
@ -100,7 +100,7 @@ Array* collations_of_locale(VM& vm, Locale const& locale_object)
auto const& locale = locale_object.locale();
// 3. Assert: locale matches the unicode_locale_id production.
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value());
// 4. Let list be a List of 1 or more unique canonical collation identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, ordered as if an Array of the same values had been sorted, using %Array.prototype.sort% using undefined as comparefn, of those in common use for string comparison in locale. The values "standard" and "search" must be excluded from list.
auto list = ::Locale::get_keywords_for_locale(locale, "co"sv);
@ -119,7 +119,7 @@ Array* hour_cycles_of_locale(VM& vm, Locale const& locale_object)
auto const& locale = locale_object.locale();
// 3. Assert: locale matches the unicode_locale_id production.
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value());
// 4. Let list be a List of 1 or more unique hour cycle identifiers, which must be lower case String values indicating either the 12-hour format ("h11", "h12") or the 24-hour format ("h23", "h24"), sorted in descending preference of those in common use for date and time formatting in locale.
auto list = ::Locale::get_keywords_for_locale(locale, "hc"sv);
@ -138,7 +138,7 @@ Array* numbering_systems_of_locale(VM& vm, Locale const& locale_object)
auto const& locale = locale_object.locale();
// 3. Assert: locale matches the unicode_locale_id production.
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value());
// 4. Let list be a List of 1 or more unique canonical numbering system identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for formatting numeric values in locale.
auto list = ::Locale::get_keywords_for_locale(locale, "nu"sv);
@ -174,7 +174,7 @@ StringView character_direction_of_locale(Locale const& locale_object)
auto const& locale = locale_object.locale();
// 2. Assert: locale matches the unicode_locale_id production.
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value());
// 3. If the default general ordering of characters (characterOrder) within a line in locale is right-to-left, return "rtl".
// NOTE: LibUnicode handles both LTR and RTL character orders in this call, not just RTL. We then fallback to LTR
@ -235,7 +235,7 @@ WeekInfo week_info_of_locale(Locale const& locale_object)
auto const& locale = locale_object.locale();
// 2. Assert: locale matches the unicode_locale_id production.
VERIFY(::Locale::parse_unicode_locale_id(locale).has_value());
VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value());
// 3. Return a record whose fields are defined by Table 1, with values based on locale.
WeekInfo week_info {};

View File

@ -6,6 +6,7 @@
#include <AK/DeprecatedString.h>
#include <AK/Optional.h>
#include <AK/String.h>
#include <AK/TypeCasts.h>
#include <LibJS/Runtime/AbstractOperations.h>
#include <LibJS/Runtime/GlobalObject.h>
@ -17,26 +18,26 @@
namespace JS::Intl {
struct LocaleAndKeys {
DeprecatedString locale;
Optional<DeprecatedString> ca;
Optional<DeprecatedString> co;
Optional<DeprecatedString> hc;
Optional<DeprecatedString> kf;
Optional<DeprecatedString> kn;
Optional<DeprecatedString> nu;
String locale;
Optional<String> ca;
Optional<String> co;
Optional<String> hc;
Optional<String> kf;
Optional<String> kn;
Optional<String> nu;
};
// Note: This is not an AO in the spec. This just serves to abstract very similar steps in ApplyOptionsToTag and the Intl.Locale constructor.
static ThrowCompletionOr<Optional<DeprecatedString>> get_string_option(VM& vm, Object const& options, PropertyKey const& property, Function<bool(StringView)> validator, Span<StringView const> values = {})
static ThrowCompletionOr<Optional<String>> get_string_option(VM& vm, Object const& options, PropertyKey const& property, Function<bool(StringView)> validator, Span<StringView const> values = {})
{
auto option = TRY(get_option(vm, options, property, OptionType::String, values, Empty {}));
if (option.is_undefined())
return Optional<DeprecatedString> {};
return Optional<String> {};
if (validator && !validator(TRY(option.as_string().utf8_string_view())))
return vm.throw_completion<RangeError>(ErrorType::OptionIsNotValidValue, option, property);
return TRY(option.as_string().deprecated_string());
return TRY(option.as_string().utf8_string());
}
// 14.1.2 ApplyOptionsToTag ( tag, options ), https://tc39.es/ecma402/#sec-apply-options-to-tag
@ -69,7 +70,7 @@ static ThrowCompletionOr<DeprecatedString> apply_options_to_tag(VM& vm, StringVi
auto canonicalized_tag = JS::Intl::canonicalize_unicode_locale_id(*locale_id);
// 11. Assert: tag matches the unicode_locale_id production.
locale_id = ::Locale::parse_unicode_locale_id(canonicalized_tag);
locale_id = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(canonicalized_tag));
VERIFY(locale_id.has_value());
// 12. Let languageId be the substring of tag corresponding to the unicode_language_id production.
@ -109,10 +110,10 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey
{
// 1. Assert: Type(tag) is String.
// 2. Assert: tag matches the unicode_locale_id production.
auto locale_id = ::Locale::parse_unicode_locale_id(tag);
auto locale_id = ::Locale::parse_unicode_locale_id(tag).release_value_but_fixme_should_propagate_errors();
VERIFY(locale_id.has_value());
Vector<DeprecatedString> attributes;
Vector<String> attributes;
Vector<::Locale::Keyword> keywords;
// 3. If tag contains a substring that is a Unicode locale extension sequence, then
@ -134,7 +135,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey
// a. Let attributes be a new empty List.
// b. Let keywords be a new empty List.
auto field_from_key = [](LocaleAndKeys& value, StringView key) -> Optional<DeprecatedString>& {
auto field_from_key = [](LocaleAndKeys& value, StringView key) -> Optional<String>& {
if (key == "ca"sv)
return value.ca;
if (key == "co"sv)
@ -156,7 +157,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey
// 6. For each element key of relevantExtensionKeys, do
for (auto const& key : relevant_extension_keys) {
// a. Let value be undefined.
Optional<DeprecatedString> value {};
Optional<String> value {};
::Locale::Keyword* entry = nullptr;
// b. If keywords contains an element whose [[Key]] is the same as key, then
@ -188,7 +189,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey
// iv. Else,
else {
// 1. Append the Record { [[Key]]: key, [[Value]]: value } to keywords.
keywords.append({ key, *value });
keywords.append({ String::from_utf8(key).release_value_but_fixme_should_propagate_errors(), *value });
}
}
@ -198,7 +199,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey
// 7. Let locale be the String value that is tag with any Unicode locale extension sequences removed.
locale_id->remove_extension_type<::Locale::LocaleExtension>();
auto locale = locale_id->to_deprecated_string();
auto locale = locale_id->to_string().release_value_but_fixme_should_propagate_errors();
// 8. Let newExtension be a Unicode BCP 47 U Extension based on attributes and keywords.
::Locale::LocaleExtension new_extension { move(attributes), move(keywords) };
@ -206,7 +207,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey
// 9. If newExtension is not the empty String, then
if (!new_extension.attributes.is_empty() || !new_extension.keywords.is_empty()) {
// a. Let locale be ! InsertUnicodeExtensionAndCanonicalize(locale, newExtension).
locale = insert_unicode_extension_and_canonicalize(locale_id.release_value(), move(new_extension));
locale = String::from_deprecated_string(insert_unicode_extension_and_canonicalize(locale_id.release_value(), move(new_extension))).release_value_but_fixme_should_propagate_errors();
}
// 10. Set result.[[locale]] to locale.
@ -313,7 +314,7 @@ ThrowCompletionOr<NonnullGCPtr<Object>> LocaleConstructor::construct(FunctionObj
// 24. If kn is not undefined, set kn to ! ToString(kn).
// 25. Set opt.[[kn]] to kn.
if (!kn.is_undefined())
opt.kn = TRY(kn.to_deprecated_string(vm));
opt.kn = TRY(kn.to_string(vm));
// 26. Let numberingSystem be ? GetOption(options, "numberingSystem", string, empty, undefined).
// 27. If numberingSystem is not undefined, then
@ -325,22 +326,22 @@ ThrowCompletionOr<NonnullGCPtr<Object>> LocaleConstructor::construct(FunctionObj
auto result = apply_unicode_extension_to_tag(tag, move(opt), relevant_extension_keys);
// 30. Set locale.[[Locale]] to r.[[locale]].
locale->set_locale(move(result.locale));
locale->set_locale(result.locale.to_deprecated_string());
// 31. Set locale.[[Calendar]] to r.[[ca]].
if (result.ca.has_value())
locale->set_calendar(result.ca.release_value());
locale->set_calendar(result.ca->to_deprecated_string());
// 32. Set locale.[[Collation]] to r.[[co]].
if (result.co.has_value())
locale->set_collation(result.co.release_value());
locale->set_collation(result.co->to_deprecated_string());
// 33. Set locale.[[HourCycle]] to r.[[hc]].
if (result.hc.has_value())
locale->set_hour_cycle(result.hc.release_value());
locale->set_hour_cycle(result.hc->to_deprecated_string());
// 34. If relevantExtensionKeys contains "kf", then
if (relevant_extension_keys.span().contains_slow("kf"sv)) {
// a. Set locale.[[CaseFirst]] to r.[[kf]].
if (result.kf.has_value())
locale->set_case_first(result.kf.release_value());
locale->set_case_first(result.kf->to_deprecated_string());
}
// 35. If relevantExtensionKeys contains "kn", then
@ -359,7 +360,7 @@ ThrowCompletionOr<NonnullGCPtr<Object>> LocaleConstructor::construct(FunctionObj
// 36. Set locale.[[NumberingSystem]] to r.[[nu]].
if (result.nu.has_value())
locale->set_numbering_system(result.nu.release_value());
locale->set_numbering_system(result.nu->to_deprecated_string());
// 37. Return locale.
return locale;

View File

@ -61,7 +61,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::maximize)
// 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
auto* locale_object = TRY(typed_this_object(vm));
auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale());
auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale()));
VERIFY(locale.has_value());
// 3. Let maximal be the result of the Add Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set maximal to loc.[[Locale]].
@ -81,7 +81,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::minimize)
// 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]).
auto* locale_object = TRY(typed_this_object(vm));
auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale());
auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale()));
VERIFY(locale.has_value());
// 3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set minimal to loc.[[Locale]].
@ -111,7 +111,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::base_name)
auto* locale_object = TRY(typed_this_object(vm));
// 3. Let locale be loc.[[Locale]].
auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale());
auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale()));
VERIFY(locale.has_value());
// 4. Return the substring of locale corresponding to the unicode_language_id production.
@ -160,13 +160,13 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::language)
auto* locale_object = TRY(typed_this_object(vm));
// 3. Let locale be loc.[[Locale]].
auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale());
auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale()));
// 4. Assert: locale matches the unicode_locale_id production.
VERIFY(locale.has_value());
// 5. Return the substring of locale corresponding to the unicode_language_subtag production of the unicode_language_id.
return PrimitiveString::create(vm, *locale->language_id.language);
return PrimitiveString::create(vm, locale->language_id.language.release_value());
}
// 14.3.14 get Intl.Locale.prototype.script, https://tc39.es/ecma402/#sec-Intl.Locale.prototype.script
@ -177,7 +177,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::script)
auto* locale_object = TRY(typed_this_object(vm));
// 3. Let locale be loc.[[Locale]].
auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale());
auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale()));
// 4. Assert: locale matches the unicode_locale_id production.
VERIFY(locale.has_value());
@ -187,7 +187,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::script)
return js_undefined();
// 6. Return the substring of locale corresponding to the unicode_script_subtag production of the unicode_language_id.
return PrimitiveString::create(vm, *locale->language_id.script);
return PrimitiveString::create(vm, locale->language_id.script.release_value());
}
// 14.3.15 get Intl.Locale.prototype.region, https://tc39.es/ecma402/#sec-Intl.Locale.prototype.region
@ -198,7 +198,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::region)
auto* locale_object = TRY(typed_this_object(vm));
// 3. Let locale be loc.[[Locale]].
auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale());
auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale()));
// 4. Assert: locale matches the unicode_locale_id production.
VERIFY(locale.has_value());
@ -208,7 +208,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::region)
return js_undefined();
// 6. Return the substring of locale corresponding to the unicode_region_subtag production of the unicode_language_id.
return PrimitiveString::create(vm, *locale->language_id.region);
return PrimitiveString::create(vm, locale->language_id.region.release_value());
}
#define JS_ENUMERATE_LOCALE_INFO_PROPERTIES \
@ -238,7 +238,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::time_zones)
auto* locale_object = TRY(typed_this_object(vm));
// 3. Let locale be loc.[[Locale]].
auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale());
auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale()));
// 4. If the unicode_language_id production of locale does not contain the ["-" unicode_region_subtag] sequence, return undefined.
if (!locale.has_value() || !locale->language_id.region.has_value())

View File

@ -890,12 +890,12 @@ static ThrowCompletionOr<String> transform_case(VM& vm, String const& string, Va
// 2. If requestedLocales is not an empty List, then
if (!requested_locales.is_empty()) {
// a. Let requestedLocale be requestedLocales[0].
requested_locale = Locale::parse_unicode_locale_id(requested_locales[0]);
requested_locale = TRY_OR_THROW_OOM(vm, Locale::parse_unicode_locale_id(requested_locales[0]));
}
// 3. Else,
else {
// a. Let requestedLocale be ! DefaultLocale().
requested_locale = Locale::parse_unicode_locale_id(Locale::default_locale());
requested_locale = TRY_OR_THROW_OOM(vm, Locale::parse_unicode_locale_id(Locale::default_locale()));
}
VERIFY(requested_locale.has_value());

View File

@ -109,7 +109,7 @@ static auto find_regional_values_for_locale(StringView locale, GetRegionalValues
auto return_default_values = [&]() { return get_regional_values("001"sv); };
auto language = parse_unicode_language_id(locale);
auto language = parse_unicode_language_id(locale).release_value_but_fixme_should_propagate_errors();
if (!language.has_value())
return return_default_values();

View File

@ -91,7 +91,7 @@ bool is_type_identifier(StringView identifier)
return lexer.is_eof() && (lexer.tell() > 0);
}
static Optional<LanguageID> parse_unicode_language_id(GenericLexer& lexer)
static ErrorOr<Optional<LanguageID>> parse_unicode_language_id(GenericLexer& lexer)
{
// https://unicode.org/reports/tr35/#Unicode_language_identifier
//
@ -120,25 +120,25 @@ static Optional<LanguageID> parse_unicode_language_id(GenericLexer& lexer)
while (!lexer.is_eof() && (state != ParseState::Done)) {
auto segment = consume_next_segment(lexer, state != ParseState::ParsingLanguageOrScript);
if (!segment.has_value())
return {};
return OptionalNone {};
switch (state) {
case ParseState::ParsingLanguageOrScript:
if (is_unicode_language_subtag(*segment)) {
state = ParseState::ParsingScript;
language_id.language = *segment;
language_id.language = TRY(String::from_utf8(*segment));
} else if (is_unicode_script_subtag(*segment)) {
state = ParseState::ParsingRegion;
language_id.script = *segment;
language_id.script = TRY(String::from_utf8(*segment));
} else {
return {};
return OptionalNone {};
}
break;
case ParseState::ParsingScript:
if (is_unicode_script_subtag(*segment)) {
state = ParseState::ParsingRegion;
language_id.script = *segment;
language_id.script = TRY(String::from_utf8(*segment));
break;
}
@ -148,7 +148,7 @@ static Optional<LanguageID> parse_unicode_language_id(GenericLexer& lexer)
case ParseState::ParsingRegion:
if (is_unicode_region_subtag(*segment)) {
state = ParseState::ParsingVariant;
language_id.region = *segment;
language_id.region = TRY(String::from_utf8(*segment));
break;
}
@ -157,7 +157,7 @@ static Optional<LanguageID> parse_unicode_language_id(GenericLexer& lexer)
case ParseState::ParsingVariant:
if (is_unicode_variant_subtag(*segment)) {
language_id.variants.append(*segment);
TRY(language_id.variants.try_append(TRY(String::from_utf8(*segment))));
} else {
lexer.retreat(segment->length() + 1);
state = ParseState::Done;
@ -172,7 +172,7 @@ static Optional<LanguageID> parse_unicode_language_id(GenericLexer& lexer)
return language_id;
}
static Optional<LocaleExtension> parse_unicode_locale_extension(GenericLexer& lexer)
static ErrorOr<Optional<LocaleExtension>> parse_unicode_locale_extension(GenericLexer& lexer)
{
// https://unicode.org/reports/tr35/#unicode_locale_extensions
//
@ -191,7 +191,7 @@ static Optional<LocaleExtension> parse_unicode_locale_extension(GenericLexer& le
while (!lexer.is_eof() && (state != ParseState::Done)) {
auto segment = consume_next_segment(lexer);
if (!segment.has_value())
return {};
return OptionalNone {};
if (state == ParseState::ParsingAttributeOrKeyword)
state = is_key(*segment) ? ParseState::ParsingKeyword : ParseState::ParsingAttribute;
@ -199,7 +199,7 @@ static Optional<LocaleExtension> parse_unicode_locale_extension(GenericLexer& le
switch (state) {
case ParseState::ParsingAttribute:
if (is_attribute(*segment)) {
locale_extension.attributes.append(*segment);
TRY(locale_extension.attributes.try_append(TRY(String::from_utf8(*segment))));
break;
}
@ -208,7 +208,7 @@ static Optional<LocaleExtension> parse_unicode_locale_extension(GenericLexer& le
case ParseState::ParsingKeyword: {
// keyword = key (sep type)?
Keyword keyword { .key = *segment };
Keyword keyword { .key = TRY(String::from_utf8(*segment)) };
Vector<StringView> keyword_values;
if (!is_key(*segment)) {
@ -226,14 +226,14 @@ static Optional<LocaleExtension> parse_unicode_locale_extension(GenericLexer& le
break;
}
keyword_values.append(*type);
TRY(keyword_values.try_append(*type));
}
StringBuilder builder;
builder.join('-', keyword_values);
keyword.value = builder.build();
TRY(builder.try_join('-', keyword_values));
keyword.value = TRY(builder.to_string());
locale_extension.keywords.append(move(keyword));
TRY(locale_extension.keywords.try_append(move(keyword)));
break;
}
@ -243,11 +243,11 @@ static Optional<LocaleExtension> parse_unicode_locale_extension(GenericLexer& le
}
if (locale_extension.attributes.is_empty() && locale_extension.keywords.is_empty())
return {};
return OptionalNone {};
return locale_extension;
}
static Optional<TransformedExtension> parse_transformed_extension(GenericLexer& lexer)
static ErrorOr<Optional<TransformedExtension>> parse_transformed_extension(GenericLexer& lexer)
{
// https://unicode.org/reports/tr35/#transformed_extensions
//
@ -266,7 +266,7 @@ static Optional<TransformedExtension> parse_transformed_extension(GenericLexer&
while (!lexer.is_eof() && (state != ParseState::Done)) {
auto segment = consume_next_segment(lexer);
if (!segment.has_value())
return {};
return OptionalNone {};
if (state == ParseState::ParsingLanguageOrField)
state = is_unicode_language_subtag(*segment) ? ParseState::ParsingLanguage : ParseState::ParsingField;
@ -275,17 +275,17 @@ static Optional<TransformedExtension> parse_transformed_extension(GenericLexer&
case ParseState::ParsingLanguage:
lexer.retreat(segment->length());
if (auto language_id = parse_unicode_language_id(lexer); language_id.has_value()) {
if (auto language_id = TRY(parse_unicode_language_id(lexer)); language_id.has_value()) {
transformed_extension.language = language_id.release_value();
state = ParseState::ParsingField;
break;
}
return {};
return OptionalNone {};
case ParseState::ParsingField: {
// tfield = tkey tvalue;
TransformedField field { .key = *segment };
TransformedField field { .key = TRY(String::from_utf8(*segment)) };
Vector<StringView> field_values;
if (!is_transformed_key(*segment)) {
@ -303,17 +303,17 @@ static Optional<TransformedExtension> parse_transformed_extension(GenericLexer&
break;
}
field_values.append(*value);
TRY(field_values.try_append(*value));
}
if (field_values.is_empty())
return {};
return OptionalNone {};
StringBuilder builder;
builder.join('-', field_values);
field.value = builder.build();
TRY(builder.try_join('-', field_values));
field.value = TRY(builder.to_string());
transformed_extension.fields.append(move(field));
TRY(transformed_extension.fields.try_append(move(field)));
break;
}
@ -323,11 +323,11 @@ static Optional<TransformedExtension> parse_transformed_extension(GenericLexer&
}
if (!transformed_extension.language.has_value() && transformed_extension.fields.is_empty())
return {};
return OptionalNone {};
return transformed_extension;
}
static Optional<OtherExtension> parse_other_extension(char key, GenericLexer& lexer)
static ErrorOr<Optional<OtherExtension>> parse_other_extension(char key, GenericLexer& lexer)
{
// https://unicode.org/reports/tr35/#other_extensions
//
@ -336,7 +336,7 @@ static Optional<OtherExtension> parse_other_extension(char key, GenericLexer& le
Vector<StringView> other_values;
if (!is_ascii_alphanumeric(key) || (key == 'x') || (key == 'X'))
return {};
return OptionalNone {};
while (true) {
auto segment = consume_next_segment(lexer);
@ -348,20 +348,20 @@ static Optional<OtherExtension> parse_other_extension(char key, GenericLexer& le
break;
}
other_values.append(*segment);
TRY(other_values.try_append(*segment));
}
if (other_values.is_empty())
return {};
return OptionalNone {};
StringBuilder builder;
builder.join('-', other_values);
other_extension.value = builder.build();
TRY(builder.try_join('-', other_values));
other_extension.value = TRY(builder.to_string());
return other_extension;
}
static Optional<Extension> parse_extension(GenericLexer& lexer)
static ErrorOr<Optional<Extension>> parse_extension(GenericLexer& lexer)
{
// https://unicode.org/reports/tr35/#extensions
//
@ -372,28 +372,28 @@ static Optional<Extension> parse_extension(GenericLexer& lexer)
switch (char key = (*header)[0]) {
case 'u':
case 'U':
if (auto extension = parse_unicode_locale_extension(lexer); extension.has_value())
if (auto extension = TRY(parse_unicode_locale_extension(lexer)); extension.has_value())
return Extension { extension.release_value() };
break;
case 't':
case 'T':
if (auto extension = parse_transformed_extension(lexer); extension.has_value())
if (auto extension = TRY(parse_transformed_extension(lexer)); extension.has_value())
return Extension { extension.release_value() };
break;
default:
if (auto extension = parse_other_extension(key, lexer); extension.has_value())
if (auto extension = TRY(parse_other_extension(key, lexer)); extension.has_value())
return Extension { extension.release_value() };
break;
}
}
lexer.retreat(lexer.tell() - starting_position);
return {};
return OptionalNone {};
}
static Vector<DeprecatedString> parse_private_use_extensions(GenericLexer& lexer)
static ErrorOr<Vector<String>> parse_private_use_extensions(GenericLexer& lexer)
{
// https://unicode.org/reports/tr35/#pu_extensions
//
@ -402,10 +402,10 @@ static Vector<DeprecatedString> parse_private_use_extensions(GenericLexer& lexer
auto header = consume_next_segment(lexer);
if (!header.has_value())
return {};
return Vector<String> {};
auto parse_values = [&]() -> Vector<DeprecatedString> {
Vector<DeprecatedString> extensions;
auto parse_values = [&]() -> ErrorOr<Vector<String>> {
Vector<String> extensions;
while (true) {
auto segment = consume_next_segment(lexer);
@ -417,33 +417,33 @@ static Vector<DeprecatedString> parse_private_use_extensions(GenericLexer& lexer
break;
}
extensions.append(*segment);
TRY(extensions.try_append(TRY(String::from_utf8(*segment))));
}
return extensions;
};
if ((header->length() == 1) && (((*header)[0] == 'x') || ((*header)[0] == 'X'))) {
if (auto extensions = parse_values(); !extensions.is_empty())
if (auto extensions = TRY(parse_values()); !extensions.is_empty())
return extensions;
}
lexer.retreat(lexer.tell() - starting_position);
return {};
return Vector<String> {};
}
// Parses `language` as a complete Unicode language identifier by running the
// GenericLexer-based overload over the whole input.
// Yields an empty Optional when the lexer has not consumed the entire string.
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped, so each changed statement appears twice (old form first, then its
// ErrorOr/TRY replacement) - confirm against the real file before editing.
Optional<LanguageID> parse_unicode_language_id(StringView language)
ErrorOr<Optional<LanguageID>> parse_unicode_language_id(StringView language)
{
GenericLexer lexer { language };
auto language_id = parse_unicode_language_id(lexer);
auto language_id = TRY(parse_unicode_language_id(lexer));
// Leftover input means the string was not solely a language identifier, so reject it.
if (!lexer.is_eof())
return {};
return OptionalNone {};
return language_id;
}
Optional<LocaleID> parse_unicode_locale_id(StringView locale)
ErrorOr<Optional<LocaleID>> parse_unicode_locale_id(StringView locale)
{
GenericLexer lexer { locale };
@ -452,28 +452,28 @@ Optional<LocaleID> parse_unicode_locale_id(StringView locale)
// unicode_locale_id = unicode_language_id
// extensions*
// pu_extensions?
auto language_id = parse_unicode_language_id(lexer);
auto language_id = TRY(parse_unicode_language_id(lexer));
if (!language_id.has_value())
return {};
return OptionalNone {};
LocaleID locale_id { language_id.release_value() };
while (true) {
auto extension = parse_extension(lexer);
auto extension = TRY(parse_extension(lexer));
if (!extension.has_value())
break;
locale_id.extensions.append(extension.release_value());
TRY(locale_id.extensions.try_append(extension.release_value()));
}
locale_id.private_use_extensions = parse_private_use_extensions(lexer);
locale_id.private_use_extensions = TRY(parse_private_use_extensions(lexer));
if (!lexer.is_eof())
return {};
return OptionalNone {};
return locale_id;
}
static void perform_hard_coded_key_value_substitutions(StringView key, DeprecatedString& value)
static ErrorOr<void> perform_hard_coded_key_value_substitutions(StringView key, String& value)
{
// FIXME: In the XML export of CLDR, there are some aliases defined in the following files:
// https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/calendar.xml
@ -483,65 +483,71 @@ static void perform_hard_coded_key_value_substitutions(StringView key, Deprecate
// https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/transform.xml
//
// There isn't yet a counterpart in the JSON export. See: https://unicode-org.atlassian.net/browse/CLDR-14571
Optional<StringView> result;
if (key == "ca"sv) {
if (value == "islamicc"sv)
value = "islamic-civil"sv;
result = "islamic-civil"sv;
else if (value == "ethiopic-amete-alem"sv)
value = "ethioaa"sv;
result = "ethioaa"sv;
} else if (key.is_one_of("kb"sv, "kc"sv, "kh"sv, "kk"sv, "kn"sv) && (value == "yes"sv)) {
value = "true"sv;
result = "true"sv;
} else if (key == "ks"sv) {
if (value == "primary"sv)
value = "level1"sv;
result = "level1"sv;
else if (value == "tertiary"sv)
value = "level3"sv;
result = "level3"sv;
// Note: There are also aliases for "secondary", "quaternary", "quarternary", and "identical",
// but those are semantically incorrect values (they are too long), so they can be skipped.
} else if ((key == "m0"sv) && (value == "names"sv)) {
value = "prprname"sv;
result = "prprname"sv;
} else if ((key == "ms"sv) && (value == "imperial"sv)) {
value = "uksystem"sv;
result = "uksystem"sv;
} else if (key == "tz"sv) {
// Formatter disabled because this block is easier to read / check against timezone.xml as one-liners.
// clang-format off
if (value == "aqams"sv) value = "nzakl"sv;
else if (value == "cnckg"sv) value = "cnsha"sv;
else if (value == "cnhrb"sv) value = "cnsha"sv;
else if (value == "cnkhg"sv) value = "cnurc"sv;
else if (value == "cuba"sv) value = "cuhav"sv;
else if (value == "egypt"sv) value = "egcai"sv;
else if (value == "eire"sv) value = "iedub"sv;
else if (value == "est"sv) value = "utcw05"sv;
else if (value == "gmt0"sv) value = "gmt"sv;
else if (value == "hongkong"sv) value = "hkhkg"sv;
else if (value == "hst"sv) value = "utcw10"sv;
else if (value == "iceland"sv) value = "isrey"sv;
else if (value == "iran"sv) value = "irthr"sv;
else if (value == "israel"sv) value = "jeruslm"sv;
else if (value == "jamaica"sv) value = "jmkin"sv;
else if (value == "japan"sv) value = "jptyo"sv;
else if (value == "kwajalein"sv) value = "mhkwa"sv;
else if (value == "libya"sv) value = "lytip"sv;
else if (value == "mst"sv) value = "utcw07"sv;
else if (value == "navajo"sv) value = "usden"sv;
else if (value == "poland"sv) value = "plwaw"sv;
else if (value == "portugal"sv) value = "ptlis"sv;
else if (value == "prc"sv) value = "cnsha"sv;
else if (value == "roc"sv) value = "twtpe"sv;
else if (value == "rok"sv) value = "krsel"sv;
else if (value == "singapore"sv) value = "sgsin"sv;
else if (value == "turkey"sv) value = "trist"sv;
else if (value == "uct"sv) value = "utc"sv;
else if (value == "usnavajo"sv) value = "usden"sv;
else if (value == "zulu"sv) value = "utc"sv;
if (value == "aqams"sv) result = "nzakl"sv;
else if (value == "cnckg"sv) result = "cnsha"sv;
else if (value == "cnhrb"sv) result = "cnsha"sv;
else if (value == "cnkhg"sv) result = "cnurc"sv;
else if (value == "cuba"sv) result = "cuhav"sv;
else if (value == "egypt"sv) result = "egcai"sv;
else if (value == "eire"sv) result = "iedub"sv;
else if (value == "est"sv) result = "utcw05"sv;
else if (value == "gmt0"sv) result = "gmt"sv;
else if (value == "hongkong"sv) result = "hkhkg"sv;
else if (value == "hst"sv) result = "utcw10"sv;
else if (value == "iceland"sv) result = "isrey"sv;
else if (value == "iran"sv) result = "irthr"sv;
else if (value == "israel"sv) result = "jeruslm"sv;
else if (value == "jamaica"sv) result = "jmkin"sv;
else if (value == "japan"sv) result = "jptyo"sv;
else if (value == "kwajalein"sv) result = "mhkwa"sv;
else if (value == "libya"sv) result = "lytip"sv;
else if (value == "mst"sv) result = "utcw07"sv;
else if (value == "navajo"sv) result = "usden"sv;
else if (value == "poland"sv) result = "plwaw"sv;
else if (value == "portugal"sv) result = "ptlis"sv;
else if (value == "prc"sv) result = "cnsha"sv;
else if (value == "roc"sv) result = "twtpe"sv;
else if (value == "rok"sv) result = "krsel"sv;
else if (value == "singapore"sv) result = "sgsin"sv;
else if (value == "turkey"sv) result = "trist"sv;
else if (value == "uct"sv) result = "utc"sv;
else if (value == "usnavajo"sv) result = "usden"sv;
else if (value == "zulu"sv) result = "utc"sv;
// clang-format on
}
if (result.has_value())
value = TRY(String::from_utf8(*result));
return {};
}
void canonicalize_unicode_extension_values(StringView key, DeprecatedString& value, bool remove_true)
ErrorOr<void> canonicalize_unicode_extension_values(StringView key, String& value, bool remove_true)
{
value = value.to_lowercase();
perform_hard_coded_key_value_substitutions(key, value);
value = TRY(value.to_lowercase());
TRY(perform_hard_coded_key_value_substitutions(key, value));
// Note: The spec says to remove "true" type and tfield values but that is believed to be a bug in the spec
// because, for tvalues, that would result in invalid syntax:
@ -550,7 +556,7 @@ void canonicalize_unicode_extension_values(StringView key, DeprecatedString& val
// https://github.com/tc39/test262/blob/18bb955771669541c56c28748603f6afdb2e25ff/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js
if (remove_true && (value == "true"sv)) {
value = {};
return;
return {};
}
if (key.is_one_of("sd"sv, "rg"sv)) {
@ -560,26 +566,28 @@ void canonicalize_unicode_extension_values(StringView key, DeprecatedString& val
// FIXME: Subdivision subtags do not appear in the CLDR likelySubtags.json file.
// Implement the spec's recommendation of using just the first alias for now,
// but we should determine if there's anything else needed here.
value = aliases[0].to_deprecated_string();
value = TRY(String::from_utf8(aliases[0]));
}
}
return {};
}
static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
static ErrorOr<void> transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
{
auto canonicalize_language = [&](LanguageID& language_id, bool force_lowercase) {
language_id.language = language_id.language->to_lowercase();
auto canonicalize_language = [&](LanguageID& language_id, bool force_lowercase) -> ErrorOr<void> {
language_id.language = TRY(language_id.language->to_lowercase());
if (language_id.script.has_value())
language_id.script = language_id.script->to_titlecase();
language_id.script = TRY(language_id.script->to_titlecase());
if (language_id.region.has_value())
language_id.region = language_id.region->to_uppercase();
language_id.region = TRY(language_id.region->to_uppercase());
for (auto& variant : language_id.variants)
variant = variant.to_lowercase();
variant = TRY(variant.to_lowercase());
resolve_complex_language_aliases(language_id);
if (auto alias = resolve_language_alias(*language_id.language); alias.has_value()) {
auto language_alias = parse_unicode_language_id(*alias);
auto language_alias = TRY(parse_unicode_language_id(*alias));
VERIFY(language_alias.has_value());
language_id.language = move(language_alias->language);
@ -593,31 +601,33 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
if (language_id.script.has_value()) {
if (auto alias = resolve_script_tag_alias(*language_id.script); alias.has_value())
language_id.script = move(*alias);
language_id.script = TRY(String::from_utf8(*alias));
}
if (language_id.region.has_value()) {
if (auto alias = resolve_territory_alias(*language_id.region); alias.has_value())
language_id.region = resolve_most_likely_territory_alias(language_id, *alias);
language_id.region = TRY(String::from_deprecated_string(resolve_most_likely_territory_alias(language_id, *alias)));
}
quick_sort(language_id.variants);
for (auto& variant : language_id.variants) {
variant = variant.to_lowercase();
variant = TRY(variant.to_lowercase());
if (auto alias = resolve_variant_alias(variant); alias.has_value())
variant = move(*alias);
variant = TRY(String::from_utf8(*alias));
}
if (force_lowercase) {
if (language_id.script.has_value())
language_id.script = language_id.script->to_lowercase();
language_id.script = TRY(language_id.script->to_lowercase());
if (language_id.region.has_value())
language_id.region = language_id.region->to_lowercase();
language_id.region = TRY(language_id.region->to_lowercase());
}
return {};
};
canonicalize_language(locale_id.language_id, false);
TRY(canonicalize_language(locale_id.language_id, false));
quick_sort(locale_id.extensions, [](auto const& left, auto const& right) {
auto key = [](auto const& extension) {
@ -631,109 +641,119 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id)
});
for (auto& extension : locale_id.extensions) {
extension.visit(
[&](LocaleExtension& ext) {
TRY(extension.visit(
[&](LocaleExtension& ext) -> ErrorOr<void> {
for (auto& attribute : ext.attributes)
attribute = attribute.to_lowercase();
attribute = TRY(attribute.to_lowercase());
for (auto& keyword : ext.keywords) {
keyword.key = keyword.key.to_lowercase();
canonicalize_unicode_extension_values(keyword.key, keyword.value, true);
keyword.key = TRY(keyword.key.to_lowercase());
TRY(canonicalize_unicode_extension_values(keyword.key, keyword.value, true));
}
quick_sort(ext.attributes);
quick_sort(ext.keywords, [](auto const& a, auto const& b) { return a.key < b.key; });
return {};
},
[&](TransformedExtension& ext) {
[&](TransformedExtension& ext) -> ErrorOr<void> {
if (ext.language.has_value())
canonicalize_language(*ext.language, true);
TRY(canonicalize_language(*ext.language, true));
for (auto& field : ext.fields) {
field.key = field.key.to_lowercase();
canonicalize_unicode_extension_values(field.key, field.value, false);
field.key = TRY(field.key.to_lowercase());
TRY(canonicalize_unicode_extension_values(field.key, field.value, false));
}
quick_sort(ext.fields, [](auto const& a, auto const& b) { return a.key < b.key; });
return {};
},
[&](OtherExtension& ext) {
[&](OtherExtension& ext) -> ErrorOr<void> {
ext.key = static_cast<char>(to_ascii_lowercase(ext.key));
ext.value = ext.value.to_lowercase();
});
ext.value = TRY(ext.value.to_lowercase());
return {};
}));
}
for (auto& extension : locale_id.private_use_extensions)
extension = extension.to_lowercase();
extension = TRY(extension.to_lowercase());
return {};
}
// Builds the canonical string form of `locale_id`.
//
// `locale_id` is taken by mutable reference and is rewritten in place by
// transform_unicode_locale_id_to_canonical_syntax() before being serialized.
// Yields an empty Optional when the locale has no language subtag.
//
// Output order: language, script, region, variants, then each extension
// ("-u", "-t", or a single-character key), then "-x" private use extensions.
//
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped, so each changed statement appears twice (old form first, then its
// ErrorOr/TRY replacement) - confirm against the real file before editing.
Optional<DeprecatedString> canonicalize_unicode_locale_id(LocaleID& locale_id)
ErrorOr<Optional<String>> canonicalize_unicode_locale_id(LocaleID& locale_id)
{
// https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
StringBuilder builder;
// Appends "-<string>" to the builder. Missing or empty strings are skipped so that
// no dangling separator is emitted for them.
auto append_sep_and_string = [&](Optional<DeprecatedString> const& string) {
auto append_sep_and_string = [&](Optional<String> const& string) -> ErrorOr<void> {
if (!string.has_value() || string->is_empty())
return;
builder.appendff("-{}", *string);
return {};
TRY(builder.try_appendff("-{}", *string));
return {};
};
// Without a language subtag there is nothing to start the identifier with.
if (!locale_id.language_id.language.has_value())
return {};
return OptionalNone {};
transform_unicode_locale_id_to_canonical_syntax(locale_id);
TRY(transform_unicode_locale_id_to_canonical_syntax(locale_id));
// The language subtag goes first and is the only segment without a leading separator.
builder.append(locale_id.language_id.language->to_lowercase());
append_sep_and_string(locale_id.language_id.script);
append_sep_and_string(locale_id.language_id.region);
TRY(builder.try_append(TRY(locale_id.language_id.language->to_lowercase())));
TRY(append_sep_and_string(locale_id.language_id.script));
TRY(append_sep_and_string(locale_id.language_id.region));
for (auto const& variant : locale_id.language_id.variants)
append_sep_and_string(variant);
TRY(append_sep_and_string(variant));
// Each extension variant alternative is serialized with its own prefix.
for (auto const& extension : locale_id.extensions) {
extension.visit(
[&](LocaleExtension const& ext) {
builder.append("-u"sv);
TRY(extension.visit(
[&](LocaleExtension const& ext) -> ErrorOr<void> {
TRY(builder.try_append("-u"sv));
for (auto const& attribute : ext.attributes)
append_sep_and_string(attribute);
TRY(append_sep_and_string(attribute));
for (auto const& keyword : ext.keywords) {
append_sep_and_string(keyword.key);
append_sep_and_string(keyword.value);
TRY(append_sep_and_string(keyword.key));
TRY(append_sep_and_string(keyword.value));
}
return {};
},
[&](TransformedExtension const& ext) {
builder.append("-t"sv);
[&](TransformedExtension const& ext) -> ErrorOr<void> {
TRY(builder.try_append("-t"sv));
// The optional embedded language identifier precedes the fields.
if (ext.language.has_value()) {
append_sep_and_string(ext.language->language);
append_sep_and_string(ext.language->script);
append_sep_and_string(ext.language->region);
TRY(append_sep_and_string(ext.language->language));
TRY(append_sep_and_string(ext.language->script));
TRY(append_sep_and_string(ext.language->region));
for (auto const& variant : ext.language->variants)
append_sep_and_string(variant);
TRY(append_sep_and_string(variant));
}
for (auto const& field : ext.fields) {
append_sep_and_string(field.key);
append_sep_and_string(field.value);
TRY(append_sep_and_string(field.key));
TRY(append_sep_and_string(field.value));
}
return {};
},
[&](OtherExtension const& ext) {
builder.appendff("-{:c}", to_ascii_lowercase(ext.key));
append_sep_and_string(ext.value);
});
[&](OtherExtension const& ext) -> ErrorOr<void> {
TRY(builder.try_appendff("-{:c}", to_ascii_lowercase(ext.key)));
TRY(append_sep_and_string(ext.value));
return {};
}));
}
// Private use extensions come last, introduced by a single "-x".
if (!locale_id.private_use_extensions.is_empty()) {
builder.append("-x"sv);
TRY(builder.try_append("-x"sv));
for (auto const& extension : locale_id.private_use_extensions)
append_sep_and_string(extension);
TRY(append_sep_and_string(extension));
}
return builder.build();
return builder.to_string();
}
// Returns the default locale, which is hard-coded to "en".
// NOTE(review): this span reads like a rendered diff with the +/- markers
// stripped; the DeprecatedString signature and static local look like the old
// version and the StringView lines their replacement - confirm before editing.
DeprecatedString const& default_locale()
StringView default_locale()
{
static DeprecatedString locale = "en"sv;
return locale;
return "en"sv;
}
bool is_locale_available(StringView locale)
@ -872,7 +892,7 @@ Optional<LanguageID> remove_likely_subtags(LanguageID const& language_id)
// 4. Then for trial in {languagemax, languagemax_regionmax, languagemax_scriptmax}:
// If AddLikelySubtags(trial) = max, then return trial + variants.
auto run_trial = [&](Optional<DeprecatedString> language, Optional<DeprecatedString> script, Optional<DeprecatedString> region) -> Optional<LanguageID> {
auto run_trial = [&](Optional<String> language, Optional<String> script, Optional<String> region) -> Optional<LanguageID> {
LanguageID trial { .language = move(language), .script = move(script), .region = move(region) };
if (add_likely_subtags(trial) == maximized)
@ -910,7 +930,7 @@ ErrorOr<String> LanguageID::to_string() const
{
StringBuilder builder;
auto append_segment = [&](Optional<DeprecatedString> const& segment) -> ErrorOr<void> {
auto append_segment = [&](Optional<String> const& segment) -> ErrorOr<void> {
if (!segment.has_value())
return {};
if (!builder.is_empty())
@ -937,21 +957,21 @@ ErrorOr<String> LocaleID::to_string() const
{
StringBuilder builder;
auto append_segment = [&](Optional<DeprecatedString> const& segment) -> ErrorOr<void> {
if (!segment.has_value() || segment->is_empty())
auto append_segment = [&](auto const& segment) -> ErrorOr<void> {
if (segment.is_empty())
return {};
if (!builder.is_empty())
TRY(builder.try_append('-'));
TRY(builder.try_append(*segment));
TRY(builder.try_append(segment));
return {};
};
TRY(append_segment(language_id.to_deprecated_string()));
TRY(append_segment(TRY(language_id.to_string())));
for (auto const& extension : extensions) {
TRY(extension.visit(
[&](LocaleExtension const& ext) -> ErrorOr<void> {
builder.append("-u"sv);
TRY(builder.try_append("-u"sv));
for (auto const& attribute : ext.attributes)
TRY(append_segment(attribute));
for (auto const& keyword : ext.keywords) {
@ -961,9 +981,9 @@ ErrorOr<String> LocaleID::to_string() const
return {};
},
[&](TransformedExtension const& ext) -> ErrorOr<void> {
builder.append("-t"sv);
TRY(builder.try_append("-t"sv));
if (ext.language.has_value())
TRY(append_segment(ext.language->to_deprecated_string()));
TRY(append_segment(TRY(ext.language->to_string())));
for (auto const& field : ext.fields) {
TRY(append_segment(field.key));
TRY(append_segment(field.value));
@ -971,14 +991,14 @@ ErrorOr<String> LocaleID::to_string() const
return {};
},
[&](OtherExtension const& ext) -> ErrorOr<void> {
builder.appendff("-{}", ext.key);
TRY(builder.try_appendff("-{}", ext.key));
TRY(append_segment(ext.value));
return {};
}));
}
if (!private_use_extensions.is_empty()) {
builder.append("-x"sv);
TRY(builder.try_append("-x"sv));
for (auto const& extension : private_use_extensions)
TRY(append_segment(extension));
}

View File

@ -24,25 +24,25 @@ struct LanguageID {
bool operator==(LanguageID const&) const = default;
bool is_root { false };
Optional<DeprecatedString> language {};
Optional<DeprecatedString> script {};
Optional<DeprecatedString> region {};
Vector<DeprecatedString> variants {};
Optional<String> language {};
Optional<String> script {};
Optional<String> region {};
Vector<String> variants {};
};
// A key/value pair; this is the element type of LocaleExtension::keywords.
// NOTE(review): each member is declared twice, once as DeprecatedString and
// once as String; the String declarations appear to be the replacements — confirm.
struct Keyword {
DeprecatedString key {};
DeprecatedString value {};
String key {};
String value {};
};
// The extension that LocaleID::to_string() serializes after a "-u" marker:
// first every entry of `attributes`, then the entries of `keywords`.
// NOTE(review): `attributes` is declared twice (DeprecatedString and String
// element types); the String form appears to be the replacement — confirm.
struct LocaleExtension {
Vector<DeprecatedString> attributes {};
Vector<String> attributes {};
Vector<Keyword> keywords {};
};
// A key/value pair. Presumably the element type of TransformedExtension::fields,
// whose entries LocaleID::to_string() emits as key followed by value — confirm
// against the TransformedExtension definition.
// NOTE(review): each member is declared twice, once as DeprecatedString and
// once as String; the String declarations appear to be the replacements — confirm.
struct TransformedField {
DeprecatedString key {};
DeprecatedString value {};
String key {};
String value {};
};
struct TransformedExtension {
@ -52,7 +52,7 @@ struct TransformedExtension {
// An extension identified by a single character. LocaleID::to_string() writes
// it as "-<key>" followed by `value` as a further segment when non-empty.
// NOTE(review): `value` is declared twice (DeprecatedString and String); the
// String declaration appears to be the replacement — confirm.
struct OtherExtension {
char key {};
DeprecatedString value {};
String value {};
};
// One extension of a locale: exactly one of the three extension structs.
// LocaleID::extensions holds a Vector of these, and LocaleID::to_string()
// dispatches on the active alternative via visit().
using Extension = AK::Variant<LocaleExtension, TransformedExtension, OtherExtension>;
@ -79,7 +79,7 @@ struct LocaleID {
LanguageID language_id {};
Vector<Extension> extensions {};
Vector<DeprecatedString> private_use_extensions {};
Vector<String> private_use_extensions {};
};
enum class Style : u8 {
@ -140,13 +140,13 @@ constexpr bool is_unicode_variant_subtag(StringView subtag)
// Free-function declarations for locale parsing, canonicalization and lookup.
// NOTE(review): several functions are declared twice with different return or
// parameter types. The Optional/void/DeprecatedString forms look like the
// pre-change signatures and the ErrorOr/String/StringView forms like their
// replacements — confirm before relying on either.

// Predicate over a string view; the exact grammar it checks is not visible here.
bool is_type_identifier(StringView);
// Parse a language ID / locale ID from text. The Optional return is presumably
// empty when the input does not parse — confirm against the implementation.
Optional<LanguageID> parse_unicode_language_id(StringView);
Optional<LocaleID> parse_unicode_locale_id(StringView);
// Same parsers wrapped in ErrorOr, so a failure (presumably allocation
// failure — confirm) is reported separately from an empty Optional.
ErrorOr<Optional<LanguageID>> parse_unicode_language_id(StringView);
ErrorOr<Optional<LocaleID>> parse_unicode_locale_id(StringView);
// Canonicalization helpers. `value` and the LocaleID are taken by non-const
// reference, so they may be modified in place.
void canonicalize_unicode_extension_values(StringView key, DeprecatedString& value, bool remove_true);
Optional<DeprecatedString> canonicalize_unicode_locale_id(LocaleID&);
// ErrorOr-returning forms of the same helpers, operating on String.
ErrorOr<void> canonicalize_unicode_extension_values(StringView key, String& value, bool remove_true);
ErrorOr<Optional<String>> canonicalize_unicode_locale_id(LocaleID&);
// Returns the default locale identifier (the definition returns "en").
DeprecatedString const& default_locale();
StringView default_locale();
// Presumably reports whether data exists for the given locale — confirm.
bool is_locale_available(StringView locale);
// Returns a read-only span of string views for the given keyword key; the
// name suggests these are the values available for that key — confirm.
Span<StringView const> get_available_keyword_values(StringView key);