LibJS+LibLocale: Replace number range formatting with ICU

This uses ICU for the Intl.NumberFormat `formatRange` and
`formatRangeToParts` prototypes.

Note: With the changes to the test files in this patch, all affected
expectations are now aligned with both Chrome and Safari.
This commit is contained in:
Timothy Flynn 2024-06-09 19:26:25 -04:00 committed by Andreas Kling
parent 67f3de2320
commit f6bee0f5a8
Notes: sideshowbarker 2024-07-17 02:22:23 +09:00
6 changed files with 148 additions and 195 deletions

View File

@ -229,7 +229,7 @@ ThrowCompletionOr<MathematicalValue> to_intl_mathematical_value(VM& vm, Value va
}
// 15.5.19 PartitionNumberRangePattern ( numberFormat, x, y ), https://tc39.es/ecma402/#sec-partitionnumberrangepattern
ThrowCompletionOr<Vector<PatternPartitionWithSource>> partition_number_range_pattern(VM& vm, NumberFormat& number_format, MathematicalValue start, MathematicalValue end)
ThrowCompletionOr<Vector<::Locale::NumberFormat::Partition>> partition_number_range_pattern(VM& vm, NumberFormat const& number_format, MathematicalValue const& start, MathematicalValue const& end)
{
// 1. If x is NaN or y is NaN, throw a RangeError exception.
if (start.is_nan())
@ -237,132 +237,38 @@ ThrowCompletionOr<Vector<PatternPartitionWithSource>> partition_number_range_pat
if (end.is_nan())
return vm.throw_completion<RangeError>(ErrorType::NumberIsNaN, "end"sv);
// 2. Let result be a new empty List.
Vector<PatternPartitionWithSource> result;
// 3. Let xResult be ? PartitionNumberPattern(numberFormat, x).
auto raw_start_result = partition_number_pattern(number_format, start);
auto start_result = PatternPartitionWithSource::create_from_parent_list(move(raw_start_result));
// 4. Let yResult be ? PartitionNumberPattern(numberFormat, y).
auto raw_end_result = partition_number_pattern(number_format, end);
auto end_result = PatternPartitionWithSource::create_from_parent_list(move(raw_end_result));
// 5. If ! FormatNumeric(numberFormat, x) is equal to ! FormatNumeric(numberFormat, y), then
auto formatted_start = format_numeric(number_format, start);
auto formatted_end = format_numeric(number_format, end);
if (formatted_start == formatted_end) {
// a. Let appxResult be ? FormatApproximately(numberFormat, xResult).
auto approximate_result = format_approximately(number_format, move(start_result));
// b. For each r in appxResult, do
for (auto& result : approximate_result) {
// i. Set r.[[Source]] to "shared".
result.source = "shared"sv;
}
// c. Return appxResult.
return approximate_result;
}
// 6. For each element r in xResult, do
result.ensure_capacity(start_result.size());
for (auto& start_part : start_result) {
// a. Append a new Record { [[Type]]: r.[[Type]], [[Value]]: r.[[Value]], [[Source]]: "startRange" } as the last element of result.
PatternPartitionWithSource part;
part.type = start_part.type;
part.value = move(start_part.value);
part.source = "startRange"sv;
result.unchecked_append(move(part));
}
// 7. Let rangeSeparator be an ILND String value used to separate two numbers.
auto range_separator_symbol = ::Locale::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), ::Locale::NumericSymbol::RangeSeparator).value_or("-"sv);
auto range_separator = ::Locale::augment_range_pattern(range_separator_symbol, result.last().value, end_result[0].value);
// 8. Append a new Record { [[Type]]: "literal", [[Value]]: rangeSeparator, [[Source]]: "shared" } element to result.
PatternPartitionWithSource part;
part.type = "literal"sv;
part.value = range_separator.has_value()
? range_separator.release_value()
: MUST(String::from_utf8(range_separator_symbol));
part.source = "shared"sv;
result.append(move(part));
// 9. For each element r in yResult, do
result.ensure_capacity(result.size() + end_result.size());
for (auto& end_part : end_result) {
// a. Append a new Record { [[Type]]: r.[[Type]], [[Value]]: r.[[Value]], [[Source]]: "endRange" } as the last element of result.
PatternPartitionWithSource part;
part.type = end_part.type;
part.value = move(end_part.value);
part.source = "endRange"sv;
result.unchecked_append(move(part));
}
// 10. Return ! CollapseNumberRange(result).
return collapse_number_range(move(result));
}
// 15.5.20 FormatApproximately ( numberFormat, result ), https://tc39.es/ecma402/#sec-formatapproximately
Vector<PatternPartitionWithSource> format_approximately(NumberFormat& number_format, Vector<PatternPartitionWithSource> result)
{
// 1. Let approximatelySign be an ILND String value used to signify that a number is approximate.
auto approximately_sign = ::Locale::get_number_system_symbol(number_format.data_locale(), number_format.numbering_system(), ::Locale::NumericSymbol::ApproximatelySign);
// 2. If approximatelySign is not empty, insert a new Record { [[Type]]: "approximatelySign", [[Value]]: approximatelySign } at an ILND index in result. For example, if numberFormat has [[Locale]] "en-US" and [[NumberingSystem]] "latn" and [[Style]] "decimal", the new Record might be inserted before the first element of result.
if (approximately_sign.has_value() && !approximately_sign->is_empty()) {
PatternPartitionWithSource partition;
partition.type = "approximatelySign"sv;
partition.value = MUST(String::from_utf8(*approximately_sign));
result.insert_before_matching(move(partition), [](auto const& part) {
return part.type.is_one_of("integer"sv, "decimal"sv, "plusSign"sv, "minusSign"sv, "percentSign"sv, "currency"sv);
});
}
// 3. Return result.
return result;
}
// 15.5.21 CollapseNumberRange ( result ), https://tc39.es/ecma402/#sec-collapsenumberrange
Vector<PatternPartitionWithSource> collapse_number_range(Vector<PatternPartitionWithSource> result)
{
// Returning result unmodified is guaranteed to be a correct implementation of CollapseNumberRange.
return result;
return number_format.formatter().format_range_to_parts(start.to_value(), end.to_value());
}
// 15.5.22 FormatNumericRange ( numberFormat, x, y ), https://tc39.es/ecma402/#sec-formatnumericrange
ThrowCompletionOr<String> format_numeric_range(VM& vm, NumberFormat& number_format, MathematicalValue start, MathematicalValue end)
ThrowCompletionOr<String> format_numeric_range(VM& vm, NumberFormat const& number_format, MathematicalValue const& start, MathematicalValue const& end)
{
// 1. Let parts be ? PartitionNumberRangePattern(numberFormat, x, y).
auto parts = TRY(partition_number_range_pattern(vm, number_format, move(start), move(end)));
{
// NOTE: We short-circuit PartitionNumberRangePattern as we do not need individual partitions. But we must still
// perform the NaN sanity checks from its first step.
// 2. Let result be the empty String.
StringBuilder result;
// 3. For each part in parts, do
for (auto& part : parts) {
// a. Set result to the string-concatenation of result and part.[[Value]].
result.append(part.value);
// 1. If x is NaN or y is NaN, throw a RangeError exception.
if (start.is_nan())
return vm.throw_completion<RangeError>(ErrorType::NumberIsNaN, "start"sv);
if (end.is_nan())
return vm.throw_completion<RangeError>(ErrorType::NumberIsNaN, "end"sv);
}
// 2. Let result be the empty String.
// 3. For each part in parts, do
// a. Set result to the string-concatenation of result and part.[[Value]].
// 4. Return result.
return MUST(result.to_string());
return number_format.formatter().format_range(start.to_value(), end.to_value());
}
// 15.5.23 FormatNumericRangeToParts ( numberFormat, x, y ), https://tc39.es/ecma402/#sec-formatnumericrangetoparts
ThrowCompletionOr<NonnullGCPtr<Array>> format_numeric_range_to_parts(VM& vm, NumberFormat& number_format, MathematicalValue start, MathematicalValue end)
ThrowCompletionOr<NonnullGCPtr<Array>> format_numeric_range_to_parts(VM& vm, NumberFormat const& number_format, MathematicalValue const& start, MathematicalValue const& end)
{
auto& realm = *vm.current_realm();
// 1. Let parts be ? PartitionNumberRangePattern(numberFormat, x, y).
auto parts = TRY(partition_number_range_pattern(vm, number_format, move(start), move(end)));
auto parts = TRY(partition_number_range_pattern(vm, number_format, start, end));
// 2. Let result be ! ArrayCreate(0).
auto result = MUST(Array::create(realm, 0));

View File

@ -194,10 +194,8 @@ Vector<::Locale::NumberFormat::Partition> partition_number_pattern(NumberFormat
String format_numeric(NumberFormat const&, MathematicalValue const& number);
NonnullGCPtr<Array> format_numeric_to_parts(VM&, NumberFormat const&, MathematicalValue const& number);
ThrowCompletionOr<MathematicalValue> to_intl_mathematical_value(VM&, Value value);
ThrowCompletionOr<Vector<PatternPartitionWithSource>> partition_number_range_pattern(VM&, NumberFormat&, MathematicalValue start, MathematicalValue end);
Vector<PatternPartitionWithSource> format_approximately(NumberFormat&, Vector<PatternPartitionWithSource> result);
Vector<PatternPartitionWithSource> collapse_number_range(Vector<PatternPartitionWithSource> result);
ThrowCompletionOr<String> format_numeric_range(VM&, NumberFormat&, MathematicalValue start, MathematicalValue end);
ThrowCompletionOr<NonnullGCPtr<Array>> format_numeric_range_to_parts(VM&, NumberFormat&, MathematicalValue start, MathematicalValue end);
ThrowCompletionOr<Vector<::Locale::NumberFormat::Partition>> partition_number_range_pattern(VM&, NumberFormat const&, MathematicalValue const& start, MathematicalValue const& end);
ThrowCompletionOr<String> format_numeric_range(VM&, NumberFormat const&, MathematicalValue const& start, MathematicalValue const& end);
ThrowCompletionOr<NonnullGCPtr<Array>> format_numeric_range_to_parts(VM&, NumberFormat const&, MathematicalValue const& start, MathematicalValue const& end);
}

View File

@ -41,12 +41,12 @@ describe("correct behavior", () => {
const en1 = new Intl.NumberFormat("en");
expect(en1.formatRange(100, 101)).toBe("100101");
expect(en1.formatRange(3.14, 6.28)).toBe("3.146.28");
expect(en1.formatRange(-0, 1)).toBe("-01");
expect(en1.formatRange(-0, 1)).toBe("-0 1");
const ja1 = new Intl.NumberFormat("ja");
expect(ja1.formatRange(100, 101)).toBe("100101");
expect(ja1.formatRange(3.14, 6.28)).toBe("3.146.28");
expect(ja1.formatRange(-0, 1)).toBe("-01");
expect(ja1.formatRange(-0, 1)).toBe("-0 1");
});
test("approximately formatting", () => {
@ -79,7 +79,7 @@ describe("correct behavior", () => {
const en1 = new Intl.NumberFormat("en");
expect(en1.formatRange(3, 5)).toBe("35");
expect(en1.formatRange(-1, -0)).toBe("-1 -0");
expect(en1.formatRange(0, Infinity)).toBe("0 ∞");
expect(en1.formatRange(0, Infinity)).toBe("0–∞");
expect(en1.formatRange(-Infinity, 0)).toBe("-∞ 0");
const en2 = new Intl.NumberFormat("en", {
@ -92,7 +92,7 @@ describe("correct behavior", () => {
const ja1 = new Intl.NumberFormat("ja");
expect(ja1.formatRange(3, 5)).toBe("35");
expect(ja1.formatRange(-1, -0)).toBe("-1 -0");
expect(ja1.formatRange(0, Infinity)).toBe("0 ∞");
expect(ja1.formatRange(0, Infinity)).toBe("0~∞");
expect(ja1.formatRange(-Infinity, 0)).toBe("-∞ 0");
const ja2 = new Intl.NumberFormat("ja", {
@ -106,21 +106,21 @@ describe("correct behavior", () => {
test("numbers in reverse order", () => {
const en = new Intl.NumberFormat("en");
expect(en.formatRange(1, 0)).toBe("10");
expect(en.formatRange(1, -Infinity)).toBe("1 -∞");
expect(en.formatRange(1, -0)).toBe("1 -0");
expect(en.formatRange(Infinity, 0)).toBe("∞ 0");
expect(en.formatRange(Infinity, -Infinity)).toBe("∞ -∞");
expect(en.formatRange(Infinity, -0)).toBe("∞ -0");
expect(en.formatRange(1, -Infinity)).toBe("1-∞");
expect(en.formatRange(1, -0)).toBe("1-0");
expect(en.formatRange(Infinity, 0)).toBe("∞0");
expect(en.formatRange(Infinity, -Infinity)).toBe("∞-∞");
expect(en.formatRange(Infinity, -0)).toBe("∞-0");
expect(en.formatRange(-0, -1)).toBe("-0 -1");
expect(en.formatRange(-0, -Infinity)).toBe("-0 -∞");
const ja = new Intl.NumberFormat("ja");
expect(ja.formatRange(1, 0)).toBe("10");
expect(ja.formatRange(1, -Infinity)).toBe("1 -∞");
expect(ja.formatRange(1, -0)).toBe("1 -0");
expect(ja.formatRange(Infinity, 0)).toBe("∞ 0");
expect(ja.formatRange(Infinity, -Infinity)).toBe("∞ -∞");
expect(ja.formatRange(Infinity, -0)).toBe("∞ -0");
expect(ja.formatRange(1, -Infinity)).toBe("1-∞");
expect(ja.formatRange(1, -0)).toBe("1-0");
expect(ja.formatRange(Infinity, 0)).toBe("∞0");
expect(ja.formatRange(Infinity, -Infinity)).toBe("∞-∞");
expect(ja.formatRange(Infinity, -0)).toBe("∞-0");
expect(ja.formatRange(-0, -1)).toBe("-0 -1");
expect(ja.formatRange(-0, -Infinity)).toBe("-0 -∞");
});

View File

@ -135,13 +135,13 @@ describe("correct behavior", () => {
const en = new Intl.NumberFormat("en");
expect(en.formatRangeToParts(1, -Infinity)).toEqual([
{ type: "integer", value: "1", source: "startRange" },
{ type: "literal", value: " ", source: "shared" },
{ type: "literal", value: "", source: "shared" },
{ type: "minusSign", value: "-", source: "endRange" },
{ type: "infinity", value: "∞", source: "endRange" },
]);
expect(en.formatRangeToParts(Infinity, -Infinity)).toEqual([
{ type: "infinity", value: "∞", source: "startRange" },
{ type: "literal", value: " ", source: "shared" },
{ type: "literal", value: "", source: "shared" },
{ type: "minusSign", value: "-", source: "endRange" },
{ type: "infinity", value: "∞", source: "endRange" },
]);
@ -156,13 +156,13 @@ describe("correct behavior", () => {
const ja = new Intl.NumberFormat("ja");
expect(ja.formatRangeToParts(1, -Infinity)).toEqual([
{ type: "integer", value: "1", source: "startRange" },
{ type: "literal", value: " ", source: "shared" },
{ type: "literal", value: "", source: "shared" },
{ type: "minusSign", value: "-", source: "endRange" },
{ type: "infinity", value: "∞", source: "endRange" },
]);
expect(ja.formatRangeToParts(Infinity, -Infinity)).toEqual([
{ type: "infinity", value: "∞", source: "startRange" },
{ type: "literal", value: " ", source: "shared" },
{ type: "literal", value: "", source: "shared" },
{ type: "minusSign", value: "-", source: "endRange" },
{ type: "infinity", value: "∞", source: "endRange" },
]);

View File

@ -16,10 +16,7 @@
#include <math.h>
#include <unicode/numberformatter.h>
#if ENABLE_UNICODE_DATA
# include <LibUnicode/UnicodeData.h>
#endif
#include <unicode/numberrangeformatter.h>
namespace Locale {
@ -534,6 +531,11 @@ static constexpr StringView icu_number_format_field_to_string(i32 field, NumberF
}
struct Range {
// Reports whether `position` lies inside the half-open interval [start, end).
constexpr bool contains(i32 position) const
{
    if (position < start)
        return false;
    return position < end;
}
constexpr bool operator<(Range const& other) const
{
if (start < other.start)
@ -604,8 +606,9 @@ static void flatten_partitions(Vector<Range>& partitions)
class NumberFormatImpl : public NumberFormat {
public:
NumberFormatImpl(icu::number::LocalizedNumberFormatter formatter, bool is_unit)
: m_formatter(move(formatter))
NumberFormatImpl(icu::Locale& locale, icu::number::LocalizedNumberFormatter formatter, bool is_unit)
: m_locale(locale)
, m_formatter(move(formatter))
, m_is_unit(is_unit)
{
}
@ -648,10 +651,46 @@ public:
if (!formatted.has_value())
return {};
return format_to_parts_impl(formatted, value);
return format_to_parts_impl(formatted, value, value);
}
// Formats the range from `start` to `end` as a single string.
// Yields a default-constructed String if either the range formatting or the
// string extraction reports a failure.
virtual String format_range(Value const& start, Value const& end) const override
{
    auto formatted_range = format_range_impl(start, end);
    if (!formatted_range.has_value())
        return {};

    // The error code is only needed for the string extraction below.
    UErrorCode error = U_ZERO_ERROR;
    auto range_text = formatted_range->toTempString(error);

    if (icu_failure(error))
        return {};
    return icu_string_to_string(range_text);
}
// Formats the range from `start` to `end` and splits the output into partitions.
// Yields an empty list if the range could not be formatted.
virtual Vector<Partition> format_range_to_parts(Value const& start, Value const& end) const override
{
    if (auto formatted_range = format_range_impl(start, end); formatted_range.has_value())
        return format_to_parts_impl(formatted_range, start, end);

    return {};
}
private:
// Converts a Value into an icu::Formattable.
// A double is wrapped directly; a String is handed to ICU as a string piece
// (presumably a decimal number string -- confirm against callers). The
// conversion is verified to have succeeded before returning.
static icu::Formattable value_to_formattable(Value const& value)
{
    UErrorCode error = U_ZERO_ERROR;

    auto converted = value.visit(
        [](double as_double) { return icu::Formattable { as_double }; },
        [&error](String const& as_string) { return icu::Formattable(icu_string_piece(as_string), error); });

    VERIFY(icu_success(error));
    return converted;
}
Optional<icu::number::FormattedNumber> format_impl(Value const& value) const
{
UErrorCode status = U_ZERO_ERROR;
@ -670,8 +709,34 @@ private:
return formatted;
}
// Formats the range from `start` to `end` using an ICU range formatter.
//
// The range formatter is created on first use from this formatter's skeleton
// and locale, then cached in m_range_formatter for subsequent calls.
// Returns an empty Optional if ICU reports a failure at any step.
Optional<icu::number::FormattedNumberRange> format_range_impl(Value const& start, Value const& end) const
{
    UErrorCode status = U_ZERO_ERROR;

    if (!m_range_formatter.has_value()) {
        auto skeleton = icu::number::NumberFormatter::forSkeleton(m_formatter.toSkeleton(status), status);
        if (icu_failure(status))
            return {};

        auto formatter = icu::number::UnlocalizedNumberRangeFormatter().numberFormatterBoth(move(skeleton)).locale(m_locale);

        // The fluent setters above take no UErrorCode, so any error recorded in
        // the chain has to be copied out explicitly; without this the check
        // below could never observe a failure.
        formatter.copyErrorTo(status);
        if (icu_failure(status))
            return {};

        m_range_formatter = move(formatter);
    }

    auto formattable_start = value_to_formattable(start);
    auto formattable_end = value_to_formattable(end);

    auto formatted = m_range_formatter->formatFormattableRange(formattable_start, formattable_end, status);
    if (icu_failure(status))
        return {};

    return formatted;
}
template<typename Formatted>
Vector<Partition> format_to_parts_impl(Formatted const& formatted, Value const& value) const
Vector<Partition> format_to_parts_impl(Formatted const& formatted, Value const& start, Value const& end) const
{
UErrorCode status = U_ZERO_ERROR;
@ -683,22 +748,48 @@ private:
ranges.empend(LITERAL_FIELD, 0, formatted_number.length());
icu::ConstrainedFieldPosition position;
Optional<Range> start_range;
Optional<Range> end_range;
while (static_cast<bool>(formatted->nextPosition(position, status)) && icu_success(status)) {
ranges.empend(position.getField(), position.getStart(), position.getLimit());
if (position.getCategory() == UFIELD_CATEGORY_NUMBER_RANGE_SPAN) {
if (position.getField() == 0)
start_range.emplace(position.getField(), position.getStart(), position.getLimit());
else
end_range.emplace(position.getField(), position.getStart(), position.getLimit());
} else {
ranges.empend(position.getField(), position.getStart(), position.getLimit());
}
}
flatten_partitions(ranges);
auto apply_to_partition = [&](Partition& partition, auto field, auto index) {
if (start_range.has_value() && start_range->contains(index)) {
partition.type = icu_number_format_field_to_string(field, start, m_is_unit);
partition.source = "startRange"sv;
return;
}
if (end_range.has_value() && end_range->contains(index)) {
partition.type = icu_number_format_field_to_string(field, end, m_is_unit);
partition.source = "endRange"sv;
return;
}
partition.type = icu_number_format_field_to_string(field, end, m_is_unit);
partition.source = "shared"sv;
};
Vector<Partition> result;
result.ensure_capacity(ranges.size());
for (auto const& range : ranges) {
auto string = formatted_number.tempSubStringBetween(range.start, range.end);
auto value = formatted_number.tempSubStringBetween(range.start, range.end);
Partition partition;
partition.type = icu_number_format_field_to_string(range.field, value, m_is_unit);
partition.value = icu_string_to_string(string);
partition.value = icu_string_to_string(value);
apply_to_partition(partition, range.field, range.start);
result.unchecked_append(move(partition));
}
@ -706,7 +797,9 @@ private:
return result;
}
icu::Locale& m_locale;
icu::number::LocalizedNumberFormatter m_formatter;
mutable Optional<icu::number::LocalizedNumberRangeFormatter> m_range_formatter;
bool m_is_unit { false };
};
@ -731,7 +824,7 @@ NonnullOwnPtr<NumberFormat> NumberFormat::create(
}
bool is_unit = display_options.style == NumberFormatStyle::Unit;
return adopt_own(*new NumberFormatImpl(move(formatter), is_unit));
return adopt_own(*new NumberFormatImpl(locale_data->locale(), move(formatter), is_unit));
}
Optional<StringView> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol) { return {}; }
@ -764,50 +857,4 @@ String replace_digits_for_number_system(StringView system, StringView number)
return MUST(builder.to_string());
}
#if ENABLE_UNICODE_DATA
static u32 last_code_point(StringView string)
{
Utf8View utf8_string { string };
u32 code_point = 0;
for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it)
code_point = *it;
return code_point;
}
#endif
// https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing
Optional<String> augment_range_pattern([[maybe_unused]] StringView range_separator, [[maybe_unused]] StringView lower, [[maybe_unused]] StringView upper)
{
#if ENABLE_UNICODE_DATA
auto range_pattern_with_spacing = [&]() {
return MUST(String::formatted(" {} ", range_separator));
};
Utf8View utf8_range_separator { range_separator };
Utf8View utf8_upper { upper };
// NOTE: Our implementation does the prescribed checks backwards for simplicity.
// To determine whether to add spacing, the currently recommended heuristic is:
// 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders.
for (auto it = utf8_range_separator.begin(); it != utf8_range_separator.end(); ++it) {
if (Unicode::code_point_has_property(*it, Unicode::Property::White_Space))
return {};
}
// 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit.
if (auto it = utf8_upper.begin(); it != utf8_upper.end()) {
if (!Unicode::code_point_has_general_category(*it, Unicode::GeneralCategory::Decimal_Number))
return range_pattern_with_spacing();
}
if (!Unicode::code_point_has_general_category(last_code_point(lower), Unicode::GeneralCategory::Decimal_Number))
return range_pattern_with_spacing();
#endif
return {};
}
}

View File

@ -151,6 +151,7 @@ public:
// One piece of formatted number output.
struct Partition {
// The kind of piece this is, derived from the ICU field that produced it.
StringView type;
// The formatted text of this piece.
String value;
// Which operand of a range the piece belongs to: "startRange", "endRange",
// or "shared" when it falls in neither operand's span.
StringView source;
};
using Value = Variant<double, String>;
@ -159,6 +160,9 @@ public:
virtual String format_to_decimal(Value const&) const = 0;
virtual Vector<Partition> format_to_parts(Value const&) const = 0;
virtual String format_range(Value const&, Value const&) const = 0;
virtual Vector<Partition> format_range_to_parts(Value const&, Value const&) const = 0;
protected:
NumberFormat() = default;
};
@ -181,6 +185,4 @@ Optional<StringView> get_number_system_symbol(StringView locale, StringView syst
Optional<ReadonlySpan<u32>> get_digits_for_number_system(StringView system);
String replace_digits_for_number_system(StringView system, StringView number);
Optional<String> augment_range_pattern(StringView range_separator, StringView lower, StringView upper);
}