LibJS+LibUnicode: Generate a set of default DateTimeFormat patterns

This isn't called out in TR-35, but before ICU even looks at CLDR data,
it adds a hard-coded set of default patterns to each locale's calendar.
It has done this since 2006 when its DateTimeFormat feature was first
created. Several test262 tests depend on this, which, under ECMA-402,
falls into "implementation defined" behavior. For compatibility, we
can do the same in LibUnicode.
This commit is contained in:
Timothy Flynn 2022-07-22 18:00:06 -04:00 committed by Linus Groh
parent 8150d71821
commit ae2acc8cdf
Notes: sideshowbarker 2024-07-17 08:38:23 +09:00
3 changed files with 56 additions and 2 deletions

View File

@ -1160,6 +1160,21 @@ static void parse_interval_patterns(Calendar& calendar, JsonObject const& interv
calendar.range12_formats = locale_data.unique_range_pattern_lists.ensure(move(range12_formats));
}
// Adds a fixed set of single-field date/time patterns to `formats`. Each pattern is run through
// parse_date_time_pattern(), and the resulting index is appended only if `formats` does not
// already contain it.
static void generate_default_patterns(CalendarPatternList& formats, UnicodeLocaleData& locale_data)
{
    // ICU seeds every locale with these hard-coded patterns; we mirror that list for compatibility:
    // https://github.com/unicode-org/icu/blob/release-71-1/icu4c/source/i18n/dtptngen.cpp#L1343-L1354
    static constexpr auto icu_default_patterns = Array { "G"sv, "y"sv, "M"sv, "E"sv, "D"sv, "F"sv, "d"sv, "a"sv, "B"sv, "H"sv, "mm"sv, "ss"sv, "SS"sv, "v"sv };

    for (auto default_pattern : icu_default_patterns) {
        // The same string is deliberately passed for both of the first two arguments.
        auto pattern_index = parse_date_time_pattern(default_pattern, default_pattern, locale_data);

        // These hard-coded patterns are expected to always parse successfully.
        VERIFY(pattern_index.has_value());

        // Skip indices that are already in the list so no duplicates are added.
        if (formats.contains_slow(*pattern_index))
            continue;

        formats.append(*pattern_index);
    }
}
static void generate_missing_patterns(Calendar& calendar, CalendarPatternList& formats, Vector<CalendarPattern> date_formats, Vector<CalendarPattern> time_formats, UnicodeLocaleData& locale_data)
{
// https://unicode.org/reports/tr35/tr35-dates.html#Missing_Skeleton_Fields
@ -1473,6 +1488,7 @@ static ErrorOr<void> parse_calendars(String locale_calendars_path, UnicodeLocale
auto const& interval_formats_object = date_time_formats_object.as_object().get("intervalFormats"sv);
parse_interval_patterns(calendar, interval_formats_object.as_object(), locale_data);
generate_default_patterns(available_formats, locale_data);
generate_missing_patterns(calendar, available_formats, move(date_formats), move(time_formats), locale_data);
parse_calendar_symbols(calendar, value.as_object(), locale_data);

View File

@ -431,7 +431,7 @@ Optional<Unicode::CalendarPattern> basic_format_matcher(Unicode::CalendarPattern
best_format->for_each_calendar_field_zipped_with(options, [&](auto& best_format_field, auto const& option_field, auto field_type) {
switch (field_type) {
case Unicode::CalendarPattern::Field::FractionalSecondDigits:
if (best_format->second.has_value() && option_field.has_value())
if ((best_format_field.has_value() || best_format->second.has_value()) && option_field.has_value())
best_format_field = option_field;
break;

View File

@ -263,7 +263,11 @@ describe("dayPeriod", () => {
});
test("flexible day period rolls over midnight", () => {
const en = new Intl.DateTimeFormat("en", { dayPeriod: "short", timeZone: "UTC" });
const en = new Intl.DateTimeFormat("en", {
hour: "numeric",
dayPeriod: "short",
timeZone: "UTC",
});
// For the en locale, these times (05:00 and 23:00) fall in the flexible day period range of
// [21:00, 06:00), on either side of midnight.
@ -291,6 +295,7 @@ describe("dayPeriod", () => {
// The en locale includes the "noon" fixed day period, whereas the ar locale does not.
data.forEach(d => {
const en = new Intl.DateTimeFormat("en", {
hour: "numeric",
dayPeriod: "short",
timeZone: "UTC",
minute: d.minute,
@ -303,6 +308,7 @@ describe("dayPeriod", () => {
expect(en.format(date3)).toBe(d.en3);
const ar = new Intl.DateTimeFormat("ar", {
hour: "numeric",
dayPeriod: "short",
timeZone: "UTC",
minute: d.minute,
@ -315,6 +321,38 @@ describe("dayPeriod", () => {
expect(ar.format(date3)).toBe(d.ar3);
});
});
test("dayPeriod without time", () => {
    // Expected output per dayPeriod width. Keys are "<locale tag><date index>", where the
    // date index selects d0 or d1 (both defined outside this test).
    // prettier-ignore
    const data = [
{ dayPeriod: "narrow", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "অপৰাহ্ন", as1: "পূৰ্বাহ্ন"},
{ dayPeriod: "short", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "ص", as0: "অপৰাহ্ন", as1: "পূৰ্বাহ্ন"},
{ dayPeriod: "long", en0: "in the afternoon", en1: "in the morning", ar0: "بعد الظهر", ar1: "صباحًا", as0: "অপৰাহ্ন", as1: "পূৰ্বাহ্ন"},
    ];

    // Locales are checked in the same order for every row: en, then ar, then as.
    const locales = ["en", "ar", "as"];

    for (const expected of data) {
        for (const locale of locales) {
            // Only dayPeriod (plus a fixed time zone) is requested; no hour/minute/second fields.
            const formatter = new Intl.DateTimeFormat(locale, {
                dayPeriod: expected.dayPeriod,
                timeZone: "UTC",
            });

            expect(formatter.format(d0)).toBe(expected[`${locale}0`]);
            expect(formatter.format(d1)).toBe(expected[`${locale}1`]);
        }
    }
});
});
describe("hour", () => {