LibUnicode: Implement TR-35's localized GMT offset formatting

This adds an API to use LibTimeZone to convert a time zone such as
"America/New_York" to a GMT offset string like "GMT-5" (short form) or
"GMT-05:00" (long form).
This commit is contained in:
Timothy Flynn 2022-01-11 11:35:50 -05:00 committed by Linus Groh
parent 6409900a5b
commit 8d35563f28
Notes: sideshowbarker 2024-07-17 21:09:41 +09:00
5 changed files with 238 additions and 20 deletions

View File

@ -414,14 +414,32 @@ struct AK::Traits<TimeZoneNames> : public GenericTraits<TimeZoneNames> {
// Generator-side record of the strings needed to format a localized GMT offset. Every member is a
// StringIndexType; elsewhere in this file they are assigned from locale_data.unique_strings.ensure().
// hash() and operator== let two records be hashed and compared member by member.
// NOTE(review): this text comes from a rendered diff. In hash() and operator== the first `return`
// appears to be the removed pre-change statement and the lines after it its replacement — confirm
// against the real file before relying on either form.
struct TimeZoneFormat {
unsigned hash() const
{
// Appears to be the pre-change statement: hashes gmt_zero_format only.
return int_hash(gmt_zero_format);
// Appears to be the post-change body: folds all six members into the hash, in declaration order.
auto hash = int_hash(symbol_ahead_sign);
hash = pair_int_hash(hash, symbol_ahead_separator);
hash = pair_int_hash(hash, symbol_behind_sign);
hash = pair_int_hash(hash, symbol_behind_separator);
hash = pair_int_hash(hash, gmt_format);
hash = pair_int_hash(hash, gmt_zero_format);
return hash;
}
bool operator==(TimeZoneFormat const& other) const
{
// Appears to be the pre-change statement: compares gmt_zero_format only.
return gmt_zero_format == other.gmt_zero_format;
// Appears to be the post-change body: equal only when all six members match.
return (symbol_ahead_sign == other.symbol_ahead_sign)
&& (symbol_ahead_separator == other.symbol_ahead_separator)
&& (symbol_behind_sign == other.symbol_behind_sign)
&& (symbol_behind_separator == other.symbol_behind_separator)
&& (gmt_format == other.gmt_format)
&& (gmt_zero_format == other.gmt_zero_format);
}
// Sign and hour/minute separator taken from the first (";"-separated) part of the hour format.
StringIndexType symbol_ahead_sign { 0 };
StringIndexType symbol_ahead_separator { 0 };
// Sign and hour/minute separator taken from the second part of the hour format.
StringIndexType symbol_behind_sign { 0 };
StringIndexType symbol_behind_separator { 0 };
// The "gmtFormat" and "gmtZeroFormat" strings from timeZoneNames.
StringIndexType gmt_format { 0 };
StringIndexType gmt_zero_format { 0 };
};
@ -429,7 +447,13 @@ template<>
// Formatter for TimeZoneFormat: writes the record's members as a brace-enclosed, comma-separated list.
// NOTE(review): this text comes from a rendered diff. The first `return` appears to be the removed
// single-member form and the second its six-member replacement — confirm against the real file.
struct AK::Formatter<TimeZoneFormat> : Formatter<FormatString> {
ErrorOr<void> format(FormatBuilder& builder, TimeZoneFormat const& time_zone_format)
{
// Appears to be the pre-change statement: emits gmt_zero_format only.
return Formatter<FormatString>::format(builder, "{{ {} }}", time_zone_format.gmt_zero_format);
// Appears to be the post-change statement: emits all six members in declaration order.
return Formatter<FormatString>::format(builder, "{{ {}, {}, {}, {}, {}, {} }}",
time_zone_format.symbol_ahead_sign,
time_zone_format.symbol_ahead_separator,
time_zone_format.symbol_behind_sign,
time_zone_format.symbol_behind_separator,
time_zone_format.gmt_format,
time_zone_format.gmt_zero_format);
}
};
@ -1380,6 +1404,8 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
// Walk down to the "timeZoneNames" object (locale object -> "dates" -> "timeZoneNames").
auto const& dates_object = locale_object.as_object().get("dates"sv);
auto const& time_zone_names_object = dates_object.as_object().get("timeZoneNames"sv);
// Entries of "timeZoneNames" used below: the metazone table and the three GMT offset format strings.
auto const& meta_zone_object = time_zone_names_object.as_object().get("metazone"sv);
auto const& hour_format_string = time_zone_names_object.as_object().get("hourFormat"sv);
auto const& gmt_format_string = time_zone_names_object.as_object().get("gmtFormat"sv);
auto const& gmt_zero_format_string = time_zone_names_object.as_object().get("gmtZeroFormat"sv);
if (meta_zone_object.is_null())
@ -1401,9 +1427,34 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
return {};
};
auto parse_hour_format = [&](auto const& format, auto& time_zone_formats) {
auto hour_formats = format.split_view(';');
auto hour_format_ahead_start = hour_formats[0].find('H').value();
auto separator_ahead_start = hour_formats[0].find_last('H').value() + 1;
auto separator_ahead_end = hour_formats[0].find('m').value();
auto hour_format_behind_start = hour_formats[1].find('H').value();
auto separator_behind_start = hour_formats[1].find_last('H').value() + 1;
auto separator_behind_end = hour_formats[1].find('m').value();
auto symbol_ahead_sign = hour_formats[0].substring_view(0, hour_format_ahead_start);
auto symbol_ahead_separator = hour_formats[0].substring_view(separator_ahead_start, separator_ahead_end - separator_ahead_start);
auto symbol_behind_sign = hour_formats[1].substring_view(0, hour_format_behind_start);
auto symbol_behind_separator = hour_formats[1].substring_view(separator_behind_start, separator_behind_end - separator_behind_start);
time_zone_formats.symbol_ahead_sign = locale_data.unique_strings.ensure(symbol_ahead_sign);
time_zone_formats.symbol_ahead_separator = locale_data.unique_strings.ensure(symbol_ahead_separator);
time_zone_formats.symbol_behind_sign = locale_data.unique_strings.ensure(symbol_behind_sign);
time_zone_formats.symbol_behind_separator = locale_data.unique_strings.ensure(symbol_behind_separator);
};
TimeZoneNamesList time_zones;
// GMT offset formatting data for this locale; starts zero-initialized and is filled in below.
TimeZoneFormat time_zone_formats {};
// "hourFormat" supplies the ahead/behind sign and separator symbols.
parse_hour_format(hour_format_string.as_string(), time_zone_formats);
// "gmtFormat" and "gmtZeroFormat" are stored whole, as unique-string indices.
time_zone_formats.gmt_format = locale_data.unique_strings.ensure(gmt_format_string.as_string());
time_zone_formats.gmt_zero_format = locale_data.unique_strings.ensure(gmt_zero_format_string.as_string());
auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) {
@ -1748,7 +1799,27 @@ struct TimeZoneNames {
@string_index_type@ short_name { 0 };
};
struct TimeZoneFormat {
struct TimeZoneFormatImpl {
TimeZoneFormat to_time_zone_format() const {
TimeZoneFormat time_zone_format {};
time_zone_format.symbol_ahead_sign = s_string_list[symbol_ahead_sign];
time_zone_format.symbol_ahead_separator = s_string_list[symbol_ahead_separator];
time_zone_format.symbol_behind_sign = s_string_list[symbol_behind_sign];
time_zone_format.symbol_behind_separator = s_string_list[symbol_behind_separator];
time_zone_format.gmt_format = s_string_list[gmt_format];
time_zone_format.gmt_zero_format = s_string_list[gmt_zero_format];
return time_zone_format;
}
@string_index_type@ symbol_ahead_sign { 0 };
@string_index_type@ symbol_ahead_separator { 0 };
@string_index_type@ symbol_behind_sign { 0 };
@string_index_type@ symbol_behind_separator { 0 };
@string_index_type@ gmt_format { 0 };
@string_index_type@ gmt_zero_format { 0 };
};
@ -1766,7 +1837,7 @@ struct DayPeriodData {
locale_data.unique_calendars.generate(generator, "CalendarData"sv, "s_calendars"sv, 10);
locale_data.unique_time_zones.generate(generator, "TimeZoneNames"sv, "s_time_zones"sv, 30);
locale_data.unique_time_zone_lists.generate(generator, s_time_zone_index_type, "s_time_zone_lists"sv);
locale_data.unique_time_zone_formats.generate(generator, "TimeZoneFormat"sv, "s_time_zone_formats"sv, 30);
locale_data.unique_time_zone_formats.generate(generator, "TimeZoneFormatImpl"sv, "s_time_zone_formats"sv, 30);
locale_data.unique_day_periods.generate(generator, "DayPeriodData"sv, "s_day_periods"sv, 30);
locale_data.unique_day_period_lists.generate(generator, s_day_period_index_type, "s_day_period_lists"sv);
locale_data.unique_hour_cycle_lists.generate(generator, "u8"sv, "s_hour_cycle_lists"sv);
@ -2068,16 +2139,17 @@ Optional<StringView> get_calendar_day_period_symbol_for_hour(StringView locale,
return get_calendar_day_period_symbol(locale, calendar, style, DayPeriod::PM);
}
// Looks up the time zone format data for `locale`. When locale_from_string() does not recognize the
// locale, the lookup yields no result (nullptr in the pointer-returning form, an empty Optional in
// the Optional-returning form).
// NOTE(review): this text comes from a rendered diff. The first signature, `return nullptr;` and
// `return &s_time_zone_formats.at(...)` appear to be the removed pre-change lines; the second
// signature, `return {};` and the to_time_zone_format() conversion appear to be their replacements.
// Confirm against the real file.
static TimeZoneFormat const* find_time_zone_formats(StringView locale)
Optional<TimeZoneFormat> get_time_zone_format(StringView locale)
{
auto locale_value = locale_from_string(locale);
if (!locale_value.has_value())
return nullptr;
return {};
auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
// The per-locale table holds an index into the shared s_time_zone_formats table.
auto time_zone_format_index = s_locale_time_zone_formats.at(locale_index);
return &s_time_zone_formats.at(time_zone_format_index);
auto const& time_zone_format = s_time_zone_formats.at(time_zone_format_index);
// Convert the stored record into the TimeZoneFormat value handed back to the caller.
return time_zone_format.to_time_zone_format();
}
static TimeZoneNames const* find_time_zone_names(StringView locale, StringView time_zone)
@ -2111,14 +2183,12 @@ Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone,
return s_string_list[time_zone_index];
}
} else {
// FIXME: This becomes more complicated when time zones other than UTC are supported. We will need to know the GMT offset
// of each time zone (which must be parsed from the time zone database, not the CLDR). For now, assuming UTC means
// we can assume a GMT offset of 0, for which the CLDR has a specific format string for the offset styles. Further,
// we will need to parse the "generic" time zone names from timeZoneNames.json.
// FIXME: We will need to parse the "generic" time zone names from timeZoneNames.json
// to support time zones other than UTC.
VERIFY(time_zone == "UTC"sv);
if (auto const* formats = find_time_zone_formats(locale); formats != nullptr)
return s_string_list[formats->gmt_zero_format];
if (auto formats = get_time_zone_format(locale); formats.has_value())
return formats->gmt_zero_format;
}
return {};

View File

@ -8,6 +8,7 @@
#include <AK/Array.h>
#include <AK/StringView.h>
#include <AK/Time.h>
#include <LibUnicode/DateTimeFormat.h>
TEST_CASE(time_zone_name)
@ -22,15 +23,11 @@ TEST_CASE(time_zone_name)
constexpr auto test_data = Array {
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "Coordinated Universal Time"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "GMT"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "GMT"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "GMT"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "GMT"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "التوقيت العالمي المنسق"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "غرينتش"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "غرينتش"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "غرينتش"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "غرينتش"sv },
@ -65,3 +62,60 @@ TEST_CASE(time_zone_name)
EXPECT_EQ(*time_zone, test.expected_result);
}
}
// Checks Unicode::format_time_zone() for the ShortOffset and LongOffset styles in the "en" and "ar"
// locales, at two instants (1833 and 2022) and for offsets that are zero, behind GMT and ahead of GMT.
TEST_CASE(format_time_zone_offset)
{
    // Reference instants, in seconds relative to the Unix epoch.
    constexpr auto start_of_1833 = AK::Time::from_seconds(-4323283200); // Tuesday, January 1, 1833 12:00:00 AM
    constexpr auto start_of_2022 = AK::Time::from_seconds(1640995200); // Saturday, January 1, 2022 12:00:00 AM

    using Style = Unicode::CalendarPatternStyle;

    struct OffsetCase {
        StringView locale;
        Style style;
        AK::Time time;
        StringView time_zone;
        StringView expected_result;
    };

    constexpr auto offset_cases = Array {
        // UTC.
        OffsetCase { "en"sv, Style::ShortOffset, {}, "UTC"sv, "GMT"sv },
        OffsetCase { "en"sv, Style::LongOffset, {}, "UTC"sv, "GMT"sv },
        OffsetCase { "ar"sv, Style::ShortOffset, {}, "UTC"sv, "غرينتش"sv },
        OffsetCase { "ar"sv, Style::LongOffset, {}, "UTC"sv, "غرينتش"sv },

        // America/Los_Angeles.
        OffsetCase { "en"sv, Style::ShortOffset, start_of_1833, "America/Los_Angeles"sv, "GMT-7:52:58"sv },
        OffsetCase { "en"sv, Style::ShortOffset, start_of_2022, "America/Los_Angeles"sv, "GMT-8"sv },
        OffsetCase { "en"sv, Style::LongOffset, start_of_1833, "America/Los_Angeles"sv, "GMT-07:52:58"sv },
        OffsetCase { "en"sv, Style::LongOffset, start_of_2022, "America/Los_Angeles"sv, "GMT-08:00"sv },
        OffsetCase { "ar"sv, Style::ShortOffset, start_of_1833, "America/Los_Angeles"sv, "غرينتش-٧:٥٢:٥٨"sv },
        OffsetCase { "ar"sv, Style::ShortOffset, start_of_2022, "America/Los_Angeles"sv, "غرينتش-٨"sv },
        OffsetCase { "ar"sv, Style::LongOffset, start_of_1833, "America/Los_Angeles"sv, "غرينتش-٠٧:٥٢:٥٨"sv },
        OffsetCase { "ar"sv, Style::LongOffset, start_of_2022, "America/Los_Angeles"sv, "غرينتش-٠٨:٠٠"sv },

        // Europe/London.
        OffsetCase { "en"sv, Style::ShortOffset, start_of_1833, "Europe/London"sv, "GMT-0:01:15"sv },
        OffsetCase { "en"sv, Style::ShortOffset, start_of_2022, "Europe/London"sv, "GMT"sv },
        OffsetCase { "en"sv, Style::LongOffset, start_of_1833, "Europe/London"sv, "GMT-00:01:15"sv },
        OffsetCase { "en"sv, Style::LongOffset, start_of_2022, "Europe/London"sv, "GMT"sv },
        OffsetCase { "ar"sv, Style::ShortOffset, start_of_1833, "Europe/London"sv, "غرينتش-٠:٠١:١٥"sv },
        OffsetCase { "ar"sv, Style::ShortOffset, start_of_2022, "Europe/London"sv, "غرينتش"sv },
        OffsetCase { "ar"sv, Style::LongOffset, start_of_1833, "Europe/London"sv, "غرينتش-٠٠:٠١:١٥"sv },
        OffsetCase { "ar"sv, Style::LongOffset, start_of_2022, "Europe/London"sv, "غرينتش"sv },

        // Asia/Kathmandu.
        OffsetCase { "en"sv, Style::ShortOffset, start_of_1833, "Asia/Kathmandu"sv, "GMT+5:41:16"sv },
        OffsetCase { "en"sv, Style::ShortOffset, start_of_2022, "Asia/Kathmandu"sv, "GMT+5:45"sv },
        OffsetCase { "en"sv, Style::LongOffset, start_of_1833, "Asia/Kathmandu"sv, "GMT+05:41:16"sv },
        OffsetCase { "en"sv, Style::LongOffset, start_of_2022, "Asia/Kathmandu"sv, "GMT+05:45"sv },
        OffsetCase { "ar"sv, Style::ShortOffset, start_of_1833, "Asia/Kathmandu"sv, "غرينتش+٥:٤١:١٦"sv },
        OffsetCase { "ar"sv, Style::ShortOffset, start_of_2022, "Asia/Kathmandu"sv, "غرينتش+٥:٤٥"sv },
        OffsetCase { "ar"sv, Style::LongOffset, start_of_1833, "Asia/Kathmandu"sv, "غرينتش+٠٥:٤١:١٦"sv },
        OffsetCase { "ar"sv, Style::LongOffset, start_of_2022, "Asia/Kathmandu"sv, "غرينتش+٠٥:٤٥"sv },
    };

    for (auto const& offset_case : offset_cases) {
        auto formatted = Unicode::format_time_zone(offset_case.locale, offset_case.time_zone, offset_case.style, offset_case.time);
        EXPECT_EQ(formatted, offset_case.expected_result);
    }
}

View File

@ -16,7 +16,7 @@ set(SOURCES
)
serenity_lib(LibUnicode unicode)
target_link_libraries(LibUnicode LibCore)
target_link_libraries(LibUnicode LibCore LibTimeZone)
target_compile_definitions(LibUnicode PRIVATE ENABLE_UNICODE_DATA=$<BOOL:${ENABLE_UNICODE_DATABASE_DOWNLOAD}>)
if (DEFINED UNICODE_DATA_SOURCES)

View File

@ -6,8 +6,11 @@
#include <AK/Array.h>
#include <AK/StringBuilder.h>
#include <LibTimeZone/TimeZone.h>
#include <LibUnicode/DateTimeFormat.h>
#include <LibUnicode/Locale.h>
#include <LibUnicode/NumberFormat.h>
#include <stdlib.h>
namespace Unicode {
@ -191,6 +194,83 @@ Optional<StringView> __attribute__((weak)) get_calendar_month_symbol(StringView,
// Fallback definitions, each declared with __attribute__((weak)) and returning an empty Optional.
// NOTE(review): presumably these are overridden by the generated Unicode data implementations when
// that data is built in — verify against the build configuration.
Optional<StringView> __attribute__((weak)) get_calendar_month_symbol(StringView, StringView, CalendarPatternStyle, Month) { return {}; }
Optional<StringView> __attribute__((weak)) get_calendar_weekday_symbol(StringView, StringView, CalendarPatternStyle, Weekday) { return {}; }
Optional<StringView> __attribute__((weak)) get_calendar_day_period_symbol(StringView, StringView, CalendarPatternStyle, DayPeriod) { return {}; }
Optional<StringView> __attribute__((weak)) get_calendar_day_period_symbol_for_hour(StringView, StringView, CalendarPatternStyle, u8) { return {}; }
Optional<StringView> __attribute__((weak)) get_time_zone_name(StringView, StringView, CalendarPatternStyle) { return {}; }
Optional<TimeZoneFormat> __attribute__((weak)) get_time_zone_format(StringView) { return {}; }
// Formats the offset of `time_zone` from GMT at the instant `time` as a localized string such as
// "GMT-8" (ShortOffset) or "GMT-08:00" (LongOffset).
//
// Returns an empty Optional when the locale has no time zone format data, when the locale has no
// default number system, or when the offset of `time_zone` cannot be determined. An offset of exactly
// zero yields the locale's dedicated zero format (e.g. "GMT"). `style` must be ShortOffset or
// LongOffset; any other style is a programming error.
static Optional<String> format_time_zone_offset(StringView locale, StringView time_zone, CalendarPatternStyle style, AK::Time time)
{
    auto formats = get_time_zone_format(locale);
    if (!formats.has_value())
        return {};

    auto number_system = get_default_number_system(locale);
    if (!number_system.has_value())
        return {};

    auto offset_seconds = TimeZone::get_time_zone_offset(time_zone, time);
    if (!offset_seconds.has_value())
        return {};
    if (*offset_seconds == 0)
        return formats->gmt_zero_format;

    // Choose the symbols while the offset still carries its sign; it is made absolute below.
    auto sign = *offset_seconds > 0 ? formats->symbol_ahead_sign : formats->symbol_behind_sign;
    auto separator = *offset_seconds > 0 ? formats->symbol_ahead_separator : formats->symbol_behind_separator;

    // Break the absolute offset into hours, minutes, and leftover seconds.
    *offset_seconds = llabs(*offset_seconds);
    auto offset_hours = *offset_seconds / 3'600;
    *offset_seconds %= 3'600;
    auto offset_minutes = *offset_seconds / 60;
    *offset_seconds %= 60;

    StringBuilder builder;
    builder.append(sign);

    switch (style) {
    // The long format always uses 2-digit hours field and minutes field, with optional 2-digit seconds field.
    case CalendarPatternStyle::LongOffset:
        builder.appendff("{:02}{}{:02}", offset_hours, separator, offset_minutes);
        if (*offset_seconds > 0)
            builder.appendff("{}{:02}", separator, *offset_seconds);
        break;

    // The short format is intended for the shortest representation and uses hour fields without leading zero, with optional 2-digit minutes and seconds fields.
    case CalendarPatternStyle::ShortOffset:
        builder.appendff("{}", offset_hours);

        // The minutes field may only be omitted when nothing follows it. A non-zero seconds field
        // therefore forces a "00" minutes field, so that an offset of 5h 0m 30s is formatted as
        // "5:00:30" instead of being silently truncated to "5".
        if (offset_minutes > 0 || *offset_seconds > 0) {
            builder.appendff("{}{:02}", separator, offset_minutes);
            if (*offset_seconds > 0)
                builder.appendff("{}{:02}", separator, *offset_seconds);
        }
        break;

    default:
        VERIFY_NOT_REACHED();
    }

    // The digits used for hours, minutes and seconds fields in this format are the locale's default decimal digits.
    auto offset = replace_digits_for_number_system(*number_system, builder.build());

    // The locale's GMT format carries a "{0}" placeholder for the formatted offset.
    return formats->gmt_format.replace("{0}"sv, offset);
}
// https://unicode.org/reports/tr35/tr35-dates.html#Time_Zone_Format_Terminology
// Produces the display string for `time_zone` in the given `style`. The name styles (Short, Long,
// ShortGeneric, LongGeneric) use the locale's time zone name; the offset styles (ShortOffset,
// LongOffset) use the localized GMT offset at `time`. If no localized string is available, the
// `time_zone` identifier itself is returned. Any other style is a programming error.
String format_time_zone(StringView locale, StringView time_zone, CalendarPatternStyle style, AK::Time time)
{
    switch (style) {
    case CalendarPatternStyle::ShortOffset:
    case CalendarPatternStyle::LongOffset: {
        auto localized_offset = format_time_zone_offset(locale, time_zone, style, time);
        if (localized_offset.has_value())
            return *localized_offset;
        return time_zone;
    }

    case CalendarPatternStyle::Short:
    case CalendarPatternStyle::Long:
    case CalendarPatternStyle::ShortGeneric:
    case CalendarPatternStyle::LongGeneric: {
        auto localized_name = get_time_zone_name(locale, time_zone, style);
        if (localized_name.has_value())
            return *localized_name;
        return time_zone;
    }

    default:
        VERIFY_NOT_REACHED();
    }
}
}

View File

@ -9,6 +9,7 @@
#include <AK/Optional.h>
#include <AK/String.h>
#include <AK/StringView.h>
#include <AK/Time.h>
#include <AK/Types.h>
#include <AK/Vector.h>
#include <LibUnicode/Forward.h>
@ -167,6 +168,17 @@ enum class CalendarSymbol : u8 {
Weekday,
};
// The locale-specific strings used to build a localized GMT offset such as "GMT-08:00".
struct TimeZoneFormat {
// Sign and field separator used when the offset is greater than zero.
StringView symbol_ahead_sign {};
StringView symbol_ahead_separator {};
// Sign and field separator used when the offset is less than zero.
StringView symbol_behind_sign {};
StringView symbol_behind_separator {};
// Pattern for a non-zero offset; its "{0}" placeholder is replaced with the formatted offset.
StringView gmt_format {};
// String used as-is when the offset is exactly zero.
StringView gmt_zero_format {};
};
HourCycle hour_cycle_from_string(StringView hour_cycle);
StringView hour_cycle_to_string(HourCycle hour_cycle);
@ -197,6 +209,8 @@ Optional<StringView> get_calendar_weekday_symbol(StringView locale, StringView c
Optional<StringView> get_calendar_day_period_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::DayPeriod value);
Optional<StringView> get_calendar_day_period_symbol_for_hour(StringView locale, StringView calendar, CalendarPatternStyle style, u8 hour);
// Returns the display string for `time_zone` in `locale` for the given `style`; `time` selects the
// instant at which the GMT offset is evaluated for the offset styles. Falls back to the `time_zone`
// identifier when no localized string is available.
String format_time_zone(StringView locale, StringView time_zone, CalendarPatternStyle style, AK::Time time);
// Returns the locale's name for `time_zone` in the given `style`, or an empty Optional if there is none.
Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style);
// Returns the locale's GMT offset formatting strings, or an empty Optional if there are none.
Optional<TimeZoneFormat> get_time_zone_format(StringView locale);
}