From 8d35563f280768935642217e13014da179c9b6e9 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Tue, 11 Jan 2022 11:35:50 -0500 Subject: [PATCH] LibUnicode: Implement TR-35's localized GMT offset formatting This adds an API to use LibTimeZone to convert a time zone such as "America/New_York" to a GMT offset string like "GMT-5" (short form) or "GMT-05:00" (long form). --- .../GenerateUnicodeDateTimeFormat.cpp | 100 +++++++++++++++--- .../LibUnicode/TestUnicodeDateTimeFormat.cpp | 62 ++++++++++- Userland/Libraries/LibUnicode/CMakeLists.txt | 2 +- .../Libraries/LibUnicode/DateTimeFormat.cpp | 80 ++++++++++++++ .../Libraries/LibUnicode/DateTimeFormat.h | 14 +++ 5 files changed, 238 insertions(+), 20 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp index 2d0bb1faf36..d0f5750c7d4 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp @@ -414,14 +414,32 @@ struct AK::Traits : public GenericTraits { struct TimeZoneFormat { unsigned hash() const { - return int_hash(gmt_zero_format); + auto hash = int_hash(symbol_ahead_sign); + hash = pair_int_hash(hash, symbol_ahead_separator); + hash = pair_int_hash(hash, symbol_behind_sign); + hash = pair_int_hash(hash, symbol_behind_separator); + hash = pair_int_hash(hash, gmt_format); + hash = pair_int_hash(hash, gmt_zero_format); + return hash; } bool operator==(TimeZoneFormat const& other) const { - return gmt_zero_format == other.gmt_zero_format; + return (symbol_ahead_sign == other.symbol_ahead_sign) + && (symbol_ahead_separator == other.symbol_ahead_separator) + && (symbol_behind_sign == other.symbol_behind_sign) + && (symbol_behind_separator == other.symbol_behind_separator) + && (gmt_format == other.gmt_format) + && (gmt_zero_format == other.gmt_zero_format); } + StringIndexType symbol_ahead_sign { 0 }; + StringIndexType symbol_ahead_separator { 0 }; + + StringIndexType symbol_behind_sign { 0 }; + StringIndexType symbol_behind_separator { 0 }; + + StringIndexType gmt_format { 0 }; StringIndexType gmt_zero_format { 0 }; }; @@ -429,7 +447,13 @@ template<> struct AK::Formatter : Formatter { ErrorOr format(FormatBuilder& builder, TimeZoneFormat const& time_zone_format) { - return Formatter::format(builder, "{{ {} }}", time_zone_format.gmt_zero_format); + return Formatter::format(builder, "{{ {}, {}, {}, {}, {}, {} }}", + time_zone_format.symbol_ahead_sign, + time_zone_format.symbol_ahead_separator, + time_zone_format.symbol_behind_sign, + time_zone_format.symbol_behind_separator, + time_zone_format.gmt_format, + time_zone_format.gmt_zero_format); } }; @@ -1380,6 +1404,8 @@ static ErrorOr parse_time_zone_names(String locale_time_zone_names_path, U auto const& dates_object = locale_object.as_object().get("dates"sv); auto const& time_zone_names_object = dates_object.as_object().get("timeZoneNames"sv); auto const& meta_zone_object = time_zone_names_object.as_object().get("metazone"sv); + auto const& hour_format_string = time_zone_names_object.as_object().get("hourFormat"sv); + auto const& gmt_format_string = time_zone_names_object.as_object().get("gmtFormat"sv); auto const& gmt_zero_format_string = time_zone_names_object.as_object().get("gmtZeroFormat"sv); if (meta_zone_object.is_null()) @@ -1401,9 +1427,34 @@ static ErrorOr parse_time_zone_names(String locale_time_zone_names_path, U return {}; }; + auto parse_hour_format = [&](auto const& format, auto& time_zone_formats) { + auto hour_formats = format.split_view(';'); + + auto hour_format_ahead_start = hour_formats[0].find('H').value(); + auto separator_ahead_start = hour_formats[0].find_last('H').value() + 1; + auto separator_ahead_end = hour_formats[0].find('m').value(); + + auto hour_format_behind_start = hour_formats[1].find('H').value(); + auto separator_behind_start = hour_formats[1].find_last('H').value() + 1; + auto separator_behind_end = hour_formats[1].find('m').value(); + + auto symbol_ahead_sign = hour_formats[0].substring_view(0, hour_format_ahead_start); + auto symbol_ahead_separator = hour_formats[0].substring_view(separator_ahead_start, separator_ahead_end - separator_ahead_start); + + auto symbol_behind_sign = hour_formats[1].substring_view(0, hour_format_behind_start); + auto symbol_behind_separator = hour_formats[1].substring_view(separator_behind_start, separator_behind_end - separator_behind_start); + + time_zone_formats.symbol_ahead_sign = locale_data.unique_strings.ensure(symbol_ahead_sign); + time_zone_formats.symbol_ahead_separator = locale_data.unique_strings.ensure(symbol_ahead_separator); + time_zone_formats.symbol_behind_sign = locale_data.unique_strings.ensure(symbol_behind_sign); + time_zone_formats.symbol_behind_separator = locale_data.unique_strings.ensure(symbol_behind_separator); + }; + TimeZoneNamesList time_zones; TimeZoneFormat time_zone_formats {}; + parse_hour_format(hour_format_string.as_string(), time_zone_formats); + time_zone_formats.gmt_format = locale_data.unique_strings.ensure(gmt_format_string.as_string()); time_zone_formats.gmt_zero_format = locale_data.unique_strings.ensure(gmt_zero_format_string.as_string()); auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) { @@ -1748,7 +1799,27 @@ struct TimeZoneNames { @string_index_type@ short_name { 0 }; }; -struct TimeZoneFormat { +struct TimeZoneFormatImpl { + TimeZoneFormat to_time_zone_format() const { + TimeZoneFormat time_zone_format {}; + + time_zone_format.symbol_ahead_sign = s_string_list[symbol_ahead_sign]; + time_zone_format.symbol_ahead_separator = s_string_list[symbol_ahead_separator]; + time_zone_format.symbol_behind_sign = s_string_list[symbol_behind_sign]; + time_zone_format.symbol_behind_separator = s_string_list[symbol_behind_separator]; + time_zone_format.gmt_format = s_string_list[gmt_format]; + time_zone_format.gmt_zero_format = s_string_list[gmt_zero_format]; + + return time_zone_format; + } + + @string_index_type@ symbol_ahead_sign { 0 }; + @string_index_type@ symbol_ahead_separator { 0 }; + + @string_index_type@ symbol_behind_sign { 0 }; + @string_index_type@ symbol_behind_separator { 0 }; + + @string_index_type@ gmt_format { 0 }; @string_index_type@ gmt_zero_format { 0 }; }; @@ -1766,7 +1837,7 @@ struct DayPeriodData { locale_data.unique_calendars.generate(generator, "CalendarData"sv, "s_calendars"sv, 10); locale_data.unique_time_zones.generate(generator, "TimeZoneNames"sv, "s_time_zones"sv, 30); locale_data.unique_time_zone_lists.generate(generator, s_time_zone_index_type, "s_time_zone_lists"sv); - locale_data.unique_time_zone_formats.generate(generator, "TimeZoneFormat"sv, "s_time_zone_formats"sv, 30); + locale_data.unique_time_zone_formats.generate(generator, "TimeZoneFormatImpl"sv, "s_time_zone_formats"sv, 30); locale_data.unique_day_periods.generate(generator, "DayPeriodData"sv, "s_day_periods"sv, 30); locale_data.unique_day_period_lists.generate(generator, s_day_period_index_type, "s_day_period_lists"sv); locale_data.unique_hour_cycle_lists.generate(generator, "u8"sv, "s_hour_cycle_lists"sv); @@ -2068,16 +2139,17 @@ Optional get_calendar_day_period_symbol_for_hour(StringView locale, return get_calendar_day_period_symbol(locale, calendar, style, DayPeriod::PM); } -static TimeZoneFormat const* find_time_zone_formats(StringView locale) +Optional get_time_zone_format(StringView locale) { auto locale_value = locale_from_string(locale); if (!locale_value.has_value()) - return nullptr; + return {}; auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. - auto time_zone_format_index = s_locale_time_zone_formats.at(locale_index); - return &s_time_zone_formats.at(time_zone_format_index); + + auto const& time_zone_format = s_time_zone_formats.at(time_zone_format_index); + return time_zone_format.to_time_zone_format(); } static TimeZoneNames const* find_time_zone_names(StringView locale, StringView time_zone) @@ -2111,14 +2183,12 @@ Optional get_time_zone_name(StringView locale, StringView time_zone, return s_string_list[time_zone_index]; } } else { - // FIXME: This becomes more complicated when time zones other than UTC are supported. We will need to know the GMT offset - // of each time zone (which must be parsed from the time zone database, not the CLDR). For now, assuming UTC means - // we can assume a GMT offset of 0, for which the CLDR has a specific format string for the offset styles. Further, - // we will need to parse the "generic" time zone names from timeZoneNames.json. + // FIXME: We will need to parse the "generic" time zone names from timeZoneNames.json + // to support time zones other than UTC. VERIFY(time_zone == "UTC"sv); - if (auto const* formats = find_time_zone_formats(locale); formats != nullptr) - return s_string_list[formats->gmt_zero_format]; + if (auto formats = get_time_zone_format(locale); formats.has_value()) + return formats->gmt_zero_format; } return {}; diff --git a/Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp b/Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp index 8cb76cecfef..4077c80b5ff 100644 --- a/Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp +++ b/Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp @@ -8,6 +8,7 @@ #include #include +#include #include TEST_CASE(time_zone_name) @@ -22,15 +23,11 @@ TEST_CASE(time_zone_name) constexpr auto test_data = Array { TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "Coordinated Universal Time"sv }, TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv }, - TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "GMT"sv }, - TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "GMT"sv }, TestData { "en"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "GMT"sv }, TestData { "en"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "GMT"sv }, TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "التوقيت العالمي المنسق"sv }, TestData { "ar"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv }, - TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "غرينتش"sv }, - TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "غرينتش"sv }, TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "غرينتش"sv }, TestData { "ar"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "غرينتش"sv }, @@ -65,3 +62,60 @@ TEST_CASE(time_zone_name) EXPECT_EQ(*time_zone, test.expected_result); } } + +TEST_CASE(format_time_zone_offset) +{ + constexpr auto jan_1_1833 = AK::Time::from_seconds(-4323283200); // Tuesday, January 1, 1833 12:00:00 AM + constexpr auto jan_1_2022 = AK::Time::from_seconds(1640995200); // Saturday, January 1, 2022 12:00:00 AM + + struct TestData { + StringView locale; + Unicode::CalendarPatternStyle style; + AK::Time time; + StringView time_zone; + StringView expected_result; + }; + + constexpr auto test_data = Array { + TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, {}, "UTC"sv, "GMT"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, {}, "UTC"sv, "GMT"sv }, + + TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, {}, "UTC"sv, "غرينتش"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, {}, "UTC"sv, "غرينتش"sv }, + + TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_1833, "America/Los_Angeles"sv, "GMT-7:52:58"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_2022, "America/Los_Angeles"sv, "GMT-8"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_1833, "America/Los_Angeles"sv, "GMT-07:52:58"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_2022, "America/Los_Angeles"sv, "GMT-08:00"sv }, + + TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_1833, "America/Los_Angeles"sv, "غرينتش-٧:٥٢:٥٨"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_2022, "America/Los_Angeles"sv, "غرينتش-٨"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_1833, "America/Los_Angeles"sv, "غرينتش-٠٧:٥٢:٥٨"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_2022, "America/Los_Angeles"sv, "غرينتش-٠٨:٠٠"sv }, + + TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_1833, "Europe/London"sv, "GMT-0:01:15"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_2022, "Europe/London"sv, "GMT"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_1833, "Europe/London"sv, "GMT-00:01:15"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_2022, "Europe/London"sv, "GMT"sv }, + + TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_1833, "Europe/London"sv, "غرينتش-٠:٠١:١٥"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_2022, "Europe/London"sv, "غرينتش"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_1833, "Europe/London"sv, "غرينتش-٠٠:٠١:١٥"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_2022, "Europe/London"sv, "غرينتش"sv }, + + TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_1833, "Asia/Kathmandu"sv, "GMT+5:41:16"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_2022, "Asia/Kathmandu"sv, "GMT+5:45"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_1833, "Asia/Kathmandu"sv, "GMT+05:41:16"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_2022, "Asia/Kathmandu"sv, "GMT+05:45"sv }, + + TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_1833, "Asia/Kathmandu"sv, "غرينتش+٥:٤١:١٦"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, jan_1_2022, "Asia/Kathmandu"sv, "غرينتش+٥:٤٥"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_1833, "Asia/Kathmandu"sv, "غرينتش+٠٥:٤١:١٦"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, jan_1_2022, "Asia/Kathmandu"sv, "غرينتش+٠٥:٤٥"sv }, + }; + + for (auto const& test : test_data) { + auto time_zone = Unicode::format_time_zone(test.locale, test.time_zone, test.style, test.time); + EXPECT_EQ(time_zone, test.expected_result); + } +} diff --git a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt index c59470cca08..b9ed32e8589 100644 --- a/Userland/Libraries/LibUnicode/CMakeLists.txt +++ b/Userland/Libraries/LibUnicode/CMakeLists.txt @@ -16,7 +16,7 @@ set(SOURCES ) serenity_lib(LibUnicode unicode) -target_link_libraries(LibUnicode LibCore) +target_link_libraries(LibUnicode LibCore LibTimeZone) target_compile_definitions(LibUnicode PRIVATE ENABLE_UNICODE_DATA=$) if (DEFINED UNICODE_DATA_SOURCES) diff --git a/Userland/Libraries/LibUnicode/DateTimeFormat.cpp b/Userland/Libraries/LibUnicode/DateTimeFormat.cpp index c3afa8cdf10..4a899e8a5e3 100644 --- a/Userland/Libraries/LibUnicode/DateTimeFormat.cpp +++ b/Userland/Libraries/LibUnicode/DateTimeFormat.cpp @@ -6,8 +6,11 @@ #include #include +#include #include #include +#include +#include namespace Unicode { @@ -191,6 +194,83 @@ Optional __attribute__((weak)) get_calendar_month_symbol(StringView, Optional __attribute__((weak)) get_calendar_weekday_symbol(StringView, StringView, CalendarPatternStyle, Weekday) { return {}; } Optional __attribute__((weak)) get_calendar_day_period_symbol(StringView, StringView, CalendarPatternStyle, DayPeriod) { return {}; } Optional __attribute__((weak)) get_calendar_day_period_symbol_for_hour(StringView, StringView, CalendarPatternStyle, u8) { return {}; } + Optional __attribute__((weak)) get_time_zone_name(StringView, StringView, CalendarPatternStyle) { return {}; } +Optional __attribute__((weak)) get_time_zone_format(StringView) { return {}; } + +static Optional format_time_zone_offset(StringView locale, StringView time_zone, CalendarPatternStyle style, AK::Time time) +{ + auto formats = get_time_zone_format(locale); + if (!formats.has_value()) + return {}; + + auto number_system = get_default_number_system(locale); + if (!number_system.has_value()) + return {}; + + auto offset_seconds = TimeZone::get_time_zone_offset(time_zone, time); + if (!offset_seconds.has_value()) + return {}; + if (*offset_seconds == 0) + return formats->gmt_zero_format; + + auto sign = *offset_seconds > 0 ? formats->symbol_ahead_sign : formats->symbol_behind_sign; + auto separator = *offset_seconds > 0 ? formats->symbol_ahead_separator : formats->symbol_behind_separator; + *offset_seconds = llabs(*offset_seconds); + + auto offset_hours = *offset_seconds / 3'600; + *offset_seconds %= 3'600; + + auto offset_minutes = *offset_seconds / 60; + *offset_seconds %= 60; + + StringBuilder builder; + builder.append(sign); + + switch (style) { + // The long format always uses 2-digit hours field and minutes field, with optional 2-digit seconds field. + case CalendarPatternStyle::LongOffset: + builder.appendff("{:02}{}{:02}", offset_hours, separator, offset_minutes); + if (*offset_seconds > 0) + builder.appendff("{}{:02}", separator, *offset_seconds); + break; + + // The short format is intended for the shortest representation and uses hour fields without leading zero, with optional 2-digit minutes and seconds fields. + case CalendarPatternStyle::ShortOffset: + builder.appendff("{}", offset_hours); + if (offset_minutes > 0) { + builder.appendff("{}{:02}", separator, offset_minutes); + if (*offset_seconds > 0) + builder.appendff("{}{:02}", separator, *offset_seconds); + } + break; + + default: + VERIFY_NOT_REACHED(); + } + + // The digits used for hours, minutes and seconds fields in this format are the locale's default decimal digits. + auto offset = replace_digits_for_number_system(*number_system, builder.build()); + return formats->gmt_format.replace("{0}"sv, offset); +} + +// https://unicode.org/reports/tr35/tr35-dates.html#Time_Zone_Format_Terminology +String format_time_zone(StringView locale, StringView time_zone, CalendarPatternStyle style, AK::Time time) +{ + switch (style) { + case CalendarPatternStyle::Short: + case CalendarPatternStyle::Long: + case CalendarPatternStyle::ShortGeneric: + case CalendarPatternStyle::LongGeneric: + return get_time_zone_name(locale, time_zone, style).value_or(time_zone); + + case CalendarPatternStyle::ShortOffset: + case CalendarPatternStyle::LongOffset: + return format_time_zone_offset(locale, time_zone, style, time).value_or(time_zone); + + default: + VERIFY_NOT_REACHED(); + } +} } diff --git a/Userland/Libraries/LibUnicode/DateTimeFormat.h b/Userland/Libraries/LibUnicode/DateTimeFormat.h index e1104b30981..b84a3f8cdaf 100644 --- a/Userland/Libraries/LibUnicode/DateTimeFormat.h +++ b/Userland/Libraries/LibUnicode/DateTimeFormat.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -167,6 +168,17 @@ enum class CalendarSymbol : u8 { Weekday, }; +struct TimeZoneFormat { + StringView symbol_ahead_sign {}; + StringView symbol_ahead_separator {}; + + StringView symbol_behind_sign {}; + StringView symbol_behind_separator {}; + + StringView gmt_format {}; + StringView gmt_zero_format {}; +}; + HourCycle hour_cycle_from_string(StringView hour_cycle); StringView hour_cycle_to_string(HourCycle hour_cycle); @@ -197,6 +209,8 @@ Optional get_calendar_weekday_symbol(StringView locale, StringView c Optional get_calendar_day_period_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::DayPeriod value); Optional get_calendar_day_period_symbol_for_hour(StringView locale, StringView calendar, CalendarPatternStyle style, u8 hour); +String format_time_zone(StringView locale, StringView time_zone, CalendarPatternStyle style, AK::Time time); Optional get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style); +Optional get_time_zone_format(StringView locale); }