From 6d7d9dd3247109b9d730c047d3c85e18000bf81e Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 6 Jan 2022 10:40:18 -0500 Subject: [PATCH] LibUnicode: Do not assume time zones & meta zones have a 1-to-1 mapping The generator parses metaZones.json to form a mapping of meta zones to time zones (AKA "golden zone" in TR-35). This parser errantly assumed this was a 1-to-1 mapping. --- .../GenerateUnicodeDateTimeFormat.cpp | 23 ++++--- Tests/LibUnicode/CMakeLists.txt | 1 + .../LibUnicode/TestUnicodeDateTimeFormat.cpp | 67 +++++++++++++++++++ Userland/Libraries/LibUnicode/Forward.h | 2 +- 4 files changed, 83 insertions(+), 10 deletions(-) create mode 100644 Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp index e2d22afcbd3..102dddd920f 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp @@ -519,7 +519,7 @@ struct UnicodeLocaleData { HashMap hour_cycles; Vector hour_cycle_regions; - HashMap meta_zones; + HashMap> meta_zones; Vector time_zones { "UTC"sv }; Vector calendars; @@ -618,11 +618,12 @@ static ErrorOr parse_meta_zones(String core_path, UnicodeLocaleData& local auto const& meta_zone = mapping.as_object().get("_other"sv); auto const& golden_zone = mapping.as_object().get("_type"sv); - locale_data.meta_zones.set(meta_zone.as_string(), golden_zone.as_string()); + auto& golden_zones = locale_data.meta_zones.ensure(meta_zone.as_string()); + golden_zones.append(golden_zone.as_string()); }); // UTC does not appear in metaZones.json. Define it for convenience so other parsers don't need to check for its existence. 
- locale_data.meta_zones.set("UTC"sv, "UTC"sv); + locale_data.meta_zones.set("UTC"sv, { "UTC"sv }); return {}; }; @@ -1402,7 +1403,7 @@ static ErrorOr parse_time_zone_names(String locale_time_zone_names_path, U time_zone_formats.gmt_zero_format = locale_data.unique_strings.ensure(gmt_zero_format_string.as_string()); auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) { - auto const& golden_zone = locale_data.meta_zones.find(meta_zone)->value; + auto const& golden_zones = locale_data.meta_zones.find(meta_zone)->value; TimeZone time_zone {}; if (auto long_name = parse_name("long"sv, meta_zone_object); long_name.has_value()) @@ -1410,15 +1411,19 @@ static ErrorOr parse_time_zone_names(String locale_time_zone_names_path, U if (auto short_name = parse_name("short"sv, meta_zone_object); short_name.has_value()) time_zone.short_name = short_name.value(); - auto time_zone_index = locale_data.time_zones.find_first_index(golden_zone).value(); - time_zones[time_zone_index] = locale_data.unique_time_zones.ensure(move(time_zone)); + auto unique_time_zone_index = locale_data.unique_time_zones.ensure(move(time_zone)); /* time_zone is moved-from after this call, so register it once and reuse the result for every golden zone of this meta zone */ + for (auto const& golden_zone : golden_zones) { + time_zones[locale_data.time_zones.find_first_index(golden_zone).value()] = unique_time_zone_index; + } }; meta_zone_object.as_object().for_each_member([&](auto const& meta_zone, JsonValue const&) { - auto const& golden_zone = locale_data.meta_zones.find(meta_zone)->value; + auto const& golden_zones = locale_data.meta_zones.find(meta_zone)->value; - if (!locale_data.time_zones.contains_slow(golden_zone)) - locale_data.time_zones.append(golden_zone); + for (auto const& golden_zone : golden_zones) { + if (!locale_data.time_zones.contains_slow(golden_zone)) + locale_data.time_zones.append(golden_zone); + } }); time_zones.resize(locale_data.time_zones.size()); diff --git a/Tests/LibUnicode/CMakeLists.txt b/Tests/LibUnicode/CMakeLists.txt index 416993bb5cd..42c89a6d804 100644 --- 
a/Tests/LibUnicode/CMakeLists.txt +++ b/Tests/LibUnicode/CMakeLists.txt @@ -1,5 +1,6 @@ set(TEST_SOURCES TestUnicodeCharacterTypes.cpp + TestUnicodeDateTimeFormat.cpp TestUnicodeLocale.cpp ) diff --git a/Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp b/Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp new file mode 100644 index 00000000000..8cb76cecfef --- /dev/null +++ b/Tests/LibUnicode/TestUnicodeDateTimeFormat.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2022, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + +#include +#include +#include + +TEST_CASE(time_zone_name) /* Table-driven check of Unicode::get_time_zone_name(): each row gives a locale, a name style, a time zone and the name expected back. America/Los_Angeles and America/Vancouver are listed with identical expected names, as are Europe/London and Africa/Accra. */ +{ + struct TestData { + StringView locale; + Unicode::CalendarPatternStyle style; + StringView time_zone; + StringView expected_result; + }; + + constexpr auto test_data = Array { + TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "Coordinated Universal Time"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "GMT"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "GMT"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "GMT"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "GMT"sv }, + + TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "التوقيت العالمي المنسق"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "غرينتش"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "غرينتش"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "غرينتش"sv }, + TestData { "ar"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "غرينتش"sv }, + + TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "America/Los_Angeles"sv, "Pacific Daylight Time"sv }, + TestData { "en"sv, 
Unicode::CalendarPatternStyle::Short, "America/Los_Angeles"sv, "PDT"sv }, + + TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "America/Los_Angeles"sv, "توقيت المحيط الهادي الصيفي"sv }, + // The "ar" locale does not have a short name for PDT. LibUnicode will need to fall back to GMT offset when we have that data. + + TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "America/Vancouver"sv, "Pacific Daylight Time"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "America/Vancouver"sv, "PDT"sv }, + + TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "America/Vancouver"sv, "توقيت المحيط الهادي الصيفي"sv }, + // The "ar" locale does not have a short name for PDT. LibUnicode will need to fall back to GMT offset when we have that data. + + TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "Europe/London"sv, "Greenwich Mean Time"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "Europe/London"sv, "GMT"sv }, + + TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "Europe/London"sv, "توقيت غرينتش"sv }, + // The "ar" locale does not have a short name for GMT. LibUnicode will need to fall back to GMT offset when we have that data. + + TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "Africa/Accra"sv, "Greenwich Mean Time"sv }, + TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "Africa/Accra"sv, "GMT"sv }, + + TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "Africa/Accra"sv, "توقيت غرينتش"sv }, + // The "ar" locale does not have a short name for GMT. LibUnicode will need to fall back to GMT offset when we have that data. 
+ }; + + for (auto const& test : test_data) { + auto time_zone = Unicode::get_time_zone_name(test.locale, test.time_zone, test.style); + VERIFY(time_zone.has_value()); /* every row in the table is expected to produce a name */ + EXPECT_EQ(*time_zone, test.expected_result); + } +} diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h index 1ad8ffcacc8..dc093bcd2c2 100644 --- a/Userland/Libraries/LibUnicode/Forward.h +++ b/Userland/Libraries/LibUnicode/Forward.h @@ -35,7 +35,7 @@ enum class ScriptTag : u8; enum class StandardNumberFormatType : u8; enum class Style : u8; enum class Territory : u8; -enum class TimeZone : u8; +enum class TimeZone : u16; enum class Weekday : u8; enum class WordBreakProperty : u8;