LibUnicode: Do not assume time zones & meta zones have a 1-to-1 mapping

The generator parses metaZones.json to form a mapping of meta zones to
time zones (the "golden zone" in TR-35). The parser erroneously assumed
this was a 1-to-1 mapping, but a single meta zone may map to multiple
golden zones.
This commit is contained in:
Timothy Flynn 2022-01-06 10:40:18 -05:00 committed by Andreas Kling
parent 62d8d1fdfd
commit 6d7d9dd324
Notes: sideshowbarker 2024-07-17 21:31:51 +09:00
4 changed files with 83 additions and 10 deletions

View File

@ -519,7 +519,7 @@ struct UnicodeLocaleData {
HashMap<String, HourCycleListIndexType> hour_cycles;
Vector<String> hour_cycle_regions;
HashMap<String, String> meta_zones;
HashMap<String, Vector<String>> meta_zones;
Vector<String> time_zones { "UTC"sv };
Vector<String> calendars;
@ -618,11 +618,12 @@ static ErrorOr<void> parse_meta_zones(String core_path, UnicodeLocaleData& local
auto const& meta_zone = mapping.as_object().get("_other"sv);
auto const& golden_zone = mapping.as_object().get("_type"sv);
locale_data.meta_zones.set(meta_zone.as_string(), golden_zone.as_string());
auto& golden_zones = locale_data.meta_zones.ensure(meta_zone.as_string());
golden_zones.append(golden_zone.as_string());
});
// UTC does not appear in metaZones.json. Define it for convenience so other parsers don't need to check for its existence.
locale_data.meta_zones.set("UTC"sv, "UTC"sv);
locale_data.meta_zones.set("UTC"sv, { "UTC"sv });
return {};
};
@ -1402,7 +1403,7 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
time_zone_formats.gmt_zero_format = locale_data.unique_strings.ensure(gmt_zero_format_string.as_string());
auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) {
auto const& golden_zone = locale_data.meta_zones.find(meta_zone)->value;
auto const& golden_zones = locale_data.meta_zones.find(meta_zone)->value;
TimeZone time_zone {};
if (auto long_name = parse_name("long"sv, meta_zone_object); long_name.has_value())
@ -1410,15 +1411,19 @@ static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, U
if (auto short_name = parse_name("short"sv, meta_zone_object); short_name.has_value())
time_zone.short_name = short_name.value();
auto time_zone_index = locale_data.time_zones.find_first_index(golden_zone).value();
time_zones[time_zone_index] = locale_data.unique_time_zones.ensure(move(time_zone));
for (auto const& golden_zone : golden_zones) {
auto time_zone_index = locale_data.time_zones.find_first_index(golden_zone).value();
time_zones[time_zone_index] = locale_data.unique_time_zones.ensure(move(time_zone));
}
};
meta_zone_object.as_object().for_each_member([&](auto const& meta_zone, JsonValue const&) {
auto const& golden_zone = locale_data.meta_zones.find(meta_zone)->value;
auto const& golden_zones = locale_data.meta_zones.find(meta_zone)->value;
if (!locale_data.time_zones.contains_slow(golden_zone))
locale_data.time_zones.append(golden_zone);
for (auto const& golden_zone : golden_zones) {
if (!locale_data.time_zones.contains_slow(golden_zone))
locale_data.time_zones.append(golden_zone);
}
});
time_zones.resize(locale_data.time_zones.size());

View File

@ -1,5 +1,6 @@
# Source files collected into the TEST_SOURCES list.
# NOTE(review): presumably each entry is built as its own test program by the rest of this file - confirm.
set(TEST_SOURCES
TestUnicodeCharacterTypes.cpp
TestUnicodeDateTimeFormat.cpp
TestUnicodeLocale.cpp
)

View File

@ -0,0 +1,67 @@
/*
* Copyright (c) 2022, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <AK/Array.h>
#include <AK/StringView.h>
#include <LibUnicode/DateTimeFormat.h>
// Checks Unicode::get_time_zone_name() against a table of expected display names,
// one row per (locale, style, time zone) combination.
TEST_CASE(time_zone_name)
{
// One row of the expectation table below.
struct TestData {
StringView locale;
Unicode::CalendarPatternStyle style;
StringView time_zone;
StringView expected_result; // Name that get_time_zone_name() must return for this row.
};
constexpr auto test_data = Array {
// "UTC" in the "en" locale, for each of the six styles exercised by this test.
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "Coordinated Universal Time"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "GMT"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "GMT"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "GMT"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "GMT"sv },
// The same six styles for "UTC" in the "ar" locale.
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "UTC"sv, "التوقيت العالمي المنسق"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::Short, "UTC"sv, "UTC"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortOffset, "UTC"sv, "غرينتش"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::LongOffset, "UTC"sv, "غرينتش"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::ShortGeneric, "UTC"sv, "غرينتش"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::LongGeneric, "UTC"sv, "غرينتش"sv },
// America/Los_Angeles and America/Vancouver are expected to produce identical names.
// NOTE(review): presumably because both time zones belong to the same meta zone - confirm against metaZones.json.
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "America/Los_Angeles"sv, "Pacific Daylight Time"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "America/Los_Angeles"sv, "PDT"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "America/Los_Angeles"sv, "توقيت المحيط الهادي الصيفي"sv },
// The "ar" locale does not have a short name for PDT. LibUnicode will need to fall back to GMT offset when we have that data.
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "America/Vancouver"sv, "Pacific Daylight Time"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "America/Vancouver"sv, "PDT"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "America/Vancouver"sv, "توقيت المحيط الهادي الصيفي"sv },
// The "ar" locale does not have a short name for PDT. LibUnicode will need to fall back to GMT offset when we have that data.
// Europe/London and Africa/Accra are likewise expected to produce identical names.
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "Europe/London"sv, "Greenwich Mean Time"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "Europe/London"sv, "GMT"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "Europe/London"sv, "توقيت غرينتش"sv },
// The "ar" locale does not have a short name for GMT. LibUnicode will need to fall back to GMT offset when we have that data.
TestData { "en"sv, Unicode::CalendarPatternStyle::Long, "Africa/Accra"sv, "Greenwich Mean Time"sv },
TestData { "en"sv, Unicode::CalendarPatternStyle::Short, "Africa/Accra"sv, "GMT"sv },
TestData { "ar"sv, Unicode::CalendarPatternStyle::Long, "Africa/Accra"sv, "توقيت غرينتش"sv },
// The "ar" locale does not have a short name for GMT. LibUnicode will need to fall back to GMT offset when we have that data.
};
// Look up every row and compare the returned name with the expectation.
for (auto const& test : test_data) {
auto time_zone = Unicode::get_time_zone_name(test.locale, test.time_zone, test.style);
// Every row must resolve to a name; the result is dereferenced on the next line.
VERIFY(time_zone.has_value());
EXPECT_EQ(*time_zone, test.expected_result);
}
}

View File

@ -35,7 +35,7 @@ enum class ScriptTag : u8;
enum class StandardNumberFormatType : u8;
enum class Style : u8;
enum class Territory : u8;
enum class TimeZone : u8;
enum class TimeZone : u16;
enum class Weekday : u8;
enum class WordBreakProperty : u8;