LibUnicode: Create a nearly empty generator for date-time formatting

Similar to number formatting, the data for date-time formatting will be
located in its own generated file. This extracts the cldr-dates package
from the CLDR and sets up the generator plumbing to create the date-time
data files.
This commit is contained in:
Timothy Flynn 2021-11-19 11:36:28 -05:00 committed by Linus Groh
parent 914675e826
commit 5c57341672
Notes: sideshowbarker 2024-07-17 23:22:57 +09:00
3 changed files with 150 additions and 0 deletions

View File

@ -52,6 +52,9 @@ set(CLDR_ZIP_PATH "${CLDR_PATH}/cldr.zip")
set(CLDR_CORE_SOURCE cldr-core) set(CLDR_CORE_SOURCE cldr-core)
set(CLDR_CORE_PATH "${CLDR_PATH}/${CLDR_CORE_SOURCE}") set(CLDR_CORE_PATH "${CLDR_PATH}/${CLDR_CORE_SOURCE}")
set(CLDR_DATES_SOURCE cldr-dates-modern)
set(CLDR_DATES_PATH "${CLDR_PATH}/${CLDR_DATES_SOURCE}")
set(CLDR_LOCALES_SOURCE cldr-localenames-modern) set(CLDR_LOCALES_SOURCE cldr-localenames-modern)
set(CLDR_LOCALES_PATH "${CLDR_PATH}/${CLDR_LOCALES_SOURCE}") set(CLDR_LOCALES_PATH "${CLDR_PATH}/${CLDR_LOCALES_SOURCE}")
@ -140,6 +143,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
endif() endif()
extract_cldr_file("${CLDR_CORE_SOURCE}" "${CLDR_CORE_PATH}") extract_cldr_file("${CLDR_CORE_SOURCE}" "${CLDR_CORE_PATH}")
extract_cldr_file("${CLDR_DATES_SOURCE}" "${CLDR_DATES_PATH}")
extract_cldr_file("${CLDR_LOCALES_SOURCE}" "${CLDR_LOCALES_PATH}") extract_cldr_file("${CLDR_LOCALES_SOURCE}" "${CLDR_LOCALES_PATH}")
extract_cldr_file("${CLDR_MISC_SOURCE}" "${CLDR_MISC_PATH}") extract_cldr_file("${CLDR_MISC_SOURCE}" "${CLDR_MISC_PATH}")
extract_cldr_file("${CLDR_NUMBERS_SOURCE}" "${CLDR_NUMBERS_PATH}") extract_cldr_file("${CLDR_NUMBERS_SOURCE}" "${CLDR_NUMBERS_PATH}")
@ -148,6 +152,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
set(UNICODE_DATA_HEADER LibUnicode/UnicodeData.h) set(UNICODE_DATA_HEADER LibUnicode/UnicodeData.h)
set(UNICODE_DATA_IMPLEMENTATION LibUnicode/UnicodeData.cpp) set(UNICODE_DATA_IMPLEMENTATION LibUnicode/UnicodeData.cpp)
set(UNICODE_DATE_TIME_FORMAT_HEADER LibUnicode/UnicodeDateTimeFormat.h)
set(UNICODE_DATE_TIME_FORMAT_IMPLEMENTATION LibUnicode/UnicodeDateTimeFormat.cpp)
set(UNICODE_LOCALE_HEADER LibUnicode/UnicodeLocale.h) set(UNICODE_LOCALE_HEADER LibUnicode/UnicodeLocale.h)
set(UNICODE_LOCALE_IMPLEMENTATION LibUnicode/UnicodeLocale.cpp) set(UNICODE_LOCALE_IMPLEMENTATION LibUnicode/UnicodeLocale.cpp)
@ -160,6 +167,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
set(UNICODE_DATA_HEADER UnicodeData.h) set(UNICODE_DATA_HEADER UnicodeData.h)
set(UNICODE_DATA_IMPLEMENTATION UnicodeData.cpp) set(UNICODE_DATA_IMPLEMENTATION UnicodeData.cpp)
set(UNICODE_DATE_TIME_FORMAT_HEADER UnicodeDateTimeFormat.h)
set(UNICODE_DATE_TIME_FORMAT_IMPLEMENTATION UnicodeDateTimeFormat.cpp)
set(UNICODE_LOCALE_HEADER UnicodeLocale.h) set(UNICODE_LOCALE_HEADER UnicodeLocale.h)
set(UNICODE_LOCALE_IMPLEMENTATION UnicodeLocale.cpp) set(UNICODE_LOCALE_IMPLEMENTATION UnicodeLocale.cpp)
@ -176,6 +186,13 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
"${UNICODE_DATA_IMPLEMENTATION}" "${UNICODE_DATA_IMPLEMENTATION}"
arguments -u "${UNICODE_DATA_PATH}" -s "${SPECIAL_CASING_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -e "${EMOJI_DATA_PATH}" -m "${NAME_ALIAS_PATH}" -n "${NORM_PROPS_PATH}" arguments -u "${UNICODE_DATA_PATH}" -s "${SPECIAL_CASING_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -e "${EMOJI_DATA_PATH}" -m "${NAME_ALIAS_PATH}" -n "${NORM_PROPS_PATH}"
) )
invoke_generator(
"UnicodeDateTimeFormat"
Lagom::GenerateUnicodeDateTimeFormat
"${UNICODE_DATE_TIME_FORMAT_HEADER}"
"${UNICODE_DATE_TIME_FORMAT_IMPLEMENTATION}"
arguments -d "${CLDR_DATES_PATH}"
)
invoke_generator( invoke_generator(
"UnicodeLocale" "UnicodeLocale"
Lagom::GenerateUnicodeLocale Lagom::GenerateUnicodeLocale
@ -194,6 +211,8 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
set(UNICODE_DATA_SOURCES set(UNICODE_DATA_SOURCES
${UNICODE_DATA_HEADER} ${UNICODE_DATA_HEADER}
${UNICODE_DATA_IMPLEMENTATION} ${UNICODE_DATA_IMPLEMENTATION}
${UNICODE_DATE_TIME_FORMAT_HEADER}
${UNICODE_DATE_TIME_FORMAT_IMPLEMENTATION}
${UNICODE_LOCALE_HEADER} ${UNICODE_LOCALE_HEADER}
${UNICODE_LOCALE_IMPLEMENTATION} ${UNICODE_LOCALE_IMPLEMENTATION}
${UNICODE_NUMBER_FORMAT_HEADER} ${UNICODE_NUMBER_FORMAT_HEADER}

View File

@ -1,3 +1,4 @@
lagom_tool(GenerateUnicodeData SOURCES GenerateUnicodeData.cpp LIBS LagomMain) lagom_tool(GenerateUnicodeData SOURCES GenerateUnicodeData.cpp LIBS LagomMain)
lagom_tool(GenerateUnicodeDateTimeFormat SOURCES GenerateUnicodeDateTimeFormat.cpp LIBS LagomMain)
lagom_tool(GenerateUnicodeLocale SOURCES GenerateUnicodeLocale.cpp LIBS LagomMain) lagom_tool(GenerateUnicodeLocale SOURCES GenerateUnicodeLocale.cpp LIBS LagomMain)
lagom_tool(GenerateUnicodeNumberFormat SOURCES GenerateUnicodeNumberFormat.cpp LIBS LagomMain) lagom_tool(GenerateUnicodeNumberFormat SOURCES GenerateUnicodeNumberFormat.cpp LIBS LagomMain)

View File

@ -0,0 +1,130 @@
/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "GeneratorUtil.h"
#include <AK/Format.h>
#include <AK/HashMap.h>
#include <AK/JsonObject.h>
#include <AK/JsonParser.h>
#include <AK/JsonValue.h>
#include <AK/LexicalPath.h>
#include <AK/SourceGenerator.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <LibCore/ArgsParser.h>
#include <LibCore/DirIterator.h>
#include <LibCore/File.h>
// Integer type used in this generator to index into the unique string storage
// (see UnicodeLocaleData::unique_strings) and to parameterize CanonicalLanguageID.
using StringIndexType = u16;
// Textual spelling of StringIndexType; handed to the SourceGenerator under the
// "string_index_type" key when the implementation file is generated.
constexpr auto s_string_index_type = "u16"sv;
// Per-locale record stored in UnicodeLocaleData::locales. It has no members yet;
// presumably the parsed date-time formatting data will be added here later.
struct Locale {
};
// Aggregated state shared by the parsing and code generation steps of this tool.
struct UnicodeLocaleData {
// Storage for strings referenced by StringIndexType indices; it is passed to
// CanonicalLanguageID::parse and later emits itself into the implementation file.
UniqueStringStorage<StringIndexType> unique_strings;
// One entry per locale found in the dates directory, keyed by the locale name
// with variants removed ("language[-script][-region]").
HashMap<String, Locale> locales;
};
// Walks every entry of the cldr-dates directory and makes sure that
// locale_data.locales contains an entry for each locale found there.
//
// The map key is derived from the basename of each entry: it is parsed as a
// canonical language ID and rebuilt as "language[-script][-region]", which drops
// any variants. Errors from opening the directory, iterating it, or parsing a
// locale name are propagated to the caller.
static ErrorOr<void> parse_all_locales(String dates_path, UnicodeLocaleData& locale_data)
{
    // Reduces a locale directory path to its variant-free locale name.
    auto locale_name_without_variants = [&locale_data](String locale_path) -> ErrorOr<String> {
        auto& strings = locale_data.unique_strings;

        auto directory_name = LexicalPath::basename(locale_path);
        auto language_id = TRY(CanonicalLanguageID<StringIndexType>::parse(strings, directory_name));

        auto language = strings.get(language_id.language);
        auto script = strings.get(language_id.script);
        auto region = strings.get(language_id.region);

        StringBuilder name;
        name.append(language);

        // Script and region are optional subtags; only emit the ones that are present.
        if (!script.is_empty())
            name.appendff("-{}", script);
        if (!region.is_empty())
            name.appendff("-{}", region);

        return name.build();
    };

    auto locale_directories = TRY(path_to_dir_iterator(move(dates_path)));

    while (locale_directories.has_next()) {
        auto locale_path = TRY(next_path_from_dir_iterator(locale_directories));
        auto locale_name = TRY(locale_name_without_variants(locale_path));

        // No data is parsed into the entry yet; ensure() only guarantees that it exists.
        [[maybe_unused]] auto& locale_entry = locale_data.locales.ensure(locale_name);
    }

    return {};
}
// Writes the generated header to `file`. For now the header only contains the
// include boilerplate and an empty Unicode namespace; the locale data argument
// is accepted but not used.
static void generate_unicode_locale_header(Core::File& file, UnicodeLocaleData&)
{
    StringBuilder output;
    SourceGenerator source { output };

    source.append(R"~~~(
#pragma once
#include <AK/StringView.h>
#include <LibUnicode/Forward.h>
namespace Unicode {
}
)~~~");

    // VERIFY asserts that the write reported success.
    auto const contents = source.as_string_view();
    VERIFY(file.write(contents));
}
// Writes the generated implementation to `file`: the include boilerplate followed
// by the Unicode::Detail namespace, into which the unique string storage of
// `locale_data` emits its generated code.
static void generate_unicode_locale_implementation(Core::File& file, UnicodeLocaleData& locale_data)
{
    StringBuilder output;
    SourceGenerator source { output };

    // Made available to the generator templates as the "string_index_type" key.
    source.set("string_index_type"sv, s_string_index_type);

    // Opening part: includes and the start of the namespace.
    source.append(R"~~~(
#include <AK/Array.h>
#include <LibUnicode/UnicodeDateTimeFormat.h>
namespace Unicode::Detail {
)~~~");

    locale_data.unique_strings.generate(source);

    // Closing part: end of the namespace.
    source.append(R"~~~(
}
)~~~");

    // VERIFY asserts that the write reported success.
    auto const contents = source.as_string_view();
    VERIFY(file.write(contents));
}
// Entry point of the date-time format generator.
//
// Command line options (all three are required):
//   -h / --generated-header-path          header file to generate
//   -c / --generated-implementation-path  implementation file to generate
//   -d / --dates-path                     path to the cldr-dates directory
//
// Returns 0 on success. If an option is missing, the usage text is printed to
// stderr and an error is returned; errors from opening the output files or from
// parsing the locales are propagated as well.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView generated_header_path;
    StringView generated_implementation_path;
    StringView dates_path;

    Core::ArgsParser args_parser;
    args_parser.add_option(generated_header_path, "Path to the Unicode date-time format header file to generate", "generated-header-path", 'h', "generated-header-path");
    args_parser.add_option(generated_implementation_path, "Path to the Unicode date-time format implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
    args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
    args_parser.parse(arguments);

    // Validate every option up front (including dates_path) so that no output
    // file is opened when the invocation is incomplete.
    if (generated_header_path.is_empty() || generated_implementation_path.is_empty() || dates_path.is_empty()) {
        args_parser.print_usage(stderr, arguments.argv[0]);
        return Error::from_string_literal("Must provide all command line options"sv);
    }

    auto generated_header_file = TRY(Core::File::open(generated_header_path, Core::OpenMode::ReadWrite));
    auto generated_implementation_file = TRY(Core::File::open(generated_implementation_path, Core::OpenMode::ReadWrite));

    UnicodeLocaleData locale_data;
    TRY(parse_all_locales(dates_path, locale_data));

    generate_unicode_locale_header(generated_header_file, locale_data);
    generate_unicode_locale_implementation(generated_implementation_file, locale_data);

    return 0;
}