LibUnicode: Replace case transformations and comparison with ICU

There are a couple of differences here due to using ICU:

1. Titlecasing behaves slightly differently. We previously transformed "123dollars" to "123Dollars", as we would use word segmentation to split a string into words, then transform the first cased character to titlecase. ICU doesn't go quite that far, and leaves the string as "123dollars". While this is a behavior change, the only user of this API is the `text-transform: capitalize;` CSS rule, and we now match the behavior of other browsers.

2. There isn't an API to compare strings with case insensitivity without allocating case-folded strings for both the left- and right-hand-side strings. Our implementation was previously allocation-free; however, in a benchmark, ICU is still ~1.4x faster.
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/5cf818e305 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/219
2024-08-15 16:30:36 +03:00 · 2024-06-19 16:39:30 -04:00 · 2024-06-19 16:39:30 -04:00 · 5cf818e305 · 2024-07-16 23:17:55 +09:00
commit 5cf818e305
parent a3a7a65b1c
12 changed files with 111 additions and 1092 deletions
--- a/Meta/CMake/unicode_data.cmake
+++ b/Meta/CMake/unicode_data.cmake
@ -13,12 +13,6 @@ set(UCD_ZIP_PATH "${UCD_PATH}/UCD.zip")
 set(UNICODE_DATA_SOURCE "UnicodeData.txt")
 set(UNICODE_DATA_PATH "${UCD_PATH}/${UNICODE_DATA_SOURCE}")

-set(SPECIAL_CASING_SOURCE "SpecialCasing.txt")
-set(SPECIAL_CASING_PATH "${UCD_PATH}/${SPECIAL_CASING_SOURCE}")
-
-set(CASE_FOLDING_SOURCE "CaseFolding.txt")
-set(CASE_FOLDING_PATH "${UCD_PATH}/${CASE_FOLDING_SOURCE}")
-
 set(DERIVED_GENERAL_CATEGORY_SOURCE "extracted/DerivedGeneralCategory.txt")
 set(DERIVED_GENERAL_CATEGORY_PATH "${UCD_PATH}/${DERIVED_GENERAL_CATEGORY_SOURCE}")

@ -72,8 +66,6 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
    if (ENABLE_NETWORK_DOWNLOADS)
        download_file("${UCD_ZIP_URL}" "${UCD_ZIP_PATH}" SHA256 "${UCD_SHA256}")
        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${UNICODE_DATA_SOURCE}" "${UNICODE_DATA_PATH}")
-        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SPECIAL_CASING_SOURCE}" "${SPECIAL_CASING_PATH}")
-        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${CASE_FOLDING_SOURCE}" "${CASE_FOLDING_PATH}")
        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_GENERAL_CATEGORY_SOURCE}" "${DERIVED_GENERAL_CATEGORY_PATH}")
        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_LIST_SOURCE}" "${PROP_LIST_PATH}")
        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_CORE_PROP_SOURCE}" "${DERIVED_CORE_PROP_PATH}")
@ -111,7 +103,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
        "${UCD_VERSION_FILE}"
        "${UNICODE_DATA_HEADER}"
        "${UNICODE_DATA_IMPLEMENTATION}"
-        arguments -u "${UNICODE_DATA_PATH}" -s "${SPECIAL_CASING_PATH}" -o "${CASE_FOLDING_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -e "${EMOJI_DATA_PATH}" -n "${NORM_PROPS_PATH}" -f "${GRAPHEME_BREAK_PROP_PATH}" -w "${WORD_BREAK_PROP_PATH}" -i "${SENTENCE_BREAK_PROP_PATH}"
+        arguments -u "${UNICODE_DATA_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -e "${EMOJI_DATA_PATH}" -n "${NORM_PROPS_PATH}" -f "${GRAPHEME_BREAK_PROP_PATH}" -w "${WORD_BREAK_PROP_PATH}" -i "${SENTENCE_BREAK_PROP_PATH}"
    )
    invoke_generator(
        "EmojiData"
--- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
@ -21,24 +21,6 @@
 #include <LibCore/ArgsParser.h>
 #include <LibUnicode/CharacterTypes.h>

-// https://www.unicode.org/reports/tr44/#SpecialCasing.txt
-struct SpecialCasing {
-    u32 index { 0 };
-    u32 code_point { 0 };
-    Vector<u32> lowercase_mapping;
-    Vector<u32> uppercase_mapping;
-    Vector<u32> titlecase_mapping;
-    ByteString locale;
-    ByteString condition;
-};
-
-// https://www.unicode.org/reports/tr44/#CaseFolding.txt
-struct CaseFolding {
-    u32 code_point { 0 };
-    StringView status { "Common"sv };
-    Vector<u32> mapping { 0 };
-};
-
 // https://www.unicode.org/reports/tr44/#PropList.txt
 using PropList = HashMap<ByteString, Vector<Unicode::CodePointRange>>;

@ -57,25 +39,6 @@ struct Normalization {

 using NormalizationProps = HashMap<ByteString, Vector<Normalization>>;

-struct CasingTable {
-    bool operator==(CasingTable const& other) const
-    {
-        return canonical_combining_class == other.canonical_combining_class
-            && simple_lowercase_mapping == other.simple_lowercase_mapping
-            && simple_uppercase_mapping == other.simple_uppercase_mapping
-            && simple_titlecase_mapping == other.simple_titlecase_mapping
-            && special_casing_indices == other.special_casing_indices
-            && case_folding_indices == other.case_folding_indices;
-    }
-
-    u8 canonical_combining_class { 0 };
-    Optional<u32> simple_uppercase_mapping;
-    Optional<u32> simple_lowercase_mapping;
-    Optional<u32> simple_titlecase_mapping;
-    Vector<u32> special_casing_indices;
-    Vector<u32> case_folding_indices;
-};
-
 // https://www.unicode.org/reports/tr44/#UnicodeData.txt
 struct CodePointData {
    u32 code_point { 0 };
@ -87,7 +50,6 @@ struct CodePointData {
    bool bidi_mirrored { false };
    ByteString unicode_1_name;
    ByteString iso_comment;
-    CasingTable casing;
 };

 using PropertyTable = Vector<bool>;
@ -111,15 +73,6 @@ struct CodePointBidiClass {
 };

 struct UnicodeData {
-    Vector<SpecialCasing> special_casing;
-    u32 largest_special_casing_mapping_size { 0 };
-    Vector<ByteString> conditions;
-    Vector<ByteString> locales;
-
-    Vector<CaseFolding> case_folding;
-    u32 largest_case_folding_mapping_size { 0 };
-    Vector<StringView> statuses;
-
    Vector<CodePointData> code_point_data;

    // https://www.unicode.org/reports/tr44/#General_Category_Values
@ -149,7 +102,6 @@ struct UnicodeData {
    PropList word_break_props;
    PropList sentence_break_props;

-    CodePointTables<CasingTable> casing_tables;
    CodePointTables<PropertyTable> general_category_tables;
    CodePointTables<PropertyTable> property_tables;
    CodePointTables<PropertyTable> script_tables;
@ -180,125 +132,6 @@ static ByteString sanitize_entry(ByteString const& entry)
    return builder.to_byte_string();
 }

-static ErrorOr<void> parse_special_casing(Core::InputBufferedFile& file, UnicodeData& unicode_data)
-{
-    Array<u8, 1024> buffer;
-
-    while (TRY(file.can_read_line())) {
-        auto line = TRY(file.read_line(buffer));
-
-        if (line.is_empty() || line.starts_with('#'))
-            continue;
-
-        if (auto index = line.find('#'); index.has_value())
-            line = line.substring_view(0, *index);
-
-        auto segments = line.split_view(';', SplitBehavior::KeepEmpty);
-        VERIFY(segments.size() == 5 || segments.size() == 6);
-
-        SpecialCasing casing {};
-        casing.code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[0]).value();
-        casing.lowercase_mapping = parse_code_point_list(segments[1]);
-        casing.titlecase_mapping = parse_code_point_list(segments[2]);
-        casing.uppercase_mapping = parse_code_point_list(segments[3]);
-
-        if (auto condition = segments[4].trim_whitespace(); !condition.is_empty()) {
-            auto conditions = condition.split_view(' ', SplitBehavior::KeepEmpty);
-            VERIFY(conditions.size() == 1 || conditions.size() == 2);
-
-            if (conditions.size() == 2) {
-                casing.locale = conditions[0];
-                casing.condition = conditions[1];
-            } else if (all_of(conditions[0], is_ascii_lower_alpha)) {
-                casing.locale = conditions[0];
-            } else {
-                casing.condition = conditions[0];
-            }
-
-            if (!casing.locale.is_empty()) {
-                casing.locale = ByteString::formatted("{:c}{}", to_ascii_uppercase(casing.locale[0]), casing.locale.substring_view(1));
-
-                if (!unicode_data.locales.contains_slow(casing.locale))
-                    unicode_data.locales.append(casing.locale);
-            }
-
-            casing.condition = casing.condition.replace("_"sv, ""sv, ReplaceMode::All);
-
-            if (!casing.condition.is_empty() && !unicode_data.conditions.contains_slow(casing.condition))
-                unicode_data.conditions.append(casing.condition);
-        }
-
-        unicode_data.largest_special_casing_mapping_size = max(unicode_data.largest_special_casing_mapping_size, casing.lowercase_mapping.size());
-        unicode_data.largest_special_casing_mapping_size = max(unicode_data.largest_special_casing_mapping_size, casing.titlecase_mapping.size());
-        unicode_data.largest_special_casing_mapping_size = max(unicode_data.largest_special_casing_mapping_size, casing.uppercase_mapping.size());
-
-        unicode_data.special_casing.append(move(casing));
-    }
-
-    quick_sort(unicode_data.special_casing, [](auto const& lhs, auto const& rhs) {
-        if (lhs.code_point != rhs.code_point)
-            return lhs.code_point < rhs.code_point;
-        if (lhs.locale.is_empty() && !rhs.locale.is_empty())
-            return false;
-        if (!lhs.locale.is_empty() && rhs.locale.is_empty())
-            return true;
-        return lhs.locale < rhs.locale;
-    });
-
-    for (u32 i = 0; i < unicode_data.special_casing.size(); ++i)
-        unicode_data.special_casing[i].index = i;
-
-    return {};
-}
-
-static ErrorOr<void> parse_case_folding(Core::InputBufferedFile& file, UnicodeData& unicode_data)
-{
-    Array<u8, 1024> buffer;
-
-    while (TRY(file.can_read_line())) {
-        auto line = TRY(file.read_line(buffer));
-        if (line.is_empty() || line.starts_with('#'))
-            continue;
-
-        auto segments = line.split_view(';', SplitBehavior::KeepEmpty);
-        VERIFY(segments.size() == 4);
-
-        CaseFolding folding {};
-        folding.code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[0]).value();
-        folding.mapping = parse_code_point_list(segments[2]);
-
-        switch (segments[1].trim_whitespace()[0]) {
-        case 'C':
-            folding.status = "Common"sv;
-            break;
-        case 'F':
-            folding.status = "Full"sv;
-            break;
-        case 'S':
-            folding.status = "Simple"sv;
-            break;
-        case 'T':
-            folding.status = "Special"sv;
-            break;
-        }
-
-        unicode_data.largest_case_folding_mapping_size = max(unicode_data.largest_case_folding_mapping_size, folding.mapping.size());
-
-        if (!unicode_data.statuses.contains_slow(folding.status))
-            unicode_data.statuses.append(folding.status);
-
-        unicode_data.case_folding.append(move(folding));
-    }
-
-    quick_sort(unicode_data.case_folding, [](auto const& lhs, auto const& rhs) {
-        if (lhs.code_point != rhs.code_point)
-            return lhs.code_point < rhs.code_point;
-        return lhs.status < rhs.status;
-    });
-
-    return {};
-}
-
 static ErrorOr<void> parse_prop_list(Core::InputBufferedFile& file, PropList& prop_list, bool multi_value_property = false, bool sanitize_property = false)
 {
    Array<u8, 1024> buffer;
@ -503,7 +336,6 @@ static ErrorOr<void> parse_unicode_data(Core::InputBufferedFile& file, UnicodeDa
        CodePointData data {};
        data.code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[0]).value();
        data.name = segments[1];
-        data.casing.canonical_combining_class = AK::StringUtils::convert_to_uint<u8>(segments[3]).value();
        data.bidi_class = segments[4];
        data.numeric_value_decimal = AK::StringUtils::convert_to_int<i8>(segments[6]);
        data.numeric_value_digit = AK::StringUtils::convert_to_int<i8>(segments[7]);
@ -511,9 +343,6 @@ static ErrorOr<void> parse_unicode_data(Core::InputBufferedFile& file, UnicodeDa
        data.bidi_mirrored = segments[9] == "Y"sv;
        data.unicode_1_name = segments[10];
        data.iso_comment = segments[11];
-        data.casing.simple_uppercase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[12]);
-        data.casing.simple_lowercase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[13]);
-        data.casing.simple_titlecase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[14]);

        if (!assigned_code_point_range_start.has_value())
            assigned_code_point_range_start = data.code_point;
@ -547,16 +376,6 @@ static ErrorOr<void> parse_unicode_data(Core::InputBufferedFile& file, UnicodeDa
            }
        }

-        for (auto const& casing : unicode_data.special_casing) {
-            if (casing.code_point == data.code_point)
-                data.casing.special_casing_indices.append(casing.index);
-        }
-
-        for (size_t i = 0; i < unicode_data.case_folding.size(); ++i) {
-            if (auto const& folding = unicode_data.case_folding[i]; folding.code_point == data.code_point)
-                data.casing.case_folding_indices.append(i);
-        }
-
        unicode_data.bidirectional_classes.set(data.bidi_class, AK::HashSetExistingEntryBehavior::Keep);

        previous_code_point = data.code_point;
@ -570,8 +389,6 @@ static ErrorOr<void> generate_unicode_data_header(Core::InputBufferedFile& file,
 {
    StringBuilder builder;
    SourceGenerator generator { builder };
-    generator.set("special_casing_mapping_size", ByteString::number(unicode_data.largest_special_casing_mapping_size));
-    generator.set("case_folding_mapping_size", ByteString::number(unicode_data.largest_case_folding_mapping_size));

    auto generate_enum = [&](StringView name, StringView default_, auto values, Vector<Alias> aliases = {}) {
        quick_sort(values);
@ -619,9 +436,6 @@ enum class @name@ : @underlying@ {)~~~");
 namespace Unicode {
 )~~~");

-    generate_enum("Locale"sv, "None"sv, unicode_data.locales);
-    generate_enum("Condition"sv, "None"sv, move(unicode_data.conditions));
-    generate_enum("CaseFoldingStatus"sv, {}, move(unicode_data.statuses));
    generate_enum("GeneralCategory"sv, {}, unicode_data.general_categories.keys(), unicode_data.general_category_aliases);
    generate_enum("Property"sv, {}, unicode_data.prop_list.keys(), unicode_data.prop_aliases);
    generate_enum("Script"sv, {}, unicode_data.script_list.keys(), unicode_data.script_aliases);
@ -631,35 +445,6 @@ namespace Unicode {
    generate_enum("BidirectionalClass"sv, {}, unicode_data.bidirectional_classes.values());

    generator.append(R"~~~(
-struct SpecialCasing {
-    u32 code_point { 0 };
-
-    u32 lowercase_mapping[@special_casing_mapping_size@];
-    u32 lowercase_mapping_size { 0 };
-
-    u32 uppercase_mapping[@special_casing_mapping_size@];
-    u32 uppercase_mapping_size { 0 };
-
-    u32 titlecase_mapping[@special_casing_mapping_size@];
-    u32 titlecase_mapping_size { 0 };
-
-    Locale locale { Locale::None };
-    Condition condition { Condition::None };
-};
-
-struct CaseFolding {
-    u32 code_point { 0 };
-    CaseFoldingStatus status { CaseFoldingStatus::Common };
-
-    u32 mapping[@case_folding_mapping_size@];
-    u32 mapping_size { 0 };
-};
-
-Optional<Locale> locale_from_string(StringView locale);
-
-ReadonlySpan<SpecialCasing> special_case_mapping(u32 code_point);
-ReadonlySpan<CaseFolding> case_folding_mapping(u32 code_point);
-
 }
 )~~~");

@ -672,9 +457,6 @@ static ErrorOr<void> generate_unicode_data_implementation(Core::InputBufferedFil
    StringBuilder builder;
    SourceGenerator generator { builder };

-    generator.set("special_casing_size", ByteString::number(unicode_data.special_casing.size()));
-    generator.set("case_folding_size", ByteString::number(unicode_data.case_folding.size()));
-
    generator.set("CODE_POINT_TABLES_LSB_COUNT", TRY(String::number(CODE_POINT_TABLES_LSB_COUNT)));
    generator.set("CODE_POINT_TABLES_LSB_MASK", TRY(String::formatted("{:#x}", CODE_POINT_TABLES_LSB_MASK)));

@ -693,83 +475,7 @@ static ErrorOr<void> generate_unicode_data_implementation(Core::InputBufferedFil
 namespace Unicode {
 )~~~");

-    auto append_list_and_size = [&](auto const& list, StringView format) {
-        if (list.is_empty()) {
-            generator.append(", {}, 0");
-            return;
-        }
-
-        bool first = true;
-        generator.append(", {");
-        for (auto const& item : list) {
-            generator.append(first ? " "sv : ", "sv);
-            generator.append(ByteString::formatted(format, item));
-            first = false;
-        }
-        generator.append(ByteString::formatted(" }}, {}", list.size()));
-    };
-
    generator.append(R"~~~(
-static constexpr Array<SpecialCasing, @special_casing_size@> s_special_case { {)~~~");
-
-    for (auto const& casing : unicode_data.special_casing) {
-        generator.set("code_point", ByteString::formatted("{:#x}", casing.code_point));
-        generator.append(R"~~~(
-    { @code_point@)~~~");
-
-        constexpr auto format = "{:#x}"sv;
-        append_list_and_size(casing.lowercase_mapping, format);
-        append_list_and_size(casing.uppercase_mapping, format);
-        append_list_and_size(casing.titlecase_mapping, format);
-
-        generator.set("locale", casing.locale.is_empty() ? "None" : casing.locale);
-        generator.append(", Locale::@locale@");
-
-        generator.set("condition", casing.condition.is_empty() ? "None" : casing.condition);
-        generator.append(", Condition::@condition@");
-
-        generator.append(" },");
-    }
-
-    generator.append(R"~~~(
-} };
-
-static constexpr Array<CaseFolding, @case_folding_size@> s_case_folding { {)~~~");
-
-    for (auto const& folding : unicode_data.case_folding) {
-        generator.set("code_point", ByteString::formatted("{:#x}", folding.code_point));
-        generator.set("status", folding.status);
-        generator.append(R"~~~(
-    { @code_point@, CaseFoldingStatus::@status@)~~~");
-
-        append_list_and_size(folding.mapping, "{:#x}"sv);
-        generator.append(" },");
-    }
-
-    generator.append(R"~~~(
-} };
-
-struct CasingTable {
-    u8 canonical_combining_class { 0 };
-    i32 simple_uppercase_mapping { -1 };
-    i32 simple_lowercase_mapping { -1 };
-    i32 simple_titlecase_mapping { -1 };
-
-    u32 special_casing_start_index { 0 };
-    u32 special_casing_size { 0 };
-
-    u32 case_folding_start_index { 0 };
-    u32 case_folding_size { 0 };
-};
-
-template<typename MappingType>
-struct CodePointComparator {
-    constexpr int operator()(u32 code_point, MappingType const& mapping)
-    {
-        return code_point - mapping.code_point;
-    }
-};
-
 struct BidiClassData {
    CodePointRange code_point_range {};
    BidirectionalClass bidi_class {};
@ -784,45 +490,6 @@ struct CodePointBidiClassComparator : public CodePointRangeComparator {

 )~~~");

-    auto append_casing_table = [&](auto collection_snake, auto const& unique_properties) -> ErrorOr<void> {
-        generator.set("name", TRY(String::formatted("{}_unique_properties", collection_snake)));
-        generator.set("size", TRY(String::number(unique_properties.size())));
-
-        auto optional_code_point_to_string = [](auto const& code_point) -> ErrorOr<String> {
-            if (!code_point.has_value())
-                return "-1"_string;
-            return String::number(*code_point);
-        };
-        auto first_index_to_string = [](auto const& list) -> ErrorOr<String> {
-            if (list.is_empty())
-                return "0"_string;
-            return String::number(list.first());
-        };
-
-        generator.append(R"~~~(
-static constexpr Array<CasingTable, @size@> @name@ { {)~~~");
-
-        for (auto const& casing : unique_properties) {
-            generator.set("canonical_combining_class", TRY(String::number(casing.canonical_combining_class)));
-            generator.set("simple_uppercase_mapping", TRY(optional_code_point_to_string(casing.simple_uppercase_mapping)));
-            generator.set("simple_lowercase_mapping", TRY(optional_code_point_to_string(casing.simple_lowercase_mapping)));
-            generator.set("simple_titlecase_mapping", TRY(optional_code_point_to_string(casing.simple_titlecase_mapping)));
-            generator.set("special_casing_start_index", TRY(first_index_to_string(casing.special_casing_indices)));
-            generator.set("special_casing_size", TRY(String::number(casing.special_casing_indices.size())));
-            generator.set("case_folding_start_index", TRY(first_index_to_string(casing.case_folding_indices)));
-            generator.set("case_folding_size", TRY(String::number(casing.case_folding_indices.size())));
-
-            generator.append(R"~~~(
-    { @canonical_combining_class@, @simple_uppercase_mapping@, @simple_lowercase_mapping@, @simple_titlecase_mapping@, @special_casing_start_index@, @special_casing_size@, @case_folding_start_index@, @case_folding_size@ },)~~~");
-        }
-
-        generator.append(R"~~~(
-} };
-)~~~");
-
-        return {};
-    };
-
    auto append_property_table = [&](auto collection_snake, auto const& unique_properties) -> ErrorOr<void> {
        generator.set("name", TRY(String::formatted("{}_unique_properties", collection_snake)));
        generator.set("outer_size", TRY(String::number(unique_properties.size())));
@ -889,7 +556,6 @@ static constexpr Array<@type@, @size@> @name@ { {
        return {};
    };

-    TRY(append_code_point_tables("s_casings"sv, unicode_data.casing_tables, append_casing_table));
    TRY(append_code_point_tables("s_general_categories"sv, unicode_data.general_category_tables, append_property_table));
    TRY(append_code_point_tables("s_properties"sv, unicode_data.property_tables, append_property_table));
    TRY(append_code_point_tables("s_scripts"sv, unicode_data.script_tables, append_property_table));
@ -926,64 +592,6 @@ static constexpr Array<BidiClassData, @size@> s_bidirectional_classes { {
    }

    generator.append(R"~~~(
-static CasingTable const& casing_table_for_code_point(u32 code_point)
-{
-    auto stage1_index = code_point >> @CODE_POINT_TABLES_LSB_COUNT@;
-    auto stage2_index = s_casings_stage1[stage1_index] + (code_point & @CODE_POINT_TABLES_LSB_MASK@);
-    auto unique_properties_index = s_casings_stage2[stage2_index];
-
-    return s_casings_unique_properties[unique_properties_index];
-}
-)~~~");
-
-    auto append_code_point_mapping_search = [&](StringView method, StringView mapping, Optional<StringView> const& fallback = {}) {
-        generator.set("method", method);
-        generator.set("mapping", mapping);
-        generator.append(R"~~~(
-u32 @method@(u32 code_point)
-{
-    auto const& casing_table = casing_table_for_code_point(code_point);
-    auto mapping = casing_table.@mapping@;
-)~~~");
-
-        if (fallback.has_value()) {
-            generator.set("fallback", *fallback);
-            generator.append(R"~~~(
-    return mapping == -1 ? @fallback@ : static_cast<u32>(mapping);)~~~");
-        } else {
-            generator.append(R"~~~(
-    return mapping;)~~~");
-        }
-
-        generator.append(R"~~~(
-}
-)~~~");
-    };
-
-    append_code_point_mapping_search("canonical_combining_class"sv, "canonical_combining_class"sv);
-    append_code_point_mapping_search("to_unicode_uppercase"sv, "simple_uppercase_mapping"sv, "code_point"sv);
-    append_code_point_mapping_search("to_unicode_lowercase"sv, "simple_lowercase_mapping"sv, "code_point"sv);
-    append_code_point_mapping_search("to_unicode_titlecase"sv, "simple_titlecase_mapping"sv, "code_point"sv);
-
-    generator.append(R"~~~(
-ReadonlySpan<SpecialCasing> special_case_mapping(u32 code_point)
-{
-    auto const& casing_table = casing_table_for_code_point(code_point);
-    if (casing_table.special_casing_size == 0)
-        return {};
-
-    return s_special_case.span().slice(casing_table.special_casing_start_index, casing_table.special_casing_size);
-}
-
-ReadonlySpan<CaseFolding> case_folding_mapping(u32 code_point)
-{
-    auto const& casing_table = casing_table_for_code_point(code_point);
-    if (casing_table.case_folding_size == 0)
-        return {};
-
-    return s_case_folding.span().slice(casing_table.case_folding_start_index, casing_table.case_folding_size);
-}
-
 Optional<BidirectionalClass> bidirectional_class(u32 code_point)
 {
    if (auto const* entry = binary_search(s_bidirectional_classes, code_point, nullptr, CodePointBidiClassComparator {}))
@ -1036,8 +644,6 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
        return {};
    };

-    TRY(append_from_string("Locale"sv, "locale"sv, unicode_data.locales, {}));
-
    TRY(append_prop_search("GeneralCategory"sv, "general_category"sv, "s_general_categories"sv));
    TRY(append_from_string("GeneralCategory"sv, "general_category"sv, unicode_data.general_categories, unicode_data.general_category_aliases));

@ -1188,22 +794,6 @@ static ErrorOr<void> normalize_script_extensions(PropList& script_extensions, Pr
    return {};
 }

-struct CasingMetadata {
-    using ConstIterator = typename Vector<CodePointData>::ConstIterator;
-
-    CasingMetadata(Vector<CodePointData> const& code_point_data)
-        : iterator(code_point_data.begin())
-        , end(code_point_data.end())
-    {
-    }
-
-    ConstIterator iterator;
-    ConstIterator const end;
-
-    Vector<size_t> current_block;
-    HashMap<decltype(current_block), size_t> unique_blocks;
-};
-
 struct PropertyMetadata {
    static ErrorOr<PropertyMetadata> create(PropList& property_list)
    {
@ -1301,25 +891,6 @@ static ErrorOr<void> update_tables(u32 code_point, CodePointTables<T>& tables, a

 static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
 {
-    auto update_casing_tables = [&]<typename T>(u32 code_point, CodePointTables<T>& tables, CasingMetadata& metadata) -> ErrorOr<void> {
-        CasingTable casing {};
-
-        while (metadata.iterator != metadata.end) {
-            if (code_point < metadata.iterator->code_point)
-                break;
-
-            if (code_point == metadata.iterator->code_point) {
-                casing = move(metadata.iterator->casing);
-                break;
-            }
-
-            ++metadata.iterator;
-        }
-
-        TRY(update_tables(code_point, tables, metadata, casing));
-        return {};
-    };
-
    auto update_property_tables = [&]<typename T>(u32 code_point, CodePointTables<T>& tables, PropertyMetadata& metadata) -> ErrorOr<void> {
        static Unicode::CodePointRangeComparator comparator {};

@ -1346,7 +917,6 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
        return {};
    };

-    CasingMetadata casing_metadata { unicode_data.code_point_data };
    auto general_category_metadata = TRY(PropertyMetadata::create(unicode_data.general_categories));
    auto property_metadata = TRY(PropertyMetadata::create(unicode_data.prop_list));
    auto script_metadata = TRY(PropertyMetadata::create(unicode_data.script_list));
@ -1356,7 +926,6 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
    auto sentence_break_metadata = TRY(PropertyMetadata::create(unicode_data.sentence_break_props));

    for (u32 code_point = 0; code_point <= MAX_CODE_POINT; ++code_point) {
-        TRY(update_casing_tables(code_point, unicode_data.casing_tables, casing_metadata));
        TRY(update_property_tables(code_point, unicode_data.general_category_tables, general_category_metadata));
        TRY(update_property_tables(code_point, unicode_data.property_tables, property_metadata));
        TRY(update_property_tables(code_point, unicode_data.script_tables, script_metadata));
@ -1374,8 +943,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
    StringView generated_header_path;
    StringView generated_implementation_path;
    StringView unicode_data_path;
-    StringView special_casing_path;
-    StringView case_folding_path;
    StringView derived_general_category_path;
    StringView prop_list_path;
    StringView derived_core_prop_path;
@ -1394,8 +961,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
    args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
    args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
    args_parser.add_option(unicode_data_path, "Path to UnicodeData.txt file", "unicode-data-path", 'u', "unicode-data-path");
-    args_parser.add_option(special_casing_path, "Path to SpecialCasing.txt file", "special-casing-path", 's', "special-casing-path");
-    args_parser.add_option(case_folding_path, "Path to CaseFolding.txt file", "case-folding-path", 'o', "case-folding-path");
    args_parser.add_option(derived_general_category_path, "Path to DerivedGeneralCategory.txt file", "derived-general-category-path", 'g', "derived-general-category-path");
    args_parser.add_option(prop_list_path, "Path to PropList.txt file", "prop-list-path", 'p', "prop-list-path");
    args_parser.add_option(derived_core_prop_path, "Path to DerivedCoreProperties.txt file", "derived-core-prop-path", 'd', "derived-core-prop-path");
@ -1415,8 +980,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
    auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write));
    auto unicode_data_file = TRY(open_file(unicode_data_path, Core::File::OpenMode::Read));
    auto derived_general_category_file = TRY(open_file(derived_general_category_path, Core::File::OpenMode::Read));
-    auto special_casing_file = TRY(open_file(special_casing_path, Core::File::OpenMode::Read));
-    auto case_folding_file = TRY(open_file(case_folding_path, Core::File::OpenMode::Read));
    auto prop_list_file = TRY(open_file(prop_list_path, Core::File::OpenMode::Read));
    auto derived_core_prop_file = TRY(open_file(derived_core_prop_path, Core::File::OpenMode::Read));
    auto derived_binary_prop_file = TRY(open_file(derived_binary_prop_path, Core::File::OpenMode::Read));
@ -1431,8 +994,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
    auto sentence_break_file = TRY(open_file(sentence_break_path, Core::File::OpenMode::Read));

    UnicodeData unicode_data {};
-    TRY(parse_special_casing(*special_casing_file, unicode_data));
-    TRY(parse_case_folding(*case_folding_file, unicode_data));
    TRY(parse_prop_list(*derived_general_category_file, unicode_data.general_categories));
    TRY(parse_prop_list(*prop_list_file, unicode_data.prop_list));
    TRY(parse_prop_list(*derived_core_prop_file, unicode_data.prop_list));
--- a/Tests/AK/TestString.cpp
+++ b/Tests/AK/TestString.cpp
@ -592,7 +592,6 @@ TEST_CASE(to_titlecase)
    EXPECT_EQ(MUST("foo bar baz"_string.to_titlecase()), "Foo Bar Baz"sv);
    EXPECT_EQ(MUST("foo \n \r bar \t baz"_string.to_titlecase()), "Foo \n \r Bar \t Baz"sv);
    EXPECT_EQ(MUST("f\"oo\" b'ar'"_string.to_titlecase()), "F\"Oo\" B'ar'"sv);
-    EXPECT_EQ(MUST("123dollars"_string.to_titlecase()), "123Dollars"sv);
 }

 TEST_CASE(to_casefold)
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@ -8,82 +8,6 @@

 #include <AK/StringView.h>
 #include <LibUnicode/CharacterTypes.h>
-#include <ctype.h>
-
-static void compare_to_ascii(auto& old_function, auto& new_function)
-{
-    i64 result1 = 0;
-    i64 result2 = 0;
-
-    for (u32 i = 0; i < 0x80; ++i) {
-        EXPECT_EQ(result1 = old_function(i), result2 = new_function(i));
-        if (result1 != result2)
-            dbgln("Function input value was {}.", i);
-    }
-}
-
-TEST_CASE(to_unicode_lowercase)
-{
-    compare_to_ascii(tolower, Unicode::to_unicode_lowercase);
-
-    EXPECT_EQ(Unicode::to_unicode_lowercase(0x03c9u), 0x03c9u); // "ω" to "ω"
-    EXPECT_EQ(Unicode::to_unicode_lowercase(0x03a9u), 0x03c9u); // "Ω" to "ω"
-
-    // Code points encoded by ranges in UnicodeData.txt
-    EXPECT_EQ(Unicode::to_unicode_lowercase(0x3400u), 0x3400u);
-    EXPECT_EQ(Unicode::to_unicode_lowercase(0x3401u), 0x3401u);
-    EXPECT_EQ(Unicode::to_unicode_lowercase(0x3402u), 0x3402u);
-    EXPECT_EQ(Unicode::to_unicode_lowercase(0x4dbfu), 0x4dbfu);
-}
-
-TEST_CASE(to_unicode_uppercase)
-{
-    compare_to_ascii(toupper, Unicode::to_unicode_uppercase);
-
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x03c9u), 0x03a9u); // "ω" to "Ω"
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x03a9u), 0x03a9u); // "Ω" to "Ω"
-
-    // Code points encoded by ranges in UnicodeData.txt
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x3400u), 0x3400u);
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x3401u), 0x3401u);
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x3402u), 0x3402u);
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x4dbfu), 0x4dbfu);
-
-    // Code points whose uppercase and titlecase mappings actually differ.
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x01c6u), 0x01c4u); // "ǆ" to "Ǆ"
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x01c9u), 0x01c7u); // "ǉ" to "Ǉ"
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x01ccu), 0x01cau); // "ǌ" to "Ǌ"
-    EXPECT_EQ(Unicode::to_unicode_uppercase(0x01f3u), 0x01f1u); // "ǳ" to "Ǳ"
-}
-
-TEST_CASE(to_unicode_titlecase)
-{
-    compare_to_ascii(toupper, Unicode::to_unicode_titlecase);
-
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x03c9u), 0x03a9u); // "ω" to "Ω"
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x03a9u), 0x03a9u); // "Ω" to "Ω"
-
-    // Code points encoded by ranges in UnicodeData.txt
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x3400u), 0x3400u);
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x3401u), 0x3401u);
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x3402u), 0x3402u);
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x4dbfu), 0x4dbfu);
-
-    // Code points whose uppercase and titlecase mappings actually differ.
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c6u), 0x01c5u); // "ǆ" to "ǅ"
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c9u), 0x01c8u); // "ǉ" to "ǈ"
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x01ccu), 0x01cbu); // "ǌ" to "ǋ"
-    EXPECT_EQ(Unicode::to_unicode_titlecase(0x01f3u), 0x01f2u); // "ǳ" to "ǲ"
-}
-
-BENCHMARK_CASE(casing)
-{
-    for (size_t i = 0; i < 50'000; ++i) {
-        __test_to_unicode_lowercase();
-        __test_to_unicode_uppercase();
-        __test_to_unicode_titlecase();
-    }
-}

 TEST_CASE(general_category)
 {
--- a/Userland/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp
@ -713,10 +713,14 @@ ALWAYS_INLINE void OpCode_Compare::compare_char(MatchInput const& input, MatchSt

    bool equal;
    if (input.regex_options & AllFlags::Insensitive) {
-        if (input.view.unicode())
-            equal = Unicode::equals_ignoring_case(Utf32View { &input_view, 1 }, Utf32View { &ch1, 1 });
-        else
+        if (input.view.unicode()) {
+            auto lhs = String::from_code_point(input_view);
+            auto rhs = String::from_code_point(ch1);
+
+            equal = lhs.equals_ignoring_case(rhs);
+        } else {
            equal = to_ascii_lowercase(input_view) == to_ascii_lowercase(ch1);
+        }
    } else {
        equal = input_view == ch1;
    }
--- a/Userland/Libraries/LibUnicode/CMakeLists.txt
+++ b/Userland/Libraries/LibUnicode/CMakeLists.txt
@ -8,7 +8,6 @@ set(SOURCES
    Normalize.cpp
    Segmentation.cpp
    String.cpp
-    UnicodeUtils.cpp
    ${UNICODE_DATA_SOURCES}
 )
 set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED})
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@ -1,19 +1,10 @@
 /*
- * Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

-#include <AK/CharacterTypes.h>
-#include <AK/Platform.h>
-#include <AK/ScopeGuard.h>
-#include <AK/StringBuilder.h>
-#include <AK/Types.h>
-#include <AK/Utf16View.h>
-#include <AK/Utf32View.h>
-#include <AK/Utf8View.h>
 #include <LibUnicode/CharacterTypes.h>
-#include <LibUnicode/UnicodeUtils.h>

 #if ENABLE_UNICODE_DATA
 #    include <LibUnicode/UnicodeData.h>
@ -21,129 +12,6 @@

 namespace Unicode {

-u32 __attribute__((weak)) canonical_combining_class(u32) { return {}; }
-
-u32 __attribute__((weak)) to_unicode_lowercase(u32 code_point)
-{
-    return to_ascii_lowercase(code_point);
-}
-
-u32 __attribute__((weak)) to_unicode_uppercase(u32 code_point)
-{
-    return to_ascii_uppercase(code_point);
-}
-
-u32 __attribute__((weak)) to_unicode_titlecase(u32 code_point)
-{
-    return to_ascii_uppercase(code_point);
-}
-
-template<typename ViewType>
-class CasefoldStringComparator {
-public:
-    explicit CasefoldStringComparator(ViewType string)
-        : m_string(string)
-        , m_it(m_string.begin())
-    {
-    }
-
-    bool has_more_data() const
-    {
-        return !m_casefolded_code_points.is_empty() || (m_it != m_string.end());
-    }
-
-    size_t index() const
-    {
-        if constexpr (IsSame<ViewType, Utf8View>)
-            return m_string.byte_offset_of(m_it);
-        else if constexpr (IsSame<ViewType, Utf16View>)
-            return m_string.code_unit_offset_of(m_it);
-        else if constexpr (IsSame<ViewType, Utf32View>)
-            return m_string.iterator_offset(m_it);
-        else
-            static_assert(DependentFalse<ViewType>);
-    }
-
-    u32 next_code_point()
-    {
-        VERIFY(has_more_data());
-
-        if (m_casefolded_code_points.is_empty()) {
-            m_current_code_point = *m_it;
-            ++m_it;
-
-            m_casefolded_code_points = Unicode::Detail::casefold_code_point(m_current_code_point);
-            VERIFY(!m_casefolded_code_points.is_empty()); // Must at least contain the provided code point.
-        }
-
-        auto code_point = m_casefolded_code_points[0];
-        m_casefolded_code_points = m_casefolded_code_points.substring_view(1);
-
-        return code_point;
-    }
-
-private:
-    ViewType m_string;
-    typename ViewType::Iterator m_it;
-
-    u32 m_current_code_point { 0 };
-    Utf32View m_casefolded_code_points;
-};
-
-// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34145
-template<typename ViewType>
-bool equals_ignoring_case(ViewType lhs, ViewType rhs)
-{
-    // A string X is a caseless match for a string Y if and only if:
-    //     toCasefold(X) = toCasefold(Y)
-
-    CasefoldStringComparator lhs_comparator { lhs };
-    CasefoldStringComparator rhs_comparator { rhs };
-
-    while (lhs_comparator.has_more_data() && rhs_comparator.has_more_data()) {
-        if (lhs_comparator.next_code_point() != rhs_comparator.next_code_point())
-            return false;
-    }
-
-    return !lhs_comparator.has_more_data() && !rhs_comparator.has_more_data();
-}
-
-template bool equals_ignoring_case(Utf8View, Utf8View);
-template bool equals_ignoring_case(Utf16View, Utf16View);
-template bool equals_ignoring_case(Utf32View, Utf32View);
-
-template<typename ViewType>
-Optional<size_t> find_ignoring_case(ViewType lhs, ViewType rhs)
-{
-    CasefoldStringComparator lhs_comparator { lhs };
-
-    while (lhs_comparator.has_more_data()) {
-        CasefoldStringComparator rhs_comparator { rhs };
-
-        auto saved_state = lhs_comparator;
-        auto matches = true;
-
-        while (lhs_comparator.has_more_data() && rhs_comparator.has_more_data()) {
-            if (lhs_comparator.next_code_point() != rhs_comparator.next_code_point()) {
-                matches = false;
-                break;
-            }
-        }
-
-        if (matches && !rhs_comparator.has_more_data())
-            return saved_state.index();
-
-        lhs_comparator = move(saved_state);
-        lhs_comparator.next_code_point();
-    }
-
-    return {};
-}
-
-template Optional<size_t> find_ignoring_case(Utf8View, Utf8View);
-template Optional<size_t> find_ignoring_case(Utf16View, Utf16View);
-template Optional<size_t> find_ignoring_case(Utf32View, Utf32View);
-
 Optional<GeneralCategory> __attribute__((weak)) general_category_from_string(StringView) { return {}; }
 bool __attribute__((weak)) code_point_has_general_category(u32, GeneralCategory) { return {}; }
 Optional<Property> __attribute__((weak)) property_from_string(StringView) { return {}; }
--- a/Userland/Libraries/LibUnicode/CharacterTypes.h
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.h
@ -1,18 +1,15 @@
 /*
- * Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

 #pragma once

-#include <AK/ByteString.h>
 #include <AK/Forward.h>
 #include <AK/Optional.h>
-#include <AK/Span.h>
-#include <AK/String.h>
+#include <AK/StringView.h>
 #include <AK/Types.h>
-#include <AK/Vector.h>
 #include <LibUnicode/Forward.h>

 namespace Unicode {
@ -29,20 +26,6 @@ struct CodePointRangeComparator {
    }
 };

-u32 canonical_combining_class(u32 code_point);
-
-// Note: The single code point case conversions only perform simple case folding.
-// Use the full-string transformations for full case folding.
-u32 to_unicode_lowercase(u32 code_point);
-u32 to_unicode_uppercase(u32 code_point);
-u32 to_unicode_titlecase(u32 code_point);
-
-template<typename ViewType>
-bool equals_ignoring_case(ViewType, ViewType);
-
-template<typename ViewType>
-Optional<size_t> find_ignoring_case(ViewType, ViewType);
-
 Optional<GeneralCategory> general_category_from_string(StringView);
 bool code_point_has_general_category(u32 code_point, GeneralCategory general_category);

--- a/Userland/Libraries/LibUnicode/Forward.h
+++ b/Userland/Libraries/LibUnicode/Forward.h
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
@ -11,7 +11,6 @@
 namespace Unicode {

 enum class BidirectionalClass : u8;
-enum class Block : u16;
 enum class EmojiGroup : u8;
 enum class GeneralCategory : u8;
 enum class GraphemeBreakProperty : u8;
@ -20,9 +19,7 @@ enum class Script : u8;
 enum class SentenceBreakProperty : u8;
 enum class WordBreakProperty : u8;

-struct CodePointDecomposition;
 struct CurrencyCode;
 struct Emoji;
-struct SpecialCasing;

 }
--- a/Userland/Libraries/LibUnicode/String.cpp
+++ b/Userland/Libraries/LibUnicode/String.cpp
@ -1,57 +1,141 @@
 /*
- * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2023-2024, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

+#define AK_DONT_REPLACE_STD
+
 #include <AK/String.h>
 #include <AK/StringBuilder.h>
-#include <AK/Utf8View.h>
-#include <LibUnicode/CharacterTypes.h>
-#include <LibUnicode/UnicodeUtils.h>
+#include <LibLocale/ICU.h>
+
+#include <unicode/bytestream.h>
+#include <unicode/casemap.h>
+#include <unicode/stringoptions.h>

 // This file contains definitions of AK::String methods which require UCD data.

 namespace AK {

+// Pairs an owned copy of a locale name with the C-string pointer that is handed to ICU.
+struct ResolvedLocale {
+    ByteString buffer; // Owned copy of the locale text; stays default-constructed when no locale was given.
+    char const* locale { nullptr }; // resolve_locale() points this at buffer.characters(); null when no locale was given.
+};
+
+// Converts an optional locale name into the form the ICU case-mapping calls in this file accept.
+// Without a locale, the result keeps an empty buffer and a null locale pointer.
+static ResolvedLocale resolve_locale(Optional<StringView> const& locale)
+{
+    ResolvedLocale result;
+
+    if (locale.has_value()) {
+        // Copy the view into owned storage first; the pointer below refers into that copy.
+        result.buffer = *locale;
+        result.locale = result.buffer.characters();
+    }
+
+    return result;
+}
+
 ErrorOr<String> String::to_lowercase(Optional<StringView> const& locale) const
 {
-    StringBuilder builder;
-    TRY(Unicode::Detail::build_lowercase_string(code_points(), builder, locale));
+    // Lowercases this string via ICU's UTF-8 case mapping. Returns an error if ICU reports a failure.
+    UErrorCode status = U_ZERO_ERROR;
+
+    // The builder is sized from the input's byte length (presumably as its initial capacity); ICU
+    // writes its output into the builder through `sink`.
+    StringBuilder builder { bytes_as_string_view().length() };
+    icu::StringByteSink sink { &builder };
+
+    // Keep `resolved_locale` alive across the ICU call: its `locale` pointer refers into its own buffer.
+    // The pointer is null when the caller supplied no locale.
+    auto resolved_locale = resolve_locale(locale);
+
+    icu::CaseMap::utf8ToLower(resolved_locale.locale, 0, Locale::icu_string_piece(*this), sink, nullptr, status);
+    if (Locale::icu_failure(status))
+        return Error::from_string_literal("Unable to convert string to lowercase");
+
     return builder.to_string_without_validation();
 }

 ErrorOr<String> String::to_uppercase(Optional<StringView> const& locale) const
 {
-    StringBuilder builder;
-    TRY(Unicode::Detail::build_uppercase_string(code_points(), builder, locale));
+    // Uppercases this string via ICU's UTF-8 case mapping. Returns an error if ICU reports a failure.
+    UErrorCode status = U_ZERO_ERROR;
+
+    // The builder is sized from the input's byte length (presumably as its initial capacity); ICU
+    // writes its output into the builder through `sink`.
+    StringBuilder builder { bytes_as_string_view().length() };
+    icu::StringByteSink sink { &builder };
+
+    // Keep `resolved_locale` alive across the ICU call: its `locale` pointer refers into its own buffer.
+    // The pointer is null when the caller supplied no locale.
+    auto resolved_locale = resolve_locale(locale);
+
+    icu::CaseMap::utf8ToUpper(resolved_locale.locale, 0, Locale::icu_string_piece(*this), sink, nullptr, status);
+    if (Locale::icu_failure(status))
+        return Error::from_string_literal("Unable to convert string to uppercase");
+
     return builder.to_string_without_validation();
 }

 ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation) const
 {
-    StringBuilder builder;
-    TRY(Unicode::Detail::build_titlecase_string(code_points(), builder, locale, trailing_code_point_transformation));
+    // Titlecases this string via ICU's UTF-8 case mapping. Returns an error if ICU reports a failure.
+    UErrorCode status = U_ZERO_ERROR;
+
+    // The builder is sized from the input's byte length (presumably as its initial capacity); ICU
+    // writes its output into the builder through `sink`.
+    StringBuilder builder { bytes_as_string_view().length() };
+    icu::StringByteSink sink { &builder };
+
+    // Keep `resolved_locale` alive across the ICU call: its `locale` pointer refers into its own buffer.
+    // The pointer is null when the caller supplied no locale.
+    auto resolved_locale = resolve_locale(locale);
+
+    // PreserveExisting is expressed to ICU as U_TITLECASE_NO_LOWERCASE; otherwise no option bits are set.
+    u32 options = 0;
+    if (trailing_code_point_transformation == TrailingCodePointTransformation::PreserveExisting)
+        options |= U_TITLECASE_NO_LOWERCASE;
+
+    // NOTE(review): the null third argument is the break iterator slot; presumably ICU then picks its
+    // own word iterator for the locale - confirm against the ICU CaseMap documentation.
+    icu::CaseMap::utf8ToTitle(resolved_locale.locale, options, nullptr, Locale::icu_string_piece(*this), sink, nullptr, status);
+    if (Locale::icu_failure(status))
+        return Error::from_string_literal("Unable to convert string to titlecase");
+
     return builder.to_string_without_validation();
 }

+// Case-folds `string` with ICU and appends the folded UTF-8 to `builder` (via a byte sink wrapping it).
+// Returns an error if ICU reports a failure. No locale is involved, and no option bits are set.
+static ErrorOr<void> build_casefold_string(StringView string, StringBuilder& builder)
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    icu::StringByteSink sink { &builder };
+
+    icu::CaseMap::utf8Fold(0, Locale::icu_string_piece(string), sink, nullptr, status);
+    if (Locale::icu_failure(status))
+        return Error::from_string_literal("Unable to casefold string");
+
+    return {};
+}
+
 ErrorOr<String> String::to_casefold() const
 {
-    StringBuilder builder;
-    TRY(Unicode::Detail::build_casefold_string(code_points(), builder));
+    // Returns the case-folded form of this string; a folding failure is propagated to the caller.
+    // The builder is sized from the input's byte length (presumably as its initial capacity).
+    StringBuilder builder { bytes_as_string_view().length() };
+    TRY(build_casefold_string(*this, builder));
+
     return builder.to_string_without_validation();
 }

 bool String::equals_ignoring_case(String const& other) const
 {
-    return Unicode::equals_ignoring_case(code_points(), other.code_points());
+    // Caseless comparison: case-fold both strings into temporary builders and compare the folded
+    // bytes. A folding failure on either side is reported as "not equal".
+    StringBuilder lhs_builder { bytes_as_string_view().length() };
+    if (build_casefold_string(*this, lhs_builder).is_error())
+        return false;
+
+    StringBuilder rhs_builder { other.bytes_as_string_view().length() };
+    if (build_casefold_string(other, rhs_builder).is_error())
+        return false;
+
+    return lhs_builder.string_view() == rhs_builder.string_view();
 }

 Optional<size_t> String::find_byte_offset_ignoring_case(StringView needle, size_t from_byte_offset) const
 {
-    auto haystack = code_points().substring_view(from_byte_offset);
+    // Caseless search: case-fold both the haystack (this string from `from_byte_offset` onwards) and
+    // the needle, then look for the folded needle inside the folded haystack. Returns an empty
+    // Optional when the haystack is empty, when folding fails, or when there is no match.
+    auto haystack = bytes_as_string_view().substring_view(from_byte_offset);
+    if (haystack.is_empty())
+        return {};

-    if (auto index = Unicode::find_ignoring_case(haystack, Utf8View { needle }); index.has_value())
+    StringBuilder lhs_builder { haystack.length() };
+    if (build_casefold_string(haystack, lhs_builder).is_error())
+        return {};
+
+    // A folding failure must yield an empty Optional. `return false` here would implicitly convert
+    // to an engaged Optional<size_t> holding 0, i.e. a bogus match at offset 0.
+    StringBuilder rhs_builder { needle.length() };
+    if (build_casefold_string(needle, rhs_builder).is_error())
+        return {};
+
+    // NOTE(review): `index` is a byte offset into the case-folded haystack. If folding changes the
+    // byte length of text before the match, it may not equal the offset in the original string.
+    if (auto index = lhs_builder.string_view().find(rhs_builder.string_view()); index.has_value())
         return *index + from_byte_offset;

     return {};
--- a/Userland/Libraries/LibUnicode/UnicodeUtils.cpp
+++ b/Userland/Libraries/LibUnicode/UnicodeUtils.cpp
@ -1,368 +0,0 @@
-/*
- * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#include <AK/Platform.h>
-#include <AK/String.h>
-#include <AK/StringBuilder.h>
-#include <AK/Types.h>
-#include <LibUnicode/CharacterTypes.h>
-#include <LibUnicode/Segmentation.h>
-#include <LibUnicode/UnicodeUtils.h>
-
-#if ENABLE_UNICODE_DATA
-#    include <LibUnicode/UnicodeData.h>
-#endif
-
-// For details on the algorithms used here, see Section 3.13 Default Case Algorithms
-// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf
-
-namespace Unicode::Detail {
-
-#if ENABLE_UNICODE_DATA
-
-static bool is_after_uppercase_i(Utf8View const& string, size_t index)
-{
-    // There is an uppercase I before C, and there is no intervening combining character class 230 (Above) or 0.
-    auto preceding_view = string.substring_view(0, index);
-    bool found_uppercase_i = false;
-
-    // FIXME: Would be better if Utf8View supported reverse iteration.
-    for (auto code_point : preceding_view) {
-        if (code_point == 'I') {
-            found_uppercase_i = true;
-            continue;
-        }
-
-        auto combining_class = canonical_combining_class(code_point);
-        if (combining_class == 0 || combining_class == 230)
-            found_uppercase_i = false;
-    }
-
-    return found_uppercase_i;
-}
-
-static bool is_after_soft_dotted_code_point(Utf8View const& string, size_t index)
-{
-    // There is a Soft_Dotted character before C, with no intervening character of combining class 0 or 230 (Above).
-    auto preceding_view = string.substring_view(0, index);
-    bool found_soft_dotted_code_point = false;
-
-    // FIXME: Would be better if Utf8View supported reverse iteration.
-    for (auto code_point : preceding_view) {
-        if (code_point_has_property(code_point, Property::Soft_Dotted)) {
-            found_soft_dotted_code_point = true;
-            continue;
-        }
-
-        auto combining_class = canonical_combining_class(code_point);
-        if (combining_class == 0 || combining_class == 230)
-            found_soft_dotted_code_point = false;
-    }
-
-    return found_soft_dotted_code_point;
-}
-
-static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)
-{
-    // C is preceded by a sequence consisting of a cased letter and then zero or more case-ignorable
-    // characters, and C is not followed by a sequence consisting of zero or more case-ignorable
-    // characters and then a cased letter.
-    auto preceding_view = string.substring_view(0, index);
-    auto following_view = ((index + byte_length) < string.byte_length())
-        ? string.substring_view(index + byte_length)
-        : Utf8View {};
-
-    size_t cased_letter_count = 0;
-
-    for (auto code_point : preceding_view) {
-        bool is_cased = code_point_has_property(code_point, Property::Cased);
-        bool is_case_ignorable = code_point_has_property(code_point, Property::Case_Ignorable);
-
-        if (is_cased && !is_case_ignorable)
-            ++cased_letter_count;
-        else if (!is_case_ignorable)
-            cased_letter_count = 0;
-    }
-
-    if (cased_letter_count == 0)
-        return false;
-
-    for (auto code_point : following_view) {
-        bool is_cased = code_point_has_property(code_point, Property::Cased);
-        bool is_case_ignorable = code_point_has_property(code_point, Property::Case_Ignorable);
-
-        if (is_case_ignorable)
-            continue;
-        if (is_cased)
-            return false;
-
-        break;
-    }
-
-    return true;
-}
-
-static bool is_followed_by_combining_class_above(Utf8View const& string, size_t index, size_t byte_length)
-{
-    // C is followed by a character of combining class 230 (Above) with no intervening character of combining class 0 or 230 (Above).
-    auto following_view = ((index + byte_length) < string.byte_length())
-        ? string.substring_view(index + byte_length)
-        : Utf8View {};
-
-    for (auto code_point : following_view) {
-        u32 combining_class = canonical_combining_class(code_point);
-
-        if (combining_class == 0)
-            return false;
-        if (combining_class == 230)
-            return true;
-    }
-
-    return false;
-}
-
-static bool is_followed_by_combining_dot_above(Utf8View const& string, size_t index, size_t byte_length)
-{
-    // C is followed by combining dot above (U+0307). Any sequence of characters with a combining class that is neither 0 nor 230 may
-    // intervene between the current character and the combining dot above.
-    auto following_view = ((index + byte_length) < string.byte_length())
-        ? string.substring_view(index + byte_length)
-        : Utf8View {};
-
-    for (auto code_point : following_view) {
-        if (code_point == 0x307)
-            return true;
-
-        u32 combining_class = canonical_combining_class(code_point);
-
-        if (combining_class == 0)
-            return false;
-        if (combining_class == 230)
-            return false;
-    }
-
-    return false;
-}
-
-static Optional<SpecialCasing const&> find_matching_special_case(u32 code_point, Utf8View const& string, Optional<StringView> locale, size_t index, size_t byte_length)
-{
-    auto requested_locale = Locale::None;
-
-    if (locale.has_value()) {
-        if (auto maybe_locale = locale_from_string(*locale); maybe_locale.has_value())
-            requested_locale = *maybe_locale;
-    }
-
-    auto special_casings = special_case_mapping(code_point);
-
-    for (auto const& special_casing : special_casings) {
-        if (special_casing.locale != Locale::None && special_casing.locale != requested_locale)
-            continue;
-
-        switch (special_casing.condition) {
-        case Condition::None:
-            return special_casing;
-
-        case Condition::AfterI:
-            if (is_after_uppercase_i(string, index))
-                return special_casing;
-            break;
-
-        case Condition::AfterSoftDotted:
-            if (is_after_soft_dotted_code_point(string, index))
-                return special_casing;
-            break;
-
-        case Condition::FinalSigma:
-            if (is_final_code_point(string, index, byte_length))
-                return special_casing;
-            break;
-
-        case Condition::MoreAbove:
-            if (is_followed_by_combining_class_above(string, index, byte_length))
-                return special_casing;
-            break;
-
-        case Condition::NotBeforeDot:
-            if (!is_followed_by_combining_dot_above(string, index, byte_length))
-                return special_casing;
-            break;
-        }
-    }
-
-    return {};
-}
-
-template<CaseFoldingStatus... StatusFilter>
-static Optional<CaseFolding const&> find_matching_case_folding(u32 code_point)
-{
-    auto case_foldings = case_folding_mapping(code_point);
-
-    for (auto const& case_folding : case_foldings) {
-        if (((case_folding.status == StatusFilter) || ...))
-            return case_folding;
-    }
-
-    return {};
-}
-
-#endif
-
-// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
-ErrorOr<void> build_lowercase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
-{
-#if ENABLE_UNICODE_DATA
-    size_t index = 0;
-    size_t byte_length = 0;
-
-    for (auto it = code_points.begin(); it != code_points.end(); ++it, index += byte_length) {
-        u32 code_point = *it;
-        byte_length = it.underlying_code_point_length_in_bytes();
-
-        auto special_casing = find_matching_special_case(code_point, code_points, locale, index, byte_length);
-        if (!special_casing.has_value()) {
-            TRY(builder.try_append_code_point(to_unicode_lowercase(code_point)));
-            continue;
-        }
-
-        for (size_t i = 0; i < special_casing->lowercase_mapping_size; ++i)
-            TRY(builder.try_append_code_point(special_casing->lowercase_mapping[i]));
-    }
-
-    return {};
-#else
-    return Error::from_string_literal("Unicode data has been disabled");
-#endif
-}
-
-// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
-ErrorOr<void> build_uppercase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
-{
-#if ENABLE_UNICODE_DATA
-    size_t index = 0;
-    size_t byte_length = 0;
-
-    for (auto it = code_points.begin(); it != code_points.end(); ++it, index += byte_length) {
-        u32 code_point = *it;
-        byte_length = it.underlying_code_point_length_in_bytes();
-
-        auto special_casing = find_matching_special_case(code_point, code_points, locale, index, byte_length);
-        if (!special_casing.has_value()) {
-            TRY(builder.try_append_code_point(to_unicode_uppercase(code_point)));
-            continue;
-        }
-
-        for (size_t i = 0; i < special_casing->uppercase_mapping_size; ++i)
-            TRY(builder.try_append_code_point(special_casing->uppercase_mapping[i]));
-    }
-
-    return {};
-#else
-    return Error::from_string_literal("Unicode data has been disabled");
-#endif
-}
-
-// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
-ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale, [[maybe_unused]] TrailingCodePointTransformation trailing_code_point_transformation)
-{
-#if ENABLE_UNICODE_DATA
-    // toTitlecase(X): Find the word boundaries in X according to Unicode Standard Annex #29,
-    // “Unicode Text Segmentation.” For each word boundary, find the first cased character F following
-    // the word boundary. If F exists, map F to Titlecase_Mapping(F); then map all characters C between
-    // F and the following word boundary to Lowercase_Mapping(C).
-
-    auto first_cased_code_point_after_boundary = [&](auto boundary, auto next_boundary) -> Optional<Utf8CodePointIterator> {
-        auto it = code_points.iterator_at_byte_offset_without_validation(boundary);
-        auto end = code_points.iterator_at_byte_offset_without_validation(next_boundary);
-
-        for (; it != end; ++it) {
-            if (code_point_has_property(*it, Property::Cased))
-                return it;
-        }
-
-        return {};
-    };
-
-    auto append_code_point_as_titlecase = [&](auto code_point, auto code_point_offset, auto code_point_length) -> ErrorOr<void> {
-        auto special_casing = find_matching_special_case(code_point, code_points, locale, code_point_offset, code_point_length);
-        if (!special_casing.has_value()) {
-            TRY(builder.try_append_code_point(to_unicode_titlecase(code_point)));
-            return {};
-        }
-
-        for (size_t i = 0; i < special_casing->titlecase_mapping_size; ++i)
-            TRY(builder.try_append_code_point(special_casing->titlecase_mapping[i]));
-        return {};
-    };
-
-    size_t boundary = 0;
-
-    while (true) {
-        auto next_boundary = next_word_segmentation_boundary(code_points, boundary);
-        if (!next_boundary.has_value())
-            break;
-
-        if (auto it = first_cased_code_point_after_boundary(boundary, *next_boundary); it.has_value()) {
-            auto code_point = *it.value();
-            auto code_point_offset = code_points.byte_offset_of(*it);
-            auto code_point_length = it->underlying_code_point_length_in_bytes();
-
-            auto caseless_code_points = code_points.substring_view(boundary, code_point_offset - boundary);
-            TRY(builder.try_append(caseless_code_points.as_string()));
-
-            TRY(append_code_point_as_titlecase(code_point, code_point_offset, code_point_length));
-            boundary = code_point_offset + code_point_length;
-        }
-
-        auto remaining_code_points = code_points.substring_view(boundary, *next_boundary - boundary);
-        switch (trailing_code_point_transformation) {
-        case TrailingCodePointTransformation::Lowercase:
-            TRY(build_lowercase_string(remaining_code_points, builder, locale));
-            break;
-        case TrailingCodePointTransformation::PreserveExisting:
-            TRY(builder.try_append(remaining_code_points.as_string()));
-            break;
-        }
-
-        boundary = *next_boundary;
-    }
-
-    return {};
-#else
-    return Error::from_string_literal("Unicode data has been disabled");
-#endif
-}
-
-// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
-ErrorOr<void> build_casefold_string(Utf8View code_points, StringBuilder& builder)
-{
-    // toCasefold(X): Map each character C in X to Case_Folding(C).
-    for (auto code_point : code_points) {
-        auto case_folding = casefold_code_point(code_point);
-        TRY(builder.try_append(case_folding));
-    }
-
-    return {};
-}
-
-// https://www.unicode.org/reports/tr44/#CaseFolding.txt
-// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
-Utf32View casefold_code_point(u32 const& code_point)
-{
-#if ENABLE_UNICODE_DATA
-    // Case_Folding(C) uses the mappings with the status field value “C” or “F” in the data file
-    // CaseFolding.txt in the Unicode Character Database.
-    using enum CaseFoldingStatus;
-
-    if (auto case_folding = find_matching_case_folding<Common, Full>(code_point); case_folding.has_value())
-        return Utf32View { case_folding->mapping, case_folding->mapping_size };
-#endif
-
-    // The case foldings are omitted in the data file if they are the same as the code point itself.
-    return Utf32View { &code_point, 1 };
-}
-
-}
--- a/Userland/Libraries/LibUnicode/UnicodeUtils.h
+++ b/Userland/Libraries/LibUnicode/UnicodeUtils.h
@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#pragma once
-
-#include <AK/Error.h>
-#include <AK/Forward.h>
-#include <AK/Optional.h>
-#include <AK/Utf32View.h>
-#include <AK/Utf8View.h>
-#include <LibUnicode/Forward.h>
-
-namespace Unicode::Detail {
-
-ErrorOr<void> build_lowercase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale);
-ErrorOr<void> build_uppercase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale);
-ErrorOr<void> build_titlecase_string(Utf8View code_points, StringBuilder& builder, Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation);
-ErrorOr<void> build_casefold_string(Utf8View code_points, StringBuilder& builder);
-Utf32View casefold_code_point(u32 const& code_point);
-
-}