ladybird/Tests/AK/TestString.cpp

/*
 * Copyright (c) 2022, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

// This is included first on purpose. We specifically do not want LibTest to override VERIFY here so
// that we can actually test that some String factory methods cause a crash with invalid input.
#include <AK/String.h>

#include <LibTest/TestCase.h>

#include <AK/StringBuilder.h>
#include <AK/Try.h>
#include <AK/Utf8View.h>
#include <AK/Vector.h>

// A default-constructed String and one created from an empty UTF-8 view must both be empty and
// must compare equal to each other and to an empty StringView.
TEST_CASE(construct_empty)
{
    String default_constructed;
    EXPECT(default_constructed.is_empty());
    EXPECT_EQ(default_constructed.bytes().size(), 0u);

    auto from_empty_view = MUST(String::from_utf8(""sv));
    EXPECT(from_empty_view.is_empty());
    EXPECT_EQ(default_constructed, from_empty_view);
    EXPECT_EQ(default_constructed, ""sv);
}

// Move-assigning a freshly created String over an existing one must leave the target holding the
// newly assigned contents.
TEST_CASE(move_assignment)
{
    auto reassigned = MUST(String::from_utf8("hello"sv));
    reassigned = MUST(String::from_utf8("friends!"sv));
    EXPECT_EQ(reassigned, "friends!"sv);
}

// A string of 7 bytes on 64-bit builds (3 bytes otherwise) must be reported as a short string,
// both when created at runtime and when created at compile time via from_utf8_short_string().
TEST_CASE(short_strings)
{
    // Only the literal and its byte count depend on the architecture; the checks are shared.
#ifdef AK_ARCH_64_BIT
    constexpr auto contents = "abcdefg"sv;
    constexpr size_t expected_size = 7;
#else
    constexpr auto contents = "abc"sv;
    constexpr size_t expected_size = 3;
#endif

    auto runtime_string = MUST(String::from_utf8(contents));
    EXPECT(runtime_string.is_short_string());
    EXPECT_EQ(runtime_string.bytes().size(), expected_size);
    EXPECT_EQ(runtime_string.bytes_as_string_view(), contents);

    constexpr auto compile_time_string = String::from_utf8_short_string(contents);
    EXPECT(compile_time_string.is_short_string());
    EXPECT_EQ(compile_time_string.bytes().size(), expected_size);
    EXPECT_EQ(compile_time_string, runtime_string);
}

// An 8-byte string is one byte longer than the largest short string tested above, so it must not
// be reported as a short string.
TEST_CASE(long_strings)
{
    auto long_string = MUST(String::from_utf8("abcdefgh"sv));
    EXPECT(!long_string.is_short_string());
    EXPECT_EQ(long_string.bytes().size(), 8u);
    EXPECT_EQ(long_string.bytes_as_string_view(), "abcdefgh"sv);
}

// String::from_code_point() must yield the UTF-8 encoding of the given code point, and must crash
// when handed a value that is not a valid code point.
TEST_CASE(from_code_points)
{
    // Every ASCII code point encodes to the identical single byte.
    for (u32 code_point = 0; code_point < 0x80; ++code_point) {
        auto string = String::from_code_point(code_point);

        auto ch = static_cast<char>(code_point);
        StringView view { &ch, 1 };

        EXPECT_EQ(string, view);
    }

    // Two-byte (U+00E9) and three-byte (U+20AC) UTF-8 encodings.
    EXPECT_EQ(String::from_code_point(0xe9), "\xC3\xA9"sv);
    EXPECT_EQ(String::from_code_point(0x20ac), "\xE2\x82\xAC"sv);

    // U+10FFFF needs the full four-byte encoding.
    auto string = String::from_code_point(0x10ffff);
    EXPECT_EQ(string, "\xF4\x8F\xBF\xBF"sv);

    EXPECT_CRASH("Creating a string from an invalid code point", [] {
        // The result is deliberately discarded; only the crash matters.
        (void)String::from_code_point(0xffffffff);
        return Test::Crash::Failure::DidNotCrash;
    });
}

// substring_from_byte_offset() must return the requested byte range, here for a 5-byte and a
// 10-byte prefix of the same source string.
TEST_CASE(substring)
{
    auto source = MUST(String::from_utf8("Hello I am a long string"sv));

    auto five_byte_prefix = MUST(source.substring_from_byte_offset(0, 5));
    EXPECT_EQ(five_byte_prefix, "Hello"sv);

    auto ten_byte_prefix = MUST(source.substring_from_byte_offset(0, 10));
    EXPECT_EQ(ten_byte_prefix, "Hello I am"sv);
}

// Iterating String::code_points() must yield exactly the code points the string was built from.
TEST_CASE(code_points)
{
    auto string = MUST(String::from_utf8("🦬🪒"sv));

    Vector<u32> code_points;
    for (auto code_point : string.code_points())
        code_points.append(code_point);

    // Check the count before indexing: a missing code point would otherwise be an out-of-range
    // index, and extra trailing code points would go unnoticed.
    EXPECT_EQ(code_points.size(), 2u);
    if (code_points.size() != 2u)
        return;

    EXPECT_EQ(code_points[0], 0x1f9acu);
    EXPECT_EQ(code_points[1], 0x1fa92u);
}

// Code points appended to a StringBuilder must come back out of to_string() as their UTF-8
// encoding (two code points, 8 bytes in total here).
TEST_CASE(string_builder)
{
    StringBuilder emoji_builder;
    emoji_builder.append_code_point(0x1f9acu);
    emoji_builder.append_code_point(0x1fa92u);

    auto built = MUST(emoji_builder.to_string());
    EXPECT_EQ(built, "🦬🪒"sv);
    EXPECT_EQ(built.bytes().size(), 8u);
}

// A String must be usable as a format argument of String::formatted().
TEST_CASE(ak_format)
{
    auto argument = MUST(String::from_utf8("friends"sv));
    auto greeting = MUST(String::formatted("Hello {}", argument));
    EXPECT_EQ(greeting, "Hello friends"sv);
}

// String::replace() must substitute a needle that occurs and return the text unchanged when the
// needle does not occur.
TEST_CASE(replace)
{
    // The needle is present and gets replaced.
    {
        auto greeting = MUST(String::from_utf8("Hello enemies"sv));
        auto replaced = MUST(greeting.replace("enemies"sv, "friends"sv, ReplaceMode::All));
        EXPECT_EQ(replaced, "Hello friends"sv);
    }

    // The needle is absent, so the result equals the input.
    {
        auto window_title = MUST(String::from_utf8("anon@courage:~"sv));
        auto replaced = MUST(window_title.replace("[*]"sv, "(*)"sv, ReplaceMode::FirstOnly));
        EXPECT_EQ(replaced, "anon@courage:~"sv);
    }
}

// String::reverse() must reverse the order of code points, keeping multi-byte code points intact.
TEST_CASE(reverse)
{
    // Builds a String from `input`, reverses it and compares against `expected_output`.
    auto expect_reversed = [](StringView input, StringView expected_output) {
        auto original = MUST(String::from_utf8(input));
        auto reversed = MUST(original.reverse());

        EXPECT_EQ(reversed, expected_output);
    };

    expect_reversed(""sv, ""sv);
    expect_reversed("a"sv, "a"sv);
    expect_reversed("ab"sv, "ba"sv);
    expect_reversed("ab cd ef"sv, "fe dc ba"sv);
    expect_reversed("😀"sv, "😀"sv);
    expect_reversed("ab😀cd"sv, "dc😀ba"sv);
}

// String::to_lowercase() must lowercase ASCII and non-ASCII input, including a case where the
// lowercase form has more code points than the input.
TEST_CASE(to_lowercase)
{
    // Builds a String from `input`, lowercases it and compares against `expected`.
    auto expect_lowercased = [](StringView input, StringView expected) {
        auto original = MUST(String::from_utf8(input));
        auto lowercased = MUST(original.to_lowercase());
        EXPECT_EQ(lowercased, expected);
    };

    expect_lowercased("Aa"sv, "aa"sv);
    expect_lowercased("Ωω"sv, "ωω"sv);
    expect_lowercased("İi̇"sv, "i̇i̇"sv);
}

// String::to_uppercase() must uppercase ASCII and non-ASCII input, including a case where one
// input code point becomes two in the result.
TEST_CASE(to_uppercase)
{
    // Builds a String from `input`, uppercases it and compares against `expected`.
    auto expect_uppercased = [](StringView input, StringView expected) {
        auto original = MUST(String::from_utf8(input));
        auto uppercased = MUST(original.to_uppercase());
        EXPECT_EQ(uppercased, expected);
    };

    expect_uppercased("Aa"sv, "AA"sv);
    expect_uppercased("Ωω"sv, "ΩΩ"sv);
    expect_uppercased("ŉ"sv, "ʼN"sv);
}

// String::to_titlecase() must capitalize the first letter of each word, with whitespace, quotes
// and digits acting as word boundaries in the cases below.
TEST_CASE(to_titlecase)
{
    // Builds a String from `input`, titlecases it and compares against `expected`.
    auto expect_titlecased = [](StringView input, StringView expected) {
        auto original = MUST(String::from_utf8(input));
        auto titlecased = MUST(original.to_titlecase());
        EXPECT_EQ(titlecased, expected);
    };

    expect_titlecased("foo bar baz"sv, "Foo Bar Baz"sv);
    expect_titlecased("foo \n \r bar \t baz"sv, "Foo \n \r Bar \t Baz"sv);
    expect_titlecased("f\"oo\" b'ar'"sv, "F\"Oo\" B'Ar'"sv);
    expect_titlecased("123dollars"sv, "123Dollars"sv);
}

// String::equals_ignoring_case() must treat strings differing only in case as equal, in both
// directions, including the sharp s / "ss" pairing exercised in the last group.
TEST_CASE(equals_ignoring_case)
{
    auto expect_equal = [](String const& lhs, String const& rhs) {
        EXPECT(MUST(lhs.equals_ignoring_case(rhs)));
    };
    auto expect_unequal = [](String const& lhs, String const& rhs) {
        EXPECT(!MUST(lhs.equals_ignoring_case(rhs)));
    };

    // Two empty strings are equal.
    {
        String first_empty {};
        String second_empty {};

        expect_equal(first_empty, second_empty);
    }

    // ASCII: three case variants of one word, plus a different word.
    {
        auto lower = MUST(String::from_utf8("abcd"sv));
        auto upper = MUST(String::from_utf8("ABCD"sv));
        auto mixed = MUST(String::from_utf8("AbCd"sv));
        auto other = MUST(String::from_utf8("dcba"sv));

        expect_equal(lower, upper);
        expect_equal(lower, mixed);
        expect_unequal(lower, other);

        expect_equal(upper, lower);
        expect_equal(upper, mixed);
        expect_unequal(upper, other);

        expect_equal(mixed, lower);
        expect_equal(mixed, upper);
        expect_unequal(mixed, other);
    }

    // Sharp s must match every casing of "ss" but never a single "s".
    {
        auto sharp_s = MUST(String::from_utf8("\u00DF"sv)); // LATIN SMALL LETTER SHARP S
        auto upper_ss = MUST(String::from_utf8("SS"sv));
        auto mixed_ss = MUST(String::from_utf8("Ss"sv));
        auto lower_ss = MUST(String::from_utf8("ss"sv));
        auto upper_s = MUST(String::from_utf8("S"sv));
        auto lower_s = MUST(String::from_utf8("s"sv));

        expect_equal(sharp_s, upper_ss);
        expect_equal(sharp_s, mixed_ss);
        expect_equal(sharp_s, lower_ss);
        expect_unequal(sharp_s, upper_s);
        expect_unequal(sharp_s, lower_s);

        expect_equal(upper_ss, sharp_s);
        expect_equal(upper_ss, mixed_ss);
        expect_equal(upper_ss, lower_ss);
        expect_unequal(upper_ss, upper_s);
        expect_unequal(upper_ss, lower_s);

        expect_equal(mixed_ss, sharp_s);
        expect_equal(mixed_ss, upper_ss);
        expect_equal(mixed_ss, lower_ss);
        expect_unequal(mixed_ss, upper_s);
        expect_unequal(mixed_ss, lower_s);

        expect_equal(lower_ss, sharp_s);
        expect_equal(lower_ss, upper_ss);
        expect_equal(lower_ss, mixed_ss);
        expect_unequal(lower_ss, upper_s);
        expect_unequal(lower_ss, lower_s);
    }
}

// String::is_one_of() must report whether the string equals any of its arguments, for both
// String and StringView candidates and regardless of argument order.
TEST_CASE(is_one_of)
{
    auto foo_string = MUST(String::from_utf8("foo"sv));
    auto bar_string = MUST(String::from_utf8("bar"sv));

    // Candidates given as Strings.
    EXPECT(foo_string.is_one_of(foo_string));
    EXPECT(foo_string.is_one_of(foo_string, bar_string));
    EXPECT(foo_string.is_one_of(bar_string, foo_string));
    EXPECT(!foo_string.is_one_of(bar_string));

    // Candidates given as StringViews.
    EXPECT(!bar_string.is_one_of("foo"sv));
    EXPECT(bar_string.is_one_of("foo"sv, "bar"sv));
    EXPECT(bar_string.is_one_of("bar"sv, "foo"sv));
    EXPECT(bar_string.is_one_of("bar"sv));
}

// String::split() must cut the string at every occurrence of the separator code point, for both
// an ASCII separator and a multi-byte one.
TEST_CASE(split)
{
    // ASCII separator.
    {
        auto test = MUST(String::from_utf8("foo bar baz"sv));
        auto parts = MUST(test.split(' '));
        EXPECT_EQ(parts.size(), 3u);
        // Compare against StringViews, like every other comparison in this file, rather than raw
        // C string literals.
        EXPECT_EQ(parts[0], "foo"sv);
        EXPECT_EQ(parts[1], "bar"sv);
        EXPECT_EQ(parts[2], "baz"sv);
    }
    // Multi-byte separator (0x03A3, the "Σ" in the input).
    {
        auto test = MUST(String::from_utf8("ωΣ2ωΣω"sv));
        auto parts = MUST(test.split(0x03A3u));
        EXPECT_EQ(parts.size(), 3u);
        EXPECT_EQ(parts[0], "ω"sv);
        EXPECT_EQ(parts[1], "2ω"sv);
        EXPECT_EQ(parts[2], "ω"sv);
    }
}

// String::find_byte_offset() must return the byte offset of the first match at or after the
// optional starting offset, for both code point and StringView needles, and no value when there
// is no match.
TEST_CASE(find_byte_offset)
{
    // Nothing is found in an empty string.
    {
        String empty {};

        auto code_point_offset = empty.find_byte_offset(0);
        EXPECT(!code_point_offset.has_value());

        auto view_offset = empty.find_byte_offset(""sv);
        EXPECT(!view_offset.has_value());
    }

    // ASCII haystack, code point needles.
    {
        auto haystack = MUST(String::from_utf8("foo"sv));

        auto offset_of_f = haystack.find_byte_offset('f');
        EXPECT_EQ(offset_of_f, 0u);

        auto first_o = haystack.find_byte_offset('o');
        EXPECT_EQ(first_o, 1u);

        // Resume the search just past the previous match.
        auto second_o = haystack.find_byte_offset('o', *first_o + 1);
        EXPECT_EQ(second_o, 2u);

        auto offset_of_b = haystack.find_byte_offset('b');
        EXPECT(!offset_of_b.has_value());
    }

    // ASCII haystack, StringView needles.
    {
        auto haystack = MUST(String::from_utf8("foo"sv));

        auto offset_of_fo = haystack.find_byte_offset("fo"sv);
        EXPECT_EQ(offset_of_fo, 0u);

        auto offset_of_oo = haystack.find_byte_offset("oo"sv);
        EXPECT_EQ(offset_of_oo, 1u);

        // Resume the search just past the start of the previous match.
        auto offset_of_last_o = haystack.find_byte_offset("o"sv, *offset_of_oo + 1);
        EXPECT_EQ(offset_of_last_o, 2u);

        // A needle longer than the haystack cannot match.
        auto offset_of_fooo = haystack.find_byte_offset("fooo"sv);
        EXPECT(!offset_of_fooo.has_value());
    }

    // Multi-byte haystack, code point needles; offsets advance two bytes per code point.
    {
        constexpr u32 omega = 0x03C9u;
        constexpr u32 sigma = 0x03A3u;

        auto haystack = MUST(String::from_utf8("ωΣωΣω"sv));

        auto first_omega = haystack.find_byte_offset(omega);
        EXPECT_EQ(first_omega, 0u);

        auto first_sigma = haystack.find_byte_offset(sigma);
        EXPECT_EQ(first_sigma, 2u);

        auto second_omega = haystack.find_byte_offset(omega, 2);
        EXPECT_EQ(second_omega, 4u);

        auto second_sigma = haystack.find_byte_offset(sigma, 4);
        EXPECT_EQ(second_sigma, 6u);

        auto third_omega = haystack.find_byte_offset(omega, 6);
        EXPECT_EQ(third_omega, 8u);
    }

    // Multi-byte haystack, StringView needles.
    {
        auto haystack = MUST(String::from_utf8("ωΣωΣω"sv));

        auto first_omega = haystack.find_byte_offset("ω"sv);
        EXPECT_EQ(first_omega, 0u);

        auto first_sigma = haystack.find_byte_offset("Σ"sv);
        EXPECT_EQ(first_sigma, 2u);

        auto second_omega = haystack.find_byte_offset("ω"sv, 2);
        EXPECT_EQ(second_omega, 4u);

        auto second_sigma = haystack.find_byte_offset("Σ"sv, 4);
        EXPECT_EQ(second_sigma, 6u);

        auto third_omega = haystack.find_byte_offset("ω"sv, 6);
        EXPECT_EQ(third_omega, 8u);
    }
}

// String::repeated() must produce `count` copies of a code point, using short string storage
// whenever the result fits, and must crash on an invalid code point.
TEST_CASE(repeated)
{
    constexpr u32 two_byte_code_point = 0x03C9u;   // encodes to 2 bytes ("ω")
    constexpr u32 four_byte_code_point = 0x10300u; // encodes to 4 bytes ("𐌀")

    // Zero repetitions: always an empty short string.
    {
        auto ascii = MUST(String::repeated('a', 0));
        EXPECT(ascii.is_short_string());
        EXPECT(ascii.is_empty());

        auto two_byte = MUST(String::repeated(two_byte_code_point, 0));
        EXPECT(two_byte.is_short_string());
        EXPECT(two_byte.is_empty());

        auto four_byte = MUST(String::repeated(four_byte_code_point, 0));
        EXPECT(four_byte.is_short_string());
        EXPECT(four_byte.is_empty());
    }

    // One repetition: 1, 2 and 4 bytes respectively.
    {
        auto ascii = MUST(String::repeated('a', 1));
        EXPECT(ascii.is_short_string());
        EXPECT_EQ(ascii.bytes_as_string_view().length(), 1u);
        EXPECT_EQ(ascii, "a"sv);

        auto two_byte = MUST(String::repeated(two_byte_code_point, 1));
        EXPECT(two_byte.is_short_string());
        EXPECT_EQ(two_byte.bytes_as_string_view().length(), 2u);
        EXPECT_EQ(two_byte, "ω"sv);

        auto four_byte = MUST(String::repeated(four_byte_code_point, 1));
        // 4 bytes is expected to be a short string only on 64-bit builds.
#ifdef AK_ARCH_64_BIT
        EXPECT(four_byte.is_short_string());
#else
        EXPECT(!four_byte.is_short_string());
#endif
        EXPECT_EQ(four_byte.bytes_as_string_view().length(), 4u);
        EXPECT_EQ(four_byte, "𐌀"sv);
    }

    // Three repetitions: 3, 6 and 12 bytes respectively.
    {
        auto ascii = MUST(String::repeated('a', 3));
        EXPECT(ascii.is_short_string());
        EXPECT_EQ(ascii.bytes_as_string_view().length(), 3u);
        EXPECT_EQ(ascii, "aaa"sv);

        auto two_byte = MUST(String::repeated(two_byte_code_point, 3));
        // 6 bytes is expected to be a short string only on 64-bit builds.
#ifdef AK_ARCH_64_BIT
        EXPECT(two_byte.is_short_string());
#else
        EXPECT(!two_byte.is_short_string());
#endif
        EXPECT_EQ(two_byte.bytes_as_string_view().length(), 6u);
        EXPECT_EQ(two_byte, "ωωω"sv);

        auto four_byte = MUST(String::repeated(four_byte_code_point, 3));
        EXPECT(!four_byte.is_short_string());
        EXPECT_EQ(four_byte.bytes_as_string_view().length(), 12u);
        EXPECT_EQ(four_byte, "𐌀𐌀𐌀"sv);
    }

    // Ten repetitions: never a short string.
    {
        auto ascii = MUST(String::repeated('a', 10));
        EXPECT(!ascii.is_short_string());
        EXPECT_EQ(ascii.bytes_as_string_view().length(), 10u);
        EXPECT_EQ(ascii, "aaaaaaaaaa"sv);

        auto two_byte = MUST(String::repeated(two_byte_code_point, 10));
        EXPECT(!two_byte.is_short_string());
        EXPECT_EQ(two_byte.bytes_as_string_view().length(), 20u);
        EXPECT_EQ(two_byte, "ωωωωωωωωωω"sv);

        auto four_byte = MUST(String::repeated(four_byte_code_point, 10));
        EXPECT(!four_byte.is_short_string());
        EXPECT_EQ(four_byte.bytes_as_string_view().length(), 40u);
        EXPECT_EQ(four_byte, "𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀"sv);
    }

    EXPECT_CRASH("Creating a string from an invalid code point", [] {
        (void)String::repeated(0xffffffff, 1);
        return Test::Crash::Failure::DidNotCrash;
    });
}

// String::join() must concatenate the formatted elements of a collection with the separator
// between them, applying the optional per-element format string.
TEST_CASE(join)
{
    // An empty collection joins to an empty string.
    auto joined_nothing = MUST(String::join(',', Vector<i32> {}));
    EXPECT(joined_nothing.is_empty());

    // A single element never gets a separator.
    auto single_element = MUST(String::join(',', Array { 1 }));
    EXPECT_EQ(single_element, "1"sv);

    auto single_element_formatted = MUST(String::join(':', Array { 1 }, "[{}]"sv));
    EXPECT_EQ(single_element_formatted, "[1]"sv);

    // Several elements, without and with a per-element format string.
    auto several_elements = MUST(String::join(',', Array { 1, 2, 3 }));
    EXPECT_EQ(several_elements, "1,2,3"sv);

    auto several_elements_formatted = MUST(String::join(',', Array { 1, 2, 3 }, "[{}]"sv));
    EXPECT_EQ(several_elements_formatted, "[1],[2],[3]"sv);

    // The separator may also be a String or a StringView.
    auto string_separator = MUST(String::join(String::from_utf8_short_string("!!!"sv), Array { "foo"sv, "bar"sv, "baz"sv }));
    EXPECT_EQ(string_separator, "foo!!!bar!!!baz"sv);

    auto view_separator = MUST(String::join(" - "sv, Array { 1, 16, 256, 4096 }, "[{:#04x}]"sv));
    EXPECT_EQ(view_separator, "[0x0001] - [0x0010] - [0x0100] - [0x1000]"sv);
}

// String::trim() must strip any of the given code points from the requested end(s) of the string
// and leave everything else untouched.
TEST_CASE(trim)
{
    // Trims `input` and unwraps the result so that each expectation fits on one line.
    auto trimmed = [](String const& input, StringView characters, TrimMode mode) {
        return MUST(input.trim(characters, mode));
    };

    // Empty input stays empty.
    {
        String empty {};

        EXPECT(trimmed(empty, " "sv, TrimMode::Both).is_empty());
        EXPECT(trimmed(empty, " "sv, TrimMode::Left).is_empty());
        EXPECT(trimmed(empty, " "sv, TrimMode::Right).is_empty());
    }

    // Nothing to trim.
    {
        auto bare = MUST(String::from_utf8("word"sv));

        EXPECT_EQ(trimmed(bare, " "sv, TrimMode::Both), "word"sv);
        EXPECT_EQ(trimmed(bare, " "sv, TrimMode::Left), "word"sv);
        EXPECT_EQ(trimmed(bare, " "sv, TrimMode::Right), "word"sv);
    }

    // Leading padding only.
    {
        auto left_padded = MUST(String::from_utf8("    word"sv));

        EXPECT_EQ(trimmed(left_padded, " "sv, TrimMode::Both), "word"sv);
        EXPECT_EQ(trimmed(left_padded, " "sv, TrimMode::Left), "word"sv);
        EXPECT_EQ(trimmed(left_padded, " "sv, TrimMode::Right), "    word"sv);
    }

    // Trailing padding only.
    {
        auto right_padded = MUST(String::from_utf8("word    "sv));

        EXPECT_EQ(trimmed(right_padded, " "sv, TrimMode::Both), "word"sv);
        EXPECT_EQ(trimmed(right_padded, " "sv, TrimMode::Left), "word    "sv);
        EXPECT_EQ(trimmed(right_padded, " "sv, TrimMode::Right), "word"sv);
    }

    // Padding on both sides.
    {
        auto padded = MUST(String::from_utf8("    word    "sv));

        EXPECT_EQ(trimmed(padded, " "sv, TrimMode::Both), "word"sv);
        EXPECT_EQ(trimmed(padded, " "sv, TrimMode::Left), "word    "sv);
        EXPECT_EQ(trimmed(padded, " "sv, TrimMode::Right), "    word"sv);
    }

    // Characters that do not occur at either end leave the string unchanged.
    {
        auto padded = MUST(String::from_utf8("    word    "sv));

        EXPECT_EQ(trimmed(padded, "\t"sv, TrimMode::Both), "    word    "sv);
        EXPECT_EQ(trimmed(padded, "\t"sv, TrimMode::Left), "    word    "sv);
        EXPECT_EQ(trimmed(padded, "\t"sv, TrimMode::Right), "    word    "sv);
    }

    // A single multi-byte code point to trim.
    {
        auto greek = MUST(String::from_utf8("ωΣωΣω"sv));

        EXPECT_EQ(trimmed(greek, "ω"sv, TrimMode::Both), "ΣωΣ"sv);
        EXPECT_EQ(trimmed(greek, "ω"sv, TrimMode::Left), "ΣωΣω"sv);
        EXPECT_EQ(trimmed(greek, "ω"sv, TrimMode::Right), "ωΣωΣ"sv);
    }

    // Every code point of the string is in the trim set, so nothing remains.
    {
        auto greek = MUST(String::from_utf8("ωΣωΣω"sv));

        EXPECT(trimmed(greek, "ωΣ"sv, TrimMode::Both).is_empty());
        EXPECT(trimmed(greek, "ωΣ"sv, TrimMode::Left).is_empty());
        EXPECT(trimmed(greek, "ωΣ"sv, TrimMode::Right).is_empty());
    }

    // Same trim set in the opposite order; the outcome must not change.
    {
        auto greek = MUST(String::from_utf8("ωΣωΣω"sv));

        EXPECT(trimmed(greek, "Σω"sv, TrimMode::Both).is_empty());
        EXPECT(trimmed(greek, "Σω"sv, TrimMode::Left).is_empty());
        EXPECT(trimmed(greek, "Σω"sv, TrimMode::Right).is_empty());
    }
}
-												AK: Introduce the new String, replacement for DeprecatedString

DeprecatedString (formerly String) has been with us since the start,
and it has served us well. However, it has a number of shortcomings
that I'd like to address.

Some of these issues are hard if not impossible to solve incrementally
inside of DeprecatedString, so instead of doing that, let's build a new
String class and then incrementally move over to it instead.

Problems in DeprecatedString:

- It assumes string allocation never fails. This makes it impossible
  to use in allocation-sensitive contexts, and is the reason we had to
  ban DeprecatedString from the kernel entirely.

- The awkward null state. DeprecatedString can be null. It's different
  from the empty state, although null strings are considered empty.
  All code is immediately nicer when using Optional<DeprecatedString>
  but DeprecatedString came before Optional, which is how we ended up
  like this.

- The encoding of the underlying data is ambiguous. For the most part,
  we use it as if it's always UTF-8, but there have been cases where
  we pass around strings in other encodings (e.g. ISO-8859-1)

- operator[] and length() are used to iterate over DeprecatedString one
  byte at a time. This is done all over the codebase, and will *not*
  give the right results unless the string is all ASCII.

How we solve these issues in the new String:

- Functions that may allocate now return ErrorOr<String> so that ENOMEM
  errors can be passed to the caller.

- String has no null state. Use Optional<String> when needed.

- String is always UTF-8. This is validated when constructing a String.
  We may need to add a bypass for this in the future, for cases where
  you have a known-good string, but for now: validate all the things!

- There is no operator[] or length(). You can get the underlying data
  with bytes(), but for iterating over code points, you should be using
  a UTF-8 iterator.

Furthermore, it has two nifty new features:

- String implements a small string optimization (SSO) for strings that
  can fit entirely within a pointer. This means up to 3 bytes on 32-bit
  platforms, and 7 bytes on 64-bit platforms. Such small strings will
  not be heap-allocated.

- String can create substrings without making a deep copy of the
  substring. Instead, the superstring gets +1 refcount from the
  substring, and it acts like a view into the superstring. To make
  substrings like this, use the substring_with_shared_superstring() API.

One caveat:

- String does not guarantee that the underlying data is null-terminated
  like DeprecatedString does today. While this was nifty in a handful of
  places where we were calling C functions, it did stand in the way of
  shared-superstring substrings.

											
										
										
											2022-12-01 15:27:43 +03:00
+								/*
 								 * Copyright (c) 2022, Andreas Kling <kling@serenityos.org>
 								 *
 								 * SPDX-License-Identifier: BSD-2-Clause
 								 */
-												AK: Add a String factory to create a string from a single code point

											
										
										
											2023-01-21 20:34:01 +03:00
+								// This is included first on purpose. We specifically do not want LibTest to override VERIFY here so
 								// that we can actually test that some String factory methods cause a crash with invalid input.
 								#include <AK/String.h>
-												AK: Introduce the new String, replacement for DeprecatedString

DeprecatedString (formerly String) has been with us since the start,
and it has served us well. However, it has a number of shortcomings
that I'd like to address.

Some of these issues are hard if not impossible to solve incrementally
inside of DeprecatedString, so instead of doing that, let's build a new
String class and then incrementally move over to it instead.

Problems in DeprecatedString:

- It assumes string allocation never fails. This makes it impossible
  to use in allocation-sensitive contexts, and is the reason we had to
  ban DeprecatedString from the kernel entirely.

- The awkward null state. DeprecatedString can be null. It's different
  from the empty state, although null strings are considered empty.
  All code is immediately nicer when using Optional<DeprecatedString>
  but DeprecatedString came before Optional, which is how we ended up
  like this.

- The encoding of the underlying data is ambiguous. For the most part,
  we use it as if it's always UTF-8, but there have been cases where
  we pass around strings in other encodings (e.g. ISO-8859-1)

- operator[] and length() are used to iterate over DeprecatedString one
  byte at a time. This is done all over the codebase, and will *not*
  give the right results unless the string is all ASCII.

How we solve these issues in the new String:

- Functions that may allocate now return ErrorOr<String> so that ENOMEM
  errors can be passed to the caller.

- String has no null state. Use Optional<String> when needed.

- String is always UTF-8. This is validated when constructing a String.
  We may need to add a bypass for this in the future, for cases where
  you have a known-good string, but for now: validate all the things!

- There is no operator[] or length(). You can get the underlying data
  with bytes(), but for iterating over code points, you should be using
  a UTF-8 iterator.

Furthermore, it has two nifty new features:

- String implements a small string optimization (SSO) for strings that
  can fit entirely within a pointer. This means up to 3 bytes on 32-bit
  platforms, and 7 bytes on 64-bit platforms. Such small strings will
  not be heap-allocated.

- String can create substrings without making a deep copy of the
  substring. Instead, the superstring gets +1 refcount from the
  substring, and it acts like a view into the superstring. To make
  substrings like this, use the substring_with_shared_superstring() API.

One caveat:

- String does not guarantee that the underlying data is null-terminated
  like DeprecatedString does today. While this was nifty in a handful of
  places where we were calling C functions, it did stand in the way of
  shared-superstring substrings.

											
										
										
											2022-12-01 15:27:43 +03:00
+								#include <LibTest/TestCase.h>
 								#include <AK/StringBuilder.h>
 								#include <AK/Try.h>
 								#include <AK/Utf8View.h>
 								#include <AK/Vector.h>
 								TEST_CASE(construct_empty)
 								{
 								    String empty;
 								    EXPECT(empty.is_empty());
 								    EXPECT_EQ(empty.bytes().size(), 0u);
 								    auto empty2 = MUST(String::from_utf8(""sv));
 								    EXPECT(empty2.is_empty());
 								    EXPECT_EQ(empty, empty2);
 								    EXPECT_EQ(empty, ""sv);
 								}
-												AK: Unref old m_data in String's move assignment

We were overriding the data pointer without unreffing it,
causing a memory leak when assigning a String.

											
										
										
											2022-12-08 20:30:04 +03:00
+								TEST_CASE(move_assignment)
 								{
 								    String string1 = MUST(String::from_utf8("hello"sv));
 								    string1 = MUST(String::from_utf8("friends!"sv));
 								    EXPECT_EQ(string1, "friends!"sv);
 								}
-												AK: Introduce the new String, replacement for DeprecatedString

DeprecatedString (formerly String) has been with us since the start,
and it has served us well. However, it has a number of shortcomings
that I'd like to address.

Some of these issues are hard if not impossible to solve incrementally
inside of DeprecatedString, so instead of doing that, let's build a new
String class and then incrementally move over to it instead.

Problems in DeprecatedString:

- It assumes string allocation never fails. This makes it impossible
  to use in allocation-sensitive contexts, and is the reason we had to
  ban DeprecatedString from the kernel entirely.

- The awkward null state. DeprecatedString can be null. It's different
  from the empty state, although null strings are considered empty.
  All code is immediately nicer when using Optional<DeprecatedString>
  but DeprecatedString came before Optional, which is how we ended up
  like this.

- The encoding of the underlying data is ambiguous. For the most part,
  we use it as if it's always UTF-8, but there have been cases where
  we pass around strings in other encodings (e.g. ISO-8859-1)

- operator[] and length() are used to iterate over DeprecatedString one
  byte at a time. This is done all over the codebase, and will *not*
  give the right results unless the string is all ASCII.

How we solve these issues in the new String:

- Functions that may allocate now return ErrorOr<String> so that ENOMEM
  errors can be passed to the caller.

- String has no null state. Use Optional<String> when needed.

- String is always UTF-8. This is validated when constructing a String.
  We may need to add a bypass for this in the future, for cases where
  you have a known-good string, but for now: validate all the things!

- There is no operator[] or length(). You can get the underlying data
  with bytes(), but for iterating over code points, you should be using
  a UTF-8 iterator.

Furthermore, it has two nifty new features:

- String implements a small string optimization (SSO) for strings that
  can fit entirely within a pointer. This means up to 3 bytes on 32-bit
  platforms, and 7 bytes on 64-bit platforms. Such small strings will
  not be heap-allocated.

- String can create substrings without making a deep copy of the
  substring. Instead, the superstring gets +1 refcount from the
  substring, and it acts like a view into the superstring. To make
  substrings like this, use the substring_with_shared_superstring() API.

One caveat:

- String does not guarantee that the underlying data is null-terminated
  like DeprecatedString does today. While this was nifty in a handful of
  places where we were calling C functions, it did stand in the way of
  shared-superstring substrings.

											
										
										
											2022-12-01 15:27:43 +03:00
+								TEST_CASE(short_strings)
 								{
 								#ifdef AK_ARCH_64_BIT
-												AK: Support creating known short string literals at compile time

In cases where we know a string literal will fit in the short string
storage, we can do so at compile time without needing to handle error
propagation. If the provided string literal is too long, a compilation
error will be emitted due to the failed VERIFY statement being a non-
constant expression.

											
										
										
											2023-01-20 15:20:01 +03:00
+								    auto string1 = MUST(String::from_utf8("abcdefg"sv));
 								    EXPECT_EQ(string1.is_short_string(), true);
 								    EXPECT_EQ(string1.bytes().size(), 7u);
 								    EXPECT_EQ(string1.bytes_as_string_view(), "abcdefg"sv);
 								    constexpr auto string2 = String::from_utf8_short_string("abcdefg"sv);
 								    EXPECT_EQ(string2.is_short_string(), true);
 								    EXPECT_EQ(string2.bytes().size(), 7u);
 								    EXPECT_EQ(string2, string1);
-												AK: Introduce the new String, replacement for DeprecatedString

DeprecatedString (formerly String) has been with us since the start,
and it has served us well. However, it has a number of shortcomings
that I'd like to address.

Some of these issues are hard if not impossible to solve incrementally
inside of DeprecatedString, so instead of doing that, let's build a new
String class and then incrementally move over to it instead.

Problems in DeprecatedString:

- It assumes string allocation never fails. This makes it impossible
  to use in allocation-sensitive contexts, and is the reason we had to
  ban DeprecatedString from the kernel entirely.

- The awkward null state. DeprecatedString can be null. It's different
  from the empty state, although null strings are considered empty.
  All code is immediately nicer when using Optional<DeprecatedString>
  but DeprecatedString came before Optional, which is how we ended up
  like this.

- The encoding of the underlying data is ambiguous. For the most part,
  we use it as if it's always UTF-8, but there have been cases where
  we pass around strings in other encodings (e.g. ISO-8859-1)

- operator[] and length() are used to iterate over DeprecatedString one
  byte at a time. This is done all over the codebase, and will *not*
  give the right results unless the string is all ASCII.

How we solve these issues in the new String:

- Functions that may allocate now return ErrorOr<String> so that ENOMEM
  errors can be passed to the caller.

- String has no null state. Use Optional<String> when needed.

- String is always UTF-8. This is validated when constructing a String.
  We may need to add a bypass for this in the future, for cases where
  you have a known-good string, but for now: validate all the things!

- There is no operator[] or length(). You can get the underlying data
  with bytes(), but for iterating over code points, you should be using
  a UTF-8 iterator.

Furthermore, it has two nifty new features:

- String implements a small string optimization (SSO) for strings that
  can fit entirely within a pointer. This means up to 3 bytes on 32-bit
  platforms, and 7 bytes on 64-bit platforms. Such small strings will
  not be heap-allocated.

- String can create substrings without making a deep copy of the
  substring. Instead, the superstring gets +1 refcount from the
  substring, and it acts like a view into the superstring. To make
  substrings like this, use the substring_with_shared_superstring() API.

One caveat:

- String does not guarantee that the underlying data is null-terminated
  like DeprecatedString does today. While this was nifty in a handful of
  places where we were calling C functions, it did stand in the way of
  shared-superstring substrings.

											
										
										
											2022-12-01 15:27:43 +03:00
+								#else
-												AK: Support creating known short string literals at compile time

In cases where we know a string literal will fit in the short string
storage, we can do so at compile time without needing to handle error
propagation. If the provided string literal is too long, a compilation
error will be emitted due to the failed VERIFY statement being a non-
constant expression.

											
										
										
											2023-01-20 15:20:01 +03:00
+								    auto string1 = MUST(String::from_utf8("abc"sv));
 								    EXPECT_EQ(string1.is_short_string(), true);
 								    EXPECT_EQ(string1.bytes().size(), 3u);
 								    EXPECT_EQ(string1.bytes_as_string_view(), "abc"sv);
 								    constexpr auto string2 = String::from_utf8_short_string("abc"sv);
 								    EXPECT_EQ(string2.is_short_string(), true);
 								    EXPECT_EQ(string2.bytes().size(), 3u);
 								    EXPECT_EQ(string2, string1);
-												AK: Introduce the new String, replacement for DeprecatedString

DeprecatedString (formerly String) has been with us since the start,
and it has served us well. However, it has a number of shortcomings
that I'd like to address.

Some of these issues are hard if not impossible to solve incrementally
inside of DeprecatedString, so instead of doing that, let's build a new
String class and then incrementally move over to it instead.

Problems in DeprecatedString:

- It assumes string allocation never fails. This makes it impossible
  to use in allocation-sensitive contexts, and is the reason we had to
  ban DeprecatedString from the kernel entirely.

- The awkward null state. DeprecatedString can be null. It's different
  from the empty state, although null strings are considered empty.
  All code is immediately nicer when using Optional<DeprecatedString>
  but DeprecatedString came before Optional, which is how we ended up
  like this.

- The encoding of the underlying data is ambiguous. For the most part,
  we use it as if it's always UTF-8, but there have been cases where
  we pass around strings in other encodings (e.g. ISO-8859-1).

- operator[] and length() are used to iterate over DeprecatedString one
  byte at a time. This is done all over the codebase, and will *not*
  give the right results unless the string is all ASCII.

How we solve these issues in the new String:

- Functions that may allocate now return ErrorOr<String> so that ENOMEM
  errors can be passed to the caller.

- String has no null state. Use Optional<String> when needed.

- String is always UTF-8. This is validated when constructing a String.
  We may need to add a bypass for this in the future, for cases where
  you have a known-good string, but for now: validate all the things!

- There is no operator[] or length(). You can get the underlying data
  with bytes(), but for iterating over code points, you should be using
  a UTF-8 iterator.

Furthermore, it has two nifty new features:

- String implements a small string optimization (SSO) for strings that
  can fit entirely within a pointer. This means up to 3 bytes on 32-bit
  platforms, and 7 bytes on 64-bit platforms. Such small strings will
  not be heap-allocated.

- String can create substrings without making a deep copy of the
  substring. Instead, the superstring gets +1 refcount from the
  substring, and it acts like a view into the superstring. To make
  substrings like this, use the substring_with_shared_superstring() API.

One caveat:

- String does not guarantee that the underlying data is null-terminated
  like DeprecatedString does today. While this was nifty in a handful of
  places where we were calling C functions, it did stand in the way of
  shared-superstring substrings.

											
										
										
											2022-12-01 15:27:43 +03:00
+								#endif
 								}
 								// An 8-byte string is expected not to be stored as a short string.
 								TEST_CASE(long_strings)
 								{
 								    auto eight_bytes = MUST(String::from_utf8("abcdefgh"sv));
 								    EXPECT_EQ(eight_bytes.bytes_as_string_view(), "abcdefgh"sv);
 								    EXPECT_EQ(eight_bytes.bytes().size(), 8u);
 								    EXPECT(!eight_bytes.is_short_string());
 								}
-												AK: Add a String factory to create a string from a single code point

											
										
										
											2023-01-21 20:34:01 +03:00
+								TEST_CASE(from_code_points)
 								{
 								    // Every ASCII code point must produce a one-byte string holding that same byte.
 								    for (u32 code_point = 0; code_point < 0x80; ++code_point) {
 								        auto string = String::from_code_point(code_point);
 								        auto ch = static_cast<char>(code_point);
 								        StringView view { &ch, 1 };
 								        EXPECT_EQ(string, view);
 								    }
 								    // U+10FFFF must produce the four-byte UTF-8 sequence F4 8F BF BF.
 								    auto string = String::from_code_point(0x10ffff);
 								    EXPECT_EQ(string, "\xF4\x8F\xBF\xBF"sv);
 								    EXPECT_CRASH("Creating a string from an invalid code point", [] {
 								        // Only the crash matters here, so the result is discarded explicitly.
 								        (void)String::from_code_point(0xffffffff);
 								        return Test::Crash::Failure::DidNotCrash;
 								    });
 								}
-												AK: Introduce the new String, replacement for DeprecatedString

DeprecatedString (formerly String) has been with us since the start,
and it has served us well. However, it has a number of shortcomings
that I'd like to address.

Some of these issues are hard if not impossible to solve incrementally
inside of DeprecatedString, so instead of doing that, let's build a new
String class and then incrementally move over to it instead.

Problems in DeprecatedString:

- It assumes string allocation never fails. This makes it impossible
  to use in allocation-sensitive contexts, and is the reason we had to
  ban DeprecatedString from the kernel entirely.

- The awkward null state. DeprecatedString can be null. It's different
  from the empty state, although null strings are considered empty.
  All code is immediately nicer when using Optional<DeprecatedString>
  but DeprecatedString came before Optional, which is how we ended up
  like this.

- The encoding of the underlying data is ambiguous. For the most part,
  we use it as if it's always UTF-8, but there have been cases where
  we pass around strings in other encodings (e.g. ISO-8859-1).

- operator[] and length() are used to iterate over DeprecatedString one
  byte at a time. This is done all over the codebase, and will *not*
  give the right results unless the string is all ASCII.

How we solve these issues in the new String:

- Functions that may allocate now return ErrorOr<String> so that ENOMEM
  errors can be passed to the caller.

- String has no null state. Use Optional<String> when needed.

- String is always UTF-8. This is validated when constructing a String.
  We may need to add a bypass for this in the future, for cases where
  you have a known-good string, but for now: validate all the things!

- There is no operator[] or length(). You can get the underlying data
  with bytes(), but for iterating over code points, you should be using
  a UTF-8 iterator.

Furthermore, it has two nifty new features:

- String implements a small string optimization (SSO) for strings that
  can fit entirely within a pointer. This means up to 3 bytes on 32-bit
  platforms, and 7 bytes on 64-bit platforms. Such small strings will
  not be heap-allocated.

- String can create substrings without making a deep copy of the
  substring. Instead, the superstring gets +1 refcount from the
  substring, and it acts like a view into the superstring. To make
  substrings like this, use the substring_with_shared_superstring() API.

One caveat:

- String does not guarantee that the underlying data is null-terminated
  like DeprecatedString does today. While this was nifty in a handful of
  places where we were calling C functions, it did stand in the way of
  shared-superstring substrings.

											
										
										
											2022-12-01 15:27:43 +03:00
+								TEST_CASE(substring)
 								{
 								    // Take a 5-byte and a 10-byte prefix of the same source string by byte offset.
 								    auto source = MUST(String::from_utf8("Hello I am a long string"sv));
 								    auto five_byte_prefix = MUST(source.substring_from_byte_offset(0, 5));
 								    auto ten_byte_prefix = MUST(source.substring_from_byte_offset(0, 10));
 								    EXPECT_EQ(five_byte_prefix, "Hello"sv);
 								    EXPECT_EQ(ten_byte_prefix, "Hello I am"sv);
 								}
 								// Iterating code_points() over a two-emoji string must yield exactly those two code points.
 								TEST_CASE(code_points)
 								{
 								    auto string = MUST(String::from_utf8("🦬🪒"sv));
 								    Vector<u32> code_points;
 								    for (auto code_point : string.code_points())
 								        code_points.append(code_point);
 								    // Pin the count before indexing: surplus code points would otherwise go unnoticed,
 								    // and a short vector should be reported as a test failure rather than indexed past its end.
 								    EXPECT_EQ(code_points.size(), 2u);
 								    if (code_points.size() != 2u)
 								        return;
 								    EXPECT_EQ(code_points[0], 0x1f9acu);
 								    EXPECT_EQ(code_points[1], 0x1fa92u);
 								}
 								// Two code points appended to a StringBuilder must come out as the matching 8-byte String.
 								TEST_CASE(string_builder)
 								{
 								    StringBuilder emoji_builder;
 								    emoji_builder.append_code_point(0x1f9acu);
 								    emoji_builder.append_code_point(0x1fa92u);
 								    auto built = MUST(emoji_builder.to_string());
 								    EXPECT_EQ(built.bytes().size(), 8u);
 								    EXPECT_EQ(built, "🦬🪒"sv);
 								}
 								// A String must be usable as a format argument to String::formatted.
 								TEST_CASE(ak_format)
 								{
 								    auto audience = MUST(String::from_utf8("friends"sv));
 								    auto greeting = MUST(String::formatted("Hello {}", audience));
 								    EXPECT_EQ(greeting, "Hello friends"sv);
 								}
 								// Covers one replacement that matches and one whose needle is absent.
 								TEST_CASE(replace)
 								{
 								    {
 								        // The needle occurs once and is swapped out.
 								        auto greeting = MUST(String::from_utf8("Hello enemies"sv));
 								        auto friendly = MUST(greeting.replace("enemies"sv, "friends"sv, ReplaceMode::All));
 								        EXPECT_EQ(friendly, "Hello friends"sv);
 								    }
 								    {
 								        // The needle does not occur, so the text must come back unchanged.
 								        auto title = MUST(String::from_utf8("anon@courage:~"sv));
 								        auto untouched = MUST(title.replace("[*]"sv, "(*)"sv, ReplaceMode::FirstOnly));
 								        EXPECT_EQ(untouched, "anon@courage:~"sv);
 								    }
 								}
-												AK+LibUnicode: Provide Unicode-aware String case transformations

Since AK can't refer to LibUnicode directly, the strategy here is that
if you need case transformations, you can link LibUnicode and receive
them. If you try to use either of these methods without linking it, then
you'll of course get a linker error (note we don't do any fallbacks to
e.g. ASCII case transformations). If you don't need these methods, you
don't have to link LibUnicode.

											
										
										
											2023-01-09 00:33:30 +03:00
-												AK: Add a somewhat naive implementation of String::reverse

This will reverse the String's code points (i.e. not just its bytes),
but is not aware of grapheme clusters.

											
										
										
											2023-01-13 19:34:00 +03:00
+								TEST_CASE(reverse)
 								{
 								    // Builds a String from `input`, reverses it and compares against `reversed`.
 								    auto expect_reversed = [](StringView input, StringView reversed) {
 								        auto original = MUST(String::from_utf8(input));
 								        auto flipped = MUST(original.reverse());
 								        EXPECT_EQ(flipped, reversed);
 								    };

 								    // ASCII-only inputs.
 								    expect_reversed(""sv, ""sv);
 								    expect_reversed("a"sv, "a"sv);
 								    expect_reversed("ab"sv, "ba"sv);
 								    expect_reversed("ab cd ef"sv, "fe dc ba"sv);

 								    // A multi-byte code point must survive intact rather than have its bytes reversed.
 								    expect_reversed("😀"sv, "😀"sv);
 								    expect_reversed("ab😀cd"sv, "dc😀ba"sv);
 								}
-												AK+LibUnicode: Provide Unicode-aware String case transformations

Since AK can't refer to LibUnicode directly, the strategy here is that
if you need case transformations, you can link LibUnicode and receive
them. If you try to use either of these methods without linking it, then
you'll of course get a linker error (note we don't do any fallbacks to
e.g. ASCII case transformations). If you don't need these methods, you
don't have to link LibUnicode.

											
										
										
											2023-01-09 00:33:30 +03:00
+								TEST_CASE(to_lowercase)
 								{
 								    // Builds a String from `input`, lowercases it and compares against `lowercased`.
 								    auto expect_lowercase = [](StringView input, StringView lowercased) {
 								        auto original = MUST(String::from_utf8(input));
 								        auto converted = MUST(original.to_lowercase());
 								        EXPECT_EQ(converted, lowercased);
 								    };

 								    expect_lowercase("Aa"sv, "aa"sv);
 								    expect_lowercase("Ωω"sv, "ωω"sv);
 								    expect_lowercase("İi̇"sv, "i̇i̇"sv);
 								}
 								// Uppercasing, including an input whose expected uppercase form is two code points.
 								TEST_CASE(to_uppercase)
 								{
 								    // Builds a String from `input`, uppercases it and compares against `uppercased`.
 								    auto expect_uppercase = [](StringView input, StringView uppercased) {
 								        auto original = MUST(String::from_utf8(input));
 								        auto converted = MUST(original.to_uppercase());
 								        EXPECT_EQ(converted, uppercased);
 								    };

 								    expect_uppercase("Aa"sv, "AA"sv);
 								    expect_uppercase("Ωω"sv, "ΩΩ"sv);
 								    expect_uppercase("ŉ"sv, "ʼN"sv);
 								}
-												AK: Add String::is_one_of for variadic string comparison

											
										
										
											2023-01-14 17:59:18 +03:00
-												AK+LibUnicode: Provide Unicode-aware String titlecase transformation

											
										
										
											2023-01-16 19:28:27 +03:00
+								TEST_CASE(to_titlecase)
 								{
 								    // Builds a String from `input`, titlecases it and compares against `titlecased`.
 								    auto expect_titlecase = [](StringView input, StringView titlecased) {
 								        auto original = MUST(String::from_utf8(input));
 								        auto converted = MUST(original.to_titlecase());
 								        EXPECT_EQ(converted, titlecased);
 								    };

 								    // Words separated by single spaces.
 								    expect_titlecase("foo bar baz"sv, "Foo Bar Baz"sv);
 								    // Words separated by mixed whitespace.
 								    expect_titlecase("foo \n \r bar \t baz"sv, "Foo \n \r Bar \t Baz"sv);
 								    // Quote characters inside words.
 								    expect_titlecase("f\"oo\" b'ar'"sv, "F\"Oo\" B'Ar'"sv);
 								    // Leading digits followed by letters.
 								    expect_titlecase("123dollars"sv, "123Dollars"sv);
 								}
-												AK+LibUnicode: Provide Unicode-aware caseless String matching

The Unicode spec defines much more complicated caseless matching
algorithms in its Collation spec. This implements the "basic" case
folding comparison.

											
										
										
											2023-01-17 19:30:10 +03:00
+								TEST_CASE(equals_ignoring_case)
 								{
 								    // Shorthand for the caseless comparison under test.
 								    auto matches = [](String const& lhs, String const& rhs) {
 								        return MUST(lhs.equals_ignoring_case(rhs));
 								    };

 								    {
 								        // Two empty strings compare equal.
 								        String first_empty {};
 								        String second_empty {};
 								        EXPECT(matches(first_empty, second_empty));
 								    }
 								    {
 								        // ASCII: every casing of "abcd" matches the others, but never the reversed text.
 								        auto lower = MUST(String::from_utf8("abcd"sv));
 								        auto upper = MUST(String::from_utf8("ABCD"sv));
 								        auto mixed = MUST(String::from_utf8("AbCd"sv));
 								        auto reversed = MUST(String::from_utf8("dcba"sv));

 								        EXPECT(matches(lower, upper));
 								        EXPECT(matches(lower, mixed));
 								        EXPECT(!matches(lower, reversed));

 								        EXPECT(matches(upper, lower));
 								        EXPECT(matches(upper, mixed));
 								        EXPECT(!matches(upper, reversed));

 								        EXPECT(matches(mixed, lower));
 								        EXPECT(matches(mixed, upper));
 								        EXPECT(!matches(mixed, reversed));
 								    }
 								    {
 								        // Sharp s must match every casing of "ss", but never a single "s".
 								        auto sharp_s = MUST(String::from_utf8("\u00DF"sv)); // LATIN SMALL LETTER SHARP S
 								        auto upper_ss = MUST(String::from_utf8("SS"sv));
 								        auto title_ss = MUST(String::from_utf8("Ss"sv));
 								        auto lower_ss = MUST(String::from_utf8("ss"sv));
 								        auto upper_s = MUST(String::from_utf8("S"sv));
 								        auto lower_s = MUST(String::from_utf8("s"sv));

 								        EXPECT(matches(sharp_s, upper_ss));
 								        EXPECT(matches(sharp_s, title_ss));
 								        EXPECT(matches(sharp_s, lower_ss));
 								        EXPECT(!matches(sharp_s, upper_s));
 								        EXPECT(!matches(sharp_s, lower_s));

 								        EXPECT(matches(upper_ss, sharp_s));
 								        EXPECT(matches(upper_ss, title_ss));
 								        EXPECT(matches(upper_ss, lower_ss));
 								        EXPECT(!matches(upper_ss, upper_s));
 								        EXPECT(!matches(upper_ss, lower_s));

 								        EXPECT(matches(title_ss, sharp_s));
 								        EXPECT(matches(title_ss, upper_ss));
 								        EXPECT(matches(title_ss, lower_ss));
 								        EXPECT(!matches(title_ss, upper_s));
 								        EXPECT(!matches(title_ss, lower_s));

 								        EXPECT(matches(lower_ss, sharp_s));
 								        EXPECT(matches(lower_ss, upper_ss));
 								        EXPECT(matches(lower_ss, title_ss));
 								        EXPECT(!matches(lower_ss, upper_s));
 								        EXPECT(!matches(lower_ss, lower_s));
 								    }
 								}
-												AK: Add String::is_one_of for variadic string comparison

											
										
										
											2023-01-14 17:59:18 +03:00
+								TEST_CASE(is_one_of)
 								{
 								    auto foo_string = MUST(String::from_utf8("foo"sv));
 								    auto bar_string = MUST(String::from_utf8("bar"sv));

 								    // Candidates given as String objects.
 								    EXPECT(foo_string.is_one_of(foo_string));
 								    EXPECT(foo_string.is_one_of(foo_string, bar_string));
 								    EXPECT(foo_string.is_one_of(bar_string, foo_string));
 								    EXPECT(!foo_string.is_one_of(bar_string));

 								    // Candidates given as string views.
 								    EXPECT(!bar_string.is_one_of("foo"sv));
 								    EXPECT(bar_string.is_one_of("foo"sv, "bar"sv));
 								    EXPECT(bar_string.is_one_of("bar"sv, "foo"sv));
 								    EXPECT(bar_string.is_one_of("bar"sv));
 								}
-												AK: Add `split()` for `String`

											
										
										
											2023-01-16 19:12:53 +03:00
 								// Splitting on a code point must work for ASCII and multi-byte separators alike.
 								TEST_CASE(split)
 								{
 								    {
 								        // ASCII separator.
 								        auto test = MUST(String::from_utf8("foo bar baz"sv));
 								        auto parts = MUST(test.split(' '));
 								        EXPECT_EQ(parts.size(), 3u);
 								        // Compare against string views, like every other expectation in this file.
 								        EXPECT_EQ(parts[0], "foo"sv);
 								        EXPECT_EQ(parts[1], "bar"sv);
 								        EXPECT_EQ(parts[2], "baz"sv);
 								    }
 								    {
 								        // Multi-byte separator: U+03A3 (Σ).
 								        auto test = MUST(String::from_utf8("ωΣ2ωΣω"sv));
 								        auto parts = MUST(test.split(0x03A3u));
 								        EXPECT_EQ(parts.size(), 3u);
 								        EXPECT_EQ(parts[0], "ω"sv);
 								        EXPECT_EQ(parts[1], "2ω"sv);
 								        EXPECT_EQ(parts[2], "ω"sv);
 								    }
 								}
-												AK: Add a method to find the byte offset of a code point

											
										
										
											2023-01-22 17:24:12 +03:00
 								// Exercises find_byte_offset() with both code point and StringView needles.
 								TEST_CASE(find_byte_offset)
 								{
 								    {
 								        // Nothing can be found in an empty string, whichever needle form is used.
 								        String string {};
-												AK: Add an overload of String::find_byte_offset for StringView

											
										
										
											2023-01-27 18:17:34 +03:00
+								        auto index1 = string.find_byte_offset(0);
 								        EXPECT(!index1.has_value());
 								        auto index2 = string.find_byte_offset(""sv);
 								        EXPECT(!index2.has_value());
-												AK: Add a method to find the byte offset of a code point

											
										
										
											2023-01-22 17:24:12 +03:00
+								    }
 								    {
 								        // ASCII haystack, code point needles; the optional second argument is the starting byte offset.
 								        auto string = MUST(String::from_utf8("foo"sv));
 								        auto index1 = string.find_byte_offset('f');
 								        EXPECT_EQ(index1, 0u);
 								        auto index2 = string.find_byte_offset('o');
 								        EXPECT_EQ(index2, 1u);
 								        auto index3 = string.find_byte_offset('o', *index2 + 1);
 								        EXPECT_EQ(index3, 2u);
 								        auto index4 = string.find_byte_offset('b');
 								        EXPECT(!index4.has_value());
 								    }
-												AK: Add an overload of String::find_byte_offset for StringView

											
										
										
											2023-01-27 18:17:34 +03:00
+								    {
 								        // ASCII haystack, StringView needles; a needle longer than the haystack is not found.
 								        auto string = MUST(String::from_utf8("foo"sv));
 								        auto index1 = string.find_byte_offset("fo"sv);
 								        EXPECT_EQ(index1, 0u);
 								        auto index2 = string.find_byte_offset("oo"sv);
 								        EXPECT_EQ(index2, 1u);
 								        auto index3 = string.find_byte_offset("o"sv, *index2 + 1);
 								        EXPECT_EQ(index3, 2u);
 								        auto index4 = string.find_byte_offset("fooo"sv);
 								        EXPECT(!index4.has_value());
 								    }
-												AK: Add a method to find the byte offset of a code point

											
										
										
											2023-01-22 17:24:12 +03:00
+								    {
 								        // Multi-byte haystack, code point needles: the expected offsets are byte offsets (0, 2, 4, 6, 8).
 								        auto string = MUST(String::from_utf8("ωΣωΣω"sv));
 								        auto index1 = string.find_byte_offset(0x03C9U);
 								        EXPECT_EQ(index1, 0u);
 								        auto index2 = string.find_byte_offset(0x03A3u);
 								        EXPECT_EQ(index2, 2u);
 								        auto index3 = string.find_byte_offset(0x03C9U, 2);
 								        EXPECT_EQ(index3, 4u);
 								        auto index4 = string.find_byte_offset(0x03A3u, 4);
 								        EXPECT_EQ(index4, 6u);
 								        auto index5 = string.find_byte_offset(0x03C9U, 6);
 								        EXPECT_EQ(index5, 8u);
 								    }
-												AK: Add an overload of String::find_byte_offset for StringView

											
										
										
											2023-01-27 18:17:34 +03:00
+								    {
 								        // Same multi-byte haystack and expected offsets, with StringView needles.
 								        auto string = MUST(String::from_utf8("ωΣωΣω"sv));
 								        auto index1 = string.find_byte_offset("ω"sv);
 								        EXPECT_EQ(index1, 0u);
 								        auto index2 = string.find_byte_offset("Σ"sv);
 								        EXPECT_EQ(index2, 2u);
 								        auto index3 = string.find_byte_offset("ω"sv, 2);
 								        EXPECT_EQ(index3, 4u);
 								        auto index4 = string.find_byte_offset("Σ"sv, 4);
 								        EXPECT_EQ(index4, 6u);
 								        auto index5 = string.find_byte_offset("ω"sv, 6);
 								        EXPECT_EQ(index5, 8u);
 								    }
-												AK: Add a method to find the byte offset of a code point

											
										
										
											2023-01-22 17:24:12 +03:00
+								}
-												AK: Add a method to create a String with a repeated code point

											
										
										
											2023-01-22 18:17:48 +03:00
 								// String::repeated() with code points whose expected encodings are 1 byte ('a'),
 								// 2 bytes (U+03C9) and 4 bytes (U+10300), at repetition counts 0, 1, 3 and 10.
 								TEST_CASE(repeated)
 								{
 								    {
 								        // Zero repetitions: the result is empty and short for every code point.
 								        auto one_byte = MUST(String::repeated('a', 0));
 								        EXPECT(one_byte.is_short_string());
 								        EXPECT(one_byte.is_empty());

 								        auto two_byte = MUST(String::repeated(0x03C9U, 0));
 								        EXPECT(two_byte.is_short_string());
 								        EXPECT(two_byte.is_empty());

 								        auto four_byte = MUST(String::repeated(0x10300, 0));
 								        EXPECT(four_byte.is_short_string());
 								        EXPECT(four_byte.is_empty());
 								    }
 								    {
 								        // One repetition: a 4-byte result is short only on 64-bit targets.
 								        auto one_byte = MUST(String::repeated('a', 1));
 								        EXPECT(one_byte.is_short_string());
 								        EXPECT_EQ(one_byte.bytes_as_string_view().length(), 1u);
 								        EXPECT_EQ(one_byte, "a"sv);

 								        auto two_byte = MUST(String::repeated(0x03C9U, 1));
 								        EXPECT(two_byte.is_short_string());
 								        EXPECT_EQ(two_byte.bytes_as_string_view().length(), 2u);
 								        EXPECT_EQ(two_byte, "ω"sv);

 								        auto four_byte = MUST(String::repeated(0x10300, 1));
 								#ifdef AK_ARCH_64_BIT
 								        EXPECT(four_byte.is_short_string());
 								#else
 								        EXPECT(!four_byte.is_short_string());
 								#endif
 								        EXPECT_EQ(four_byte.bytes_as_string_view().length(), 4u);
 								        EXPECT_EQ(four_byte, "𐌀"sv);
 								    }
 								    {
 								        // Three repetitions: a 6-byte result is short only on 64-bit targets; 12 bytes never is.
 								        auto one_byte = MUST(String::repeated('a', 3));
 								        EXPECT(one_byte.is_short_string());
 								        EXPECT_EQ(one_byte.bytes_as_string_view().length(), 3u);
 								        EXPECT_EQ(one_byte, "aaa"sv);

 								        auto two_byte = MUST(String::repeated(0x03C9U, 3));
 								#ifdef AK_ARCH_64_BIT
 								        EXPECT(two_byte.is_short_string());
 								#else
 								        EXPECT(!two_byte.is_short_string());
 								#endif
 								        EXPECT_EQ(two_byte.bytes_as_string_view().length(), 6u);
 								        EXPECT_EQ(two_byte, "ωωω"sv);

 								        auto four_byte = MUST(String::repeated(0x10300, 3));
 								        EXPECT(!four_byte.is_short_string());
 								        EXPECT_EQ(four_byte.bytes_as_string_view().length(), 12u);
 								        EXPECT_EQ(four_byte, "𐌀𐌀𐌀"sv);
 								    }
 								    {
 								        // Ten repetitions: none of the results is short.
 								        auto one_byte = MUST(String::repeated('a', 10));
 								        EXPECT(!one_byte.is_short_string());
 								        EXPECT_EQ(one_byte.bytes_as_string_view().length(), 10u);
 								        EXPECT_EQ(one_byte, "aaaaaaaaaa"sv);

 								        auto two_byte = MUST(String::repeated(0x03C9U, 10));
 								        EXPECT(!two_byte.is_short_string());
 								        EXPECT_EQ(two_byte.bytes_as_string_view().length(), 20u);
 								        EXPECT_EQ(two_byte, "ωωωωωωωωωω"sv);

 								        auto four_byte = MUST(String::repeated(0x10300, 10));
 								        EXPECT(!four_byte.is_short_string());
 								        EXPECT_EQ(four_byte.bytes_as_string_view().length(), 40u);
 								        EXPECT_EQ(four_byte, "𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀"sv);
 								    }

 								    // An out-of-range code point is expected to crash instead of producing a String.
 								    EXPECT_CRASH("Creating a string from an invalid code point", [] {
 								        (void)String::repeated(0xffffffff, 1);
 								        return Test::Crash::Failure::DidNotCrash;
 								    });
 								}
-												AK: Add String::join

											
										
										
											2023-01-27 22:06:05 +03:00
 								// String::join() with character, String and StringView separators, with and without a format string.
 								TEST_CASE(join)
 								{
 								    // No elements: nothing to join.
 								    auto joined_nothing = MUST(String::join(',', Vector<i32> {}));
 								    EXPECT(joined_nothing.is_empty());

 								    // A single element: the separator never appears.
 								    auto joined_single = MUST(String::join(',', Array { 1 }));
 								    EXPECT_EQ(joined_single, "1"sv);
 								    auto joined_single_formatted = MUST(String::join(':', Array { 1 }, "[{}]"sv));
 								    EXPECT_EQ(joined_single_formatted, "[1]"sv);

 								    // Several elements with a character separator.
 								    auto joined_triple = MUST(String::join(',', Array { 1, 2, 3 }));
 								    EXPECT_EQ(joined_triple, "1,2,3"sv);
 								    auto joined_triple_formatted = MUST(String::join(',', Array { 1, 2, 3 }, "[{}]"sv));
 								    EXPECT_EQ(joined_triple_formatted, "[1],[2],[3]"sv);

 								    // A String separator.
 								    auto joined_with_string = MUST(String::join(String::from_utf8_short_string("!!!"sv), Array { "foo"sv, "bar"sv, "baz"sv }));
 								    EXPECT_EQ(joined_with_string, "foo!!!bar!!!baz"sv);

 								    // A StringView separator combined with a hexadecimal format string.
 								    auto joined_hex = MUST(String::join(" - "sv, Array { 1, 16, 256, 4096 }, "[{:#04x}]"sv));
 								    EXPECT_EQ(joined_hex, "[0x0001] - [0x0010] - [0x0100] - [0x1000]"sv);
 								}
-												AK: Add String::trim

											
										
										
											2023-01-27 22:37:40 +03:00
 								// String::trim() in all three TrimMode variants, for ASCII and multi-byte inputs.
 								TEST_CASE(trim)
 								{
 								    // Trims `characters` off `subject` in each mode and compares against the expected text.
 								    auto expect_trimmed = [](String const& subject, StringView characters, StringView both, StringView left, StringView right) {
 								        auto trimmed = MUST(subject.trim(characters, TrimMode::Both));
 								        EXPECT_EQ(trimmed, both);
 								        trimmed = MUST(subject.trim(characters, TrimMode::Left));
 								        EXPECT_EQ(trimmed, left);
 								        trimmed = MUST(subject.trim(characters, TrimMode::Right));
 								        EXPECT_EQ(trimmed, right);
 								    };

 								    // Trims `characters` off `subject` in each mode and expects nothing to remain.
 								    auto expect_trimmed_to_nothing = [](String const& subject, StringView characters) {
 								        auto trimmed = MUST(subject.trim(characters, TrimMode::Both));
 								        EXPECT(trimmed.is_empty());
 								        trimmed = MUST(subject.trim(characters, TrimMode::Left));
 								        EXPECT(trimmed.is_empty());
 								        trimmed = MUST(subject.trim(characters, TrimMode::Right));
 								        EXPECT(trimmed.is_empty());
 								    };

 								    // An empty string stays empty.
 								    expect_trimmed_to_nothing(String {}, " "sv);

 								    // Spaces before, after, around, or absent from a word.
 								    expect_trimmed(MUST(String::from_utf8("word"sv)), " "sv, "word"sv, "word"sv, "word"sv);
 								    expect_trimmed(MUST(String::from_utf8("    word"sv)), " "sv, "word"sv, "word"sv, "    word"sv);
 								    expect_trimmed(MUST(String::from_utf8("word    "sv)), " "sv, "word"sv, "word    "sv, "word"sv);
 								    expect_trimmed(MUST(String::from_utf8("    word    "sv)), " "sv, "word"sv, "word    "sv, "    word"sv);

 								    // Trimming a character that does not occur leaves the text alone.
 								    expect_trimmed(MUST(String::from_utf8("    word    "sv)), "\t"sv, "    word    "sv, "    word    "sv, "    word    "sv);

 								    // A multi-byte code point is trimmed as a whole.
 								    expect_trimmed(MUST(String::from_utf8("ωΣωΣω"sv)), "ω"sv, "ΣωΣ"sv, "ΣωΣω"sv, "ωΣωΣ"sv);

 								    // When every code point is in the trim set nothing remains, whatever the order of the set.
 								    expect_trimmed_to_nothing(MUST(String::from_utf8("ωΣωΣω"sv)), "ωΣ"sv);
 								    expect_trimmed_to_nothing(MUST(String::from_utf8("ωΣωΣω"sv)), "Σω"sv);
 								}