2020-01-18 11:38:21 +03:00
|
|
|
/*
|
2021-04-17 02:18:39 +03:00
|
|
|
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
|
2020-01-18 11:38:21 +03:00
|
|
|
*
|
2021-04-22 11:24:48 +03:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
2020-01-18 11:38:21 +03:00
|
|
|
*/
|
|
|
|
|
2019-04-15 15:56:37 +03:00
|
|
|
#pragma once
|
|
|
|
|
2020-04-30 00:17:54 +03:00
|
|
|
#include <AK/Assertions.h>
|
|
|
|
#include <AK/Checked.h>
|
2020-02-14 23:41:10 +03:00
|
|
|
#include <AK/Forward.h>
|
2021-11-10 13:05:21 +03:00
|
|
|
#include <AK/Optional.h>
|
2020-07-27 15:15:37 +03:00
|
|
|
#include <AK/Span.h>
|
2020-02-14 23:41:10 +03:00
|
|
|
#include <AK/StdLibExtras.h>
|
2021-05-14 16:21:50 +03:00
|
|
|
#include <AK/StringHash.h>
|
2020-02-26 10:25:24 +03:00
|
|
|
#include <AK/StringUtils.h>
|
2019-04-16 03:39:16 +03:00
|
|
|
|
|
|
|
namespace AK {
|
|
|
|
|
2019-04-15 15:56:37 +03:00
|
|
|
class StringView {
|
|
|
|
public:
|
2021-01-11 02:29:28 +03:00
|
|
|
ALWAYS_INLINE constexpr StringView() = default;
|
2020-05-30 17:06:30 +03:00
|
|
|
ALWAYS_INLINE constexpr StringView(const char* characters, size_t length)
|
2019-05-28 12:53:16 +03:00
|
|
|
: m_characters(characters)
|
|
|
|
, m_length(length)
|
|
|
|
{
|
2021-06-27 21:11:38 +03:00
|
|
|
if (!is_constant_evaluated())
|
|
|
|
VERIFY(!Checked<uintptr_t>::addition_would_overflow((uintptr_t)characters, length));
|
2019-05-28 12:53:16 +03:00
|
|
|
}
|
2020-04-30 12:43:25 +03:00
|
|
|
ALWAYS_INLINE StringView(const unsigned char* characters, size_t length)
|
2019-05-28 12:53:16 +03:00
|
|
|
: m_characters((const char*)characters)
|
|
|
|
, m_length(length)
|
|
|
|
{
|
2021-02-23 22:42:32 +03:00
|
|
|
VERIFY(!Checked<uintptr_t>::addition_would_overflow((uintptr_t)characters, length));
|
2019-05-28 12:53:16 +03:00
|
|
|
}
|
2020-05-30 17:06:30 +03:00
|
|
|
ALWAYS_INLINE constexpr StringView(const char* cstring)
|
2019-04-15 15:56:37 +03:00
|
|
|
: m_characters(cstring)
|
2020-03-08 14:34:33 +03:00
|
|
|
, m_length(cstring ? __builtin_strlen(cstring) : 0)
|
2019-04-15 15:56:37 +03:00
|
|
|
{
|
|
|
|
}
|
2020-08-19 12:43:41 +03:00
|
|
|
ALWAYS_INLINE StringView(ReadonlyBytes bytes)
|
|
|
|
: m_characters(reinterpret_cast<const char*>(bytes.data()))
|
|
|
|
, m_length(bytes.size())
|
|
|
|
{
|
|
|
|
}
|
2019-06-29 13:03:28 +03:00
|
|
|
|
|
|
|
StringView(const ByteBuffer&);
|
|
|
|
StringView(const String&);
|
2020-03-22 12:12:55 +03:00
|
|
|
StringView(const FlyString&);
|
2019-04-15 15:56:37 +03:00
|
|
|
|
2021-09-11 00:26:22 +03:00
|
|
|
explicit StringView(ByteBuffer&&) = delete;
|
2021-09-04 17:53:43 +03:00
|
|
|
explicit StringView(String&&) = delete;
|
2021-09-11 11:40:47 +03:00
|
|
|
explicit StringView(FlyString&&) = delete;
|
2021-09-04 17:53:43 +03:00
|
|
|
|
2021-11-06 23:12:16 +03:00
|
|
|
[[nodiscard]] constexpr bool is_null() const { return m_characters == nullptr; }
|
2021-04-22 06:19:39 +03:00
|
|
|
[[nodiscard]] constexpr bool is_empty() const { return m_length == 0; }
|
2020-07-27 15:15:37 +03:00
|
|
|
|
2021-05-14 16:21:50 +03:00
|
|
|
[[nodiscard]] constexpr char const* characters_without_null_termination() const { return m_characters; }
|
2021-04-22 06:19:39 +03:00
|
|
|
[[nodiscard]] constexpr size_t length() const { return m_length; }
|
2020-07-27 15:15:37 +03:00
|
|
|
|
2021-04-11 11:30:53 +03:00
|
|
|
[[nodiscard]] ReadonlyBytes bytes() const { return { m_characters, m_length }; }
|
2020-07-27 15:15:37 +03:00
|
|
|
|
2021-04-22 06:19:39 +03:00
|
|
|
constexpr const char& operator[](size_t index) const { return m_characters[index]; }
|
2020-03-10 11:13:29 +03:00
|
|
|
|
2020-09-06 22:14:08 +03:00
|
|
|
using ConstIterator = SimpleIterator<const StringView, const char>;
|
|
|
|
|
2021-04-11 11:30:53 +03:00
|
|
|
[[nodiscard]] constexpr ConstIterator begin() const { return ConstIterator::begin(*this); }
|
|
|
|
[[nodiscard]] constexpr ConstIterator end() const { return ConstIterator::end(*this); }
|
2019-04-15 15:56:37 +03:00
|
|
|
|
2021-05-14 16:21:50 +03:00
|
|
|
[[nodiscard]] constexpr unsigned hash() const
|
|
|
|
{
|
|
|
|
if (is_empty())
|
|
|
|
return 0;
|
|
|
|
return string_hash(characters_without_null_termination(), length());
|
|
|
|
}
|
2019-08-24 23:31:06 +03:00
|
|
|
|
2021-11-11 02:55:02 +03:00
|
|
|
[[nodiscard]] bool starts_with(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
|
|
|
|
[[nodiscard]] bool ends_with(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
|
2021-04-11 11:30:53 +03:00
|
|
|
[[nodiscard]] bool starts_with(char) const;
|
|
|
|
[[nodiscard]] bool ends_with(char) const;
|
2021-11-11 02:55:02 +03:00
|
|
|
[[nodiscard]] bool matches(StringView mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
|
|
|
|
[[nodiscard]] bool matches(StringView mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
|
2021-04-11 11:30:53 +03:00
|
|
|
[[nodiscard]] bool contains(char) const;
|
2021-11-11 02:55:02 +03:00
|
|
|
[[nodiscard]] bool contains(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
|
|
|
|
[[nodiscard]] bool equals_ignoring_case(StringView other) const;
|
2019-09-12 14:13:07 +03:00
|
|
|
|
2021-11-11 02:55:02 +03:00
|
|
|
[[nodiscard]] StringView trim(StringView characters, TrimMode mode = TrimMode::Both) const { return StringUtils::trim(*this, characters, mode); }
|
2021-04-11 11:30:53 +03:00
|
|
|
[[nodiscard]] StringView trim_whitespace(TrimMode mode = TrimMode::Both) const { return StringUtils::trim_whitespace(*this, mode); }
|
2020-09-20 16:35:04 +03:00
|
|
|
|
2021-07-01 14:45:59 +03:00
|
|
|
[[nodiscard]] String to_lowercase_string() const;
|
|
|
|
[[nodiscard]] String to_uppercase_string() const;
|
2021-08-26 20:55:41 +03:00
|
|
|
[[nodiscard]] String to_titlecase_string() const;
|
2021-07-01 14:45:59 +03:00
|
|
|
|
2021-07-01 15:58:37 +03:00
|
|
|
[[nodiscard]] Optional<size_t> find(char needle, size_t start = 0) const { return StringUtils::find(*this, needle, start); }
|
2021-11-11 02:55:02 +03:00
|
|
|
[[nodiscard]] Optional<size_t> find(StringView needle, size_t start = 0) const { return StringUtils::find(*this, needle, start); }
|
2021-07-01 15:58:37 +03:00
|
|
|
[[nodiscard]] Optional<size_t> find_last(char needle) const { return StringUtils::find_last(*this, needle); }
|
2021-11-11 02:55:02 +03:00
|
|
|
// FIXME: Implement find_last(StringView) for API symmetry.
|
2021-01-12 22:58:45 +03:00
|
|
|
|
2021-11-10 13:05:21 +03:00
|
|
|
[[nodiscard]] Vector<size_t> find_all(StringView needle) const;
|
2021-07-01 18:00:34 +03:00
|
|
|
|
2021-07-01 19:12:21 +03:00
|
|
|
using SearchDirection = StringUtils::SearchDirection;
|
2021-10-31 23:53:22 +03:00
|
|
|
[[nodiscard]] Optional<size_t> find_any_of(StringView needles, SearchDirection direction = SearchDirection::Forward) const { return StringUtils::find_any_of(*this, needles, direction); }
|
2021-07-01 16:01:29 +03:00
|
|
|
|
2021-04-22 06:19:39 +03:00
|
|
|
[[nodiscard]] constexpr StringView substring_view(size_t start, size_t length) const
|
|
|
|
{
|
2021-06-27 21:11:38 +03:00
|
|
|
if (!is_constant_evaluated())
|
|
|
|
VERIFY(start + length <= m_length);
|
2021-04-22 06:19:39 +03:00
|
|
|
return { m_characters + start, length };
|
|
|
|
}
|
|
|
|
|
|
|
|
[[nodiscard]] constexpr StringView substring_view(size_t start) const
|
|
|
|
{
|
2021-10-25 00:29:42 +03:00
|
|
|
if (!is_constant_evaluated())
|
|
|
|
VERIFY(start <= length());
|
2021-04-22 06:19:39 +03:00
|
|
|
return substring_view(start, length() - start);
|
|
|
|
}
|
|
|
|
|
2021-04-11 11:30:53 +03:00
|
|
|
[[nodiscard]] Vector<StringView> split_view(char, bool keep_empty = false) const;
|
2021-11-11 02:55:02 +03:00
|
|
|
[[nodiscard]] Vector<StringView> split_view(StringView, bool keep_empty = false) const;
|
2019-08-04 12:44:20 +03:00
|
|
|
|
2021-11-10 13:05:21 +03:00
|
|
|
[[nodiscard]] Vector<StringView> split_view_if(Function<bool(char)> const& predicate, bool keep_empty = false) const;
|
2021-04-12 14:54:22 +03:00
|
|
|
|
2022-01-09 13:26:45 +03:00
|
|
|
template<VoidFunction<StringView> Callback>
|
|
|
|
void for_each_split_view(char separator, bool keep_empty, Callback callback) const
|
|
|
|
{
|
|
|
|
StringView seperator_view { &separator, 1 };
|
|
|
|
for_each_split_view(seperator_view, keep_empty, callback);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<VoidFunction<StringView> Callback>
|
|
|
|
void for_each_split_view(StringView separator, bool keep_empty, Callback callback) const
|
|
|
|
{
|
|
|
|
VERIFY(!separator.is_empty());
|
|
|
|
|
|
|
|
if (is_empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
StringView view { *this };
|
|
|
|
|
|
|
|
auto maybe_separator_index = find(separator);
|
|
|
|
while (maybe_separator_index.has_value()) {
|
|
|
|
auto separator_index = maybe_separator_index.value();
|
|
|
|
auto part_with_separator = view.substring_view(0, separator_index + separator.length());
|
|
|
|
if (keep_empty || separator_index > 0)
|
|
|
|
callback(part_with_separator.substring_view(0, separator_index));
|
|
|
|
view = view.substring_view_starting_after_substring(part_with_separator);
|
|
|
|
maybe_separator_index = view.find(separator);
|
|
|
|
}
|
|
|
|
if (keep_empty || !view.is_empty())
|
|
|
|
callback(view);
|
|
|
|
}
|
|
|
|
|
2019-12-02 15:42:33 +03:00
|
|
|
// Create a Vector of StringViews split by line endings. As of CommonMark
|
|
|
|
// 0.29, the spec defines a line ending as "a newline (U+000A), a carriage
|
|
|
|
// return (U+000D) not followed by a newline, or a carriage return and a
|
|
|
|
// following newline.".
|
2021-04-11 11:30:53 +03:00
|
|
|
[[nodiscard]] Vector<StringView> lines(bool consider_cr = true) const;
|
2019-12-02 15:42:33 +03:00
|
|
|
|
2020-12-10 16:17:30 +03:00
|
|
|
template<typename T = int>
|
|
|
|
Optional<T> to_int() const;
|
|
|
|
template<typename T = unsigned>
|
|
|
|
Optional<T> to_uint() const;
|
2019-04-16 03:39:16 +03:00
|
|
|
|
2019-06-13 16:30:55 +03:00
|
|
|
// Create a new substring view of this string view, starting either at the beginning of
|
|
|
|
// the given substring view, or after its end, and continuing until the end of this string
|
|
|
|
// view (that is, for the remaining part of its length). For example,
|
|
|
|
//
|
|
|
|
// StringView str { "foobar" };
|
|
|
|
// StringView substr = str.substring_view(1, 2); // "oo"
|
|
|
|
// StringView substr_from = str.substring_view_starting_from_substring(subst); // "oobar"
|
|
|
|
// StringView substr_after = str.substring_view_starting_after_substring(subst); // "bar"
|
|
|
|
//
|
|
|
|
// Note that this only works if the string view passed as an argument is indeed a substring
|
|
|
|
// view of this string view, such as one created by substring_view() and split_view(). It
|
|
|
|
// does not work for arbitrary strings; for example declaring substr in the example above as
|
|
|
|
//
|
|
|
|
// StringView substr { "oo" };
|
|
|
|
//
|
|
|
|
// would not work.
|
2021-11-11 02:55:02 +03:00
|
|
|
[[nodiscard]] StringView substring_view_starting_from_substring(StringView substring) const;
|
|
|
|
[[nodiscard]] StringView substring_view_starting_after_substring(StringView substring) const;
|
2019-06-13 16:30:55 +03:00
|
|
|
|
2021-06-27 21:11:38 +03:00
|
|
|
constexpr bool operator==(const char* cstring) const
|
2019-06-07 20:22:58 +03:00
|
|
|
{
|
2019-06-08 19:30:40 +03:00
|
|
|
if (is_null())
|
2021-11-06 23:12:16 +03:00
|
|
|
return cstring == nullptr;
|
2019-06-08 19:30:40 +03:00
|
|
|
if (!cstring)
|
|
|
|
return false;
|
2021-02-23 16:13:57 +03:00
|
|
|
// NOTE: `m_characters` is not guaranteed to be null-terminated, but `cstring` is.
|
|
|
|
const char* cp = cstring;
|
|
|
|
for (size_t i = 0; i < m_length; ++i) {
|
2021-11-06 23:12:16 +03:00
|
|
|
if (*cp == '\0')
|
2021-02-25 00:10:32 +03:00
|
|
|
return false;
|
2021-02-23 16:13:57 +03:00
|
|
|
if (m_characters[i] != *(cp++))
|
|
|
|
return false;
|
|
|
|
}
|
2021-11-06 23:12:16 +03:00
|
|
|
return *cp == '\0';
|
2019-06-07 20:22:58 +03:00
|
|
|
}
|
2021-02-23 16:13:57 +03:00
|
|
|
|
2021-06-27 21:11:38 +03:00
|
|
|
constexpr bool operator!=(const char* cstring) const
|
2019-06-07 20:22:58 +03:00
|
|
|
{
|
|
|
|
return !(*this == cstring);
|
|
|
|
}
|
2019-04-16 03:39:16 +03:00
|
|
|
|
|
|
|
bool operator==(const String&) const;
|
|
|
|
|
2021-11-11 02:55:02 +03:00
|
|
|
constexpr bool operator==(StringView other) const
|
2019-08-15 15:07:23 +03:00
|
|
|
{
|
|
|
|
if (is_null())
|
|
|
|
return other.is_null();
|
|
|
|
if (other.is_null())
|
|
|
|
return false;
|
|
|
|
if (length() != other.length())
|
|
|
|
return false;
|
2021-11-06 23:12:16 +03:00
|
|
|
return __builtin_memcmp(m_characters, other.m_characters, m_length) == 0;
|
2019-08-15 15:07:23 +03:00
|
|
|
}
|
|
|
|
|
2021-11-11 02:55:02 +03:00
|
|
|
constexpr bool operator!=(StringView other) const
|
2019-08-15 15:07:23 +03:00
|
|
|
{
|
|
|
|
return !(*this == other);
|
|
|
|
}
|
|
|
|
|
2021-11-11 02:55:02 +03:00
|
|
|
bool operator<(StringView other) const
|
2020-08-10 04:55:32 +03:00
|
|
|
{
|
|
|
|
if (int c = __builtin_memcmp(m_characters, other.m_characters, min(m_length, other.m_length)))
|
|
|
|
return c < 0;
|
|
|
|
return m_length < other.m_length;
|
|
|
|
}
|
|
|
|
|
2021-04-11 11:30:53 +03:00
|
|
|
[[nodiscard]] String to_string() const;
|
2020-05-06 19:53:05 +03:00
|
|
|
|
2021-04-11 11:30:53 +03:00
|
|
|
[[nodiscard]] bool is_whitespace() const { return StringUtils::is_whitespace(*this); }
|
2021-01-03 02:26:02 +03:00
|
|
|
|
2021-11-11 02:55:02 +03:00
|
|
|
[[nodiscard]] String replace(StringView needle, StringView replacement, bool all_occurrences = false) const;
|
|
|
|
[[nodiscard]] size_t count(StringView needle) const { return StringUtils::count(*this, needle); }
|
2021-09-11 01:02:24 +03:00
|
|
|
|
2021-06-04 12:46:29 +03:00
|
|
|
template<typename... Ts>
|
2021-08-01 02:27:25 +03:00
|
|
|
[[nodiscard]] ALWAYS_INLINE constexpr bool is_one_of(Ts&&... strings) const
|
2020-10-28 00:58:11 +03:00
|
|
|
{
|
2021-06-04 12:46:29 +03:00
|
|
|
return (... || this->operator==(forward<Ts>(strings)));
|
2020-10-28 00:58:11 +03:00
|
|
|
}
|
|
|
|
|
2019-04-15 15:56:37 +03:00
|
|
|
private:
|
2019-06-03 19:27:56 +03:00
|
|
|
friend class String;
|
2019-04-15 15:56:37 +03:00
|
|
|
const char* m_characters { nullptr };
|
2019-12-09 19:45:40 +03:00
|
|
|
size_t m_length { 0 };
|
2019-04-15 15:56:37 +03:00
|
|
|
};
|
2019-04-16 03:39:16 +03:00
|
|
|
|
2020-10-08 20:48:56 +03:00
|
|
|
template<>
|
2021-08-15 09:49:17 +03:00
|
|
|
struct Traits<StringView> : public GenericTraits<StringView> {
|
2021-11-11 02:55:02 +03:00
|
|
|
static unsigned hash(StringView s) { return s.hash(); }
|
2020-10-08 20:48:56 +03:00
|
|
|
};
|
|
|
|
|
2022-01-10 19:47:23 +03:00
|
|
|
// Traits for using StringView as a key where letter case should be ignored.
struct CaseInsensitiveStringViewTraits : public Traits<StringView> {
    // Case-insensitive hash of the viewed characters; every empty view hashes to 0.
    static unsigned hash(StringView s)
    {
        if (s.is_empty())
            return 0;
        return case_insensitive_string_hash(s.characters_without_null_termination(), s.length());
    }

    // Key equality must agree with hash(): two keys that differ only in case hash to the
    // same value and therefore have to compare equal as well.
    // NOTE(review): assumes containers compare keys through Traits::equals and that the
    // inherited version is case-sensitive -- confirm against the base traits definition.
    static bool equals(StringView a, StringView b)
    {
        return a.equals_ignoring_case(b);
    }
};
|
|
|
|
|
2019-04-16 03:39:16 +03:00
|
|
|
}
|
|
|
|
|
2021-02-24 15:50:00 +03:00
|
|
|
// User-defined literal: "text"sv produces an AK::StringView over the literal's characters,
// using the (pointer, length) pair supplied by the compiler.
[[nodiscard]] ALWAYS_INLINE constexpr AK::StringView operator"" sv(const char* cstring, size_t length)
{
    return { cstring, length };
}
|
|
|
|
|
2022-01-10 19:47:23 +03:00
|
|
|
// Make CaseInsensitiveStringViewTraits usable without the AK:: qualifier.
using AK::CaseInsensitiveStringViewTraits;
|
2019-04-16 03:39:16 +03:00
|
|
|
// Make StringView usable without the AK:: qualifier.
using AK::StringView;
|