mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-06 02:55:49 +03:00
6e7459322d
Having an alias function that only wraps another one is silly, and keeping the more obvious name should flush out more uses of deprecated strings. No behavior change.
599 lines
19 KiB
C++
599 lines
19 KiB
C++
/*
|
|
* Copyright (c) 2018-2022, Andreas Kling <awesomekling@gmail.com>
|
|
* Copyright (c) 2020, Fei Wu <f.eiwu@yahoo.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/CharacterTypes.h>
|
|
#include <AK/MemMem.h>
|
|
#include <AK/Optional.h>
|
|
#include <AK/String.h>
|
|
#include <AK/StringBuilder.h>
|
|
#include <AK/StringUtils.h>
|
|
#include <AK/StringView.h>
|
|
#include <AK/Vector.h>
|
|
|
|
#ifdef KERNEL
|
|
# include <Kernel/StdLib.h>
|
|
#else
|
|
# include <AK/DeprecatedString.h>
|
|
# include <AK/FloatingPointStringConversions.h>
|
|
# include <string.h>
|
|
#endif
|
|
|
|
namespace AK {
|
|
|
|
namespace StringUtils {
|
|
|
|
bool matches(StringView str, StringView mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans)
|
|
{
|
|
auto record_span = [&match_spans](size_t start, size_t length) {
|
|
if (match_spans)
|
|
match_spans->append({ start, length });
|
|
};
|
|
|
|
if (str.is_null() || mask.is_null())
|
|
return str.is_null() && mask.is_null();
|
|
|
|
if (mask == "*"sv) {
|
|
record_span(0, str.length());
|
|
return true;
|
|
}
|
|
|
|
char const* string_ptr = str.characters_without_null_termination();
|
|
char const* string_start = str.characters_without_null_termination();
|
|
char const* string_end = string_ptr + str.length();
|
|
char const* mask_ptr = mask.characters_without_null_termination();
|
|
char const* mask_end = mask_ptr + mask.length();
|
|
|
|
while (string_ptr < string_end && mask_ptr < mask_end) {
|
|
auto string_start_ptr = string_ptr;
|
|
switch (*mask_ptr) {
|
|
case '*':
|
|
if (mask_ptr == mask_end - 1) {
|
|
record_span(string_ptr - string_start, string_end - string_ptr);
|
|
return true;
|
|
}
|
|
while (string_ptr < string_end && !matches({ string_ptr, static_cast<size_t>(string_end - string_ptr) }, { mask_ptr + 1, static_cast<size_t>(mask_end - mask_ptr - 1) }, case_sensitivity))
|
|
++string_ptr;
|
|
record_span(string_start_ptr - string_start, string_ptr - string_start_ptr);
|
|
--string_ptr;
|
|
break;
|
|
case '?':
|
|
record_span(string_ptr - string_start, 1);
|
|
break;
|
|
case '\\':
|
|
// if backslash is last character in mask, just treat it as an exact match
|
|
// otherwise use it as escape for next character
|
|
if (mask_ptr + 1 < mask_end)
|
|
++mask_ptr;
|
|
[[fallthrough]];
|
|
default:
|
|
auto p = *mask_ptr;
|
|
auto ch = *string_ptr;
|
|
if (case_sensitivity == CaseSensitivity::CaseSensitive ? p != ch : to_ascii_lowercase(p) != to_ascii_lowercase(ch))
|
|
return false;
|
|
break;
|
|
}
|
|
++string_ptr;
|
|
++mask_ptr;
|
|
}
|
|
|
|
if (string_ptr == string_end) {
|
|
// Allow ending '*' to contain nothing.
|
|
while (mask_ptr != mask_end && *mask_ptr == '*') {
|
|
record_span(string_ptr - string_start, 0);
|
|
++mask_ptr;
|
|
}
|
|
}
|
|
|
|
return string_ptr == string_end && mask_ptr == mask_end;
|
|
}
|
|
|
|
template<typename T>
|
|
Optional<T> convert_to_int(StringView str, TrimWhitespace trim_whitespace)
|
|
{
|
|
auto string = trim_whitespace == TrimWhitespace::Yes
|
|
? str.trim_whitespace()
|
|
: str;
|
|
if (string.is_empty())
|
|
return {};
|
|
|
|
T sign = 1;
|
|
size_t i = 0;
|
|
auto const characters = string.characters_without_null_termination();
|
|
|
|
if (characters[0] == '-' || characters[0] == '+') {
|
|
if (string.length() == 1)
|
|
return {};
|
|
i++;
|
|
if (characters[0] == '-')
|
|
sign = -1;
|
|
}
|
|
|
|
T value = 0;
|
|
for (; i < string.length(); i++) {
|
|
if (characters[i] < '0' || characters[i] > '9')
|
|
return {};
|
|
|
|
if (__builtin_mul_overflow(value, 10, &value))
|
|
return {};
|
|
|
|
if (__builtin_add_overflow(value, sign * (characters[i] - '0'), &value))
|
|
return {};
|
|
}
|
|
return value;
|
|
}
|
|
|
|
template Optional<i8> convert_to_int(StringView str, TrimWhitespace);
|
|
template Optional<i16> convert_to_int(StringView str, TrimWhitespace);
|
|
template Optional<i32> convert_to_int(StringView str, TrimWhitespace);
|
|
template Optional<long> convert_to_int(StringView str, TrimWhitespace);
|
|
template Optional<long long> convert_to_int(StringView str, TrimWhitespace);
|
|
|
|
template<typename T>
|
|
Optional<T> convert_to_uint(StringView str, TrimWhitespace trim_whitespace)
|
|
{
|
|
auto string = trim_whitespace == TrimWhitespace::Yes
|
|
? str.trim_whitespace()
|
|
: str;
|
|
if (string.is_empty())
|
|
return {};
|
|
|
|
T value = 0;
|
|
auto const characters = string.characters_without_null_termination();
|
|
|
|
for (size_t i = 0; i < string.length(); i++) {
|
|
if (characters[i] < '0' || characters[i] > '9')
|
|
return {};
|
|
|
|
if (__builtin_mul_overflow(value, 10, &value))
|
|
return {};
|
|
|
|
if (__builtin_add_overflow(value, characters[i] - '0', &value))
|
|
return {};
|
|
}
|
|
return value;
|
|
}
|
|
|
|
template Optional<u8> convert_to_uint(StringView str, TrimWhitespace);
|
|
template Optional<u16> convert_to_uint(StringView str, TrimWhitespace);
|
|
template Optional<u32> convert_to_uint(StringView str, TrimWhitespace);
|
|
template Optional<unsigned long> convert_to_uint(StringView str, TrimWhitespace);
|
|
template Optional<unsigned long long> convert_to_uint(StringView str, TrimWhitespace);
|
|
|
|
template<typename T>
|
|
Optional<T> convert_to_uint_from_hex(StringView str, TrimWhitespace trim_whitespace)
|
|
{
|
|
auto string = trim_whitespace == TrimWhitespace::Yes
|
|
? str.trim_whitespace()
|
|
: str;
|
|
if (string.is_empty())
|
|
return {};
|
|
|
|
T value = 0;
|
|
auto const count = string.length();
|
|
const T upper_bound = NumericLimits<T>::max();
|
|
|
|
for (size_t i = 0; i < count; i++) {
|
|
char digit = string[i];
|
|
u8 digit_val;
|
|
if (value > (upper_bound >> 4))
|
|
return {};
|
|
|
|
if (digit >= '0' && digit <= '9') {
|
|
digit_val = digit - '0';
|
|
} else if (digit >= 'a' && digit <= 'f') {
|
|
digit_val = 10 + (digit - 'a');
|
|
} else if (digit >= 'A' && digit <= 'F') {
|
|
digit_val = 10 + (digit - 'A');
|
|
} else {
|
|
return {};
|
|
}
|
|
|
|
value = (value << 4) + digit_val;
|
|
}
|
|
return value;
|
|
}
|
|
|
|
template Optional<u8> convert_to_uint_from_hex(StringView str, TrimWhitespace);
|
|
template Optional<u16> convert_to_uint_from_hex(StringView str, TrimWhitespace);
|
|
template Optional<u32> convert_to_uint_from_hex(StringView str, TrimWhitespace);
|
|
template Optional<u64> convert_to_uint_from_hex(StringView str, TrimWhitespace);
|
|
|
|
template<typename T>
|
|
Optional<T> convert_to_uint_from_octal(StringView str, TrimWhitespace trim_whitespace)
|
|
{
|
|
auto string = trim_whitespace == TrimWhitespace::Yes
|
|
? str.trim_whitespace()
|
|
: str;
|
|
if (string.is_empty())
|
|
return {};
|
|
|
|
T value = 0;
|
|
auto const count = string.length();
|
|
const T upper_bound = NumericLimits<T>::max();
|
|
|
|
for (size_t i = 0; i < count; i++) {
|
|
char digit = string[i];
|
|
u8 digit_val;
|
|
if (value > (upper_bound >> 3))
|
|
return {};
|
|
|
|
if (digit >= '0' && digit <= '7') {
|
|
digit_val = digit - '0';
|
|
} else {
|
|
return {};
|
|
}
|
|
|
|
value = (value << 3) + digit_val;
|
|
}
|
|
return value;
|
|
}
|
|
|
|
template Optional<u8> convert_to_uint_from_octal(StringView str, TrimWhitespace);
|
|
template Optional<u16> convert_to_uint_from_octal(StringView str, TrimWhitespace);
|
|
template Optional<u32> convert_to_uint_from_octal(StringView str, TrimWhitespace);
|
|
template Optional<u64> convert_to_uint_from_octal(StringView str, TrimWhitespace);
|
|
|
|
#ifndef KERNEL
|
|
template<typename T>
|
|
Optional<T> convert_to_floating_point(StringView str, TrimWhitespace trim_whitespace)
|
|
{
|
|
static_assert(IsSame<T, double> || IsSame<T, float>);
|
|
auto string = trim_whitespace == TrimWhitespace::Yes
|
|
? str.trim_whitespace()
|
|
: str;
|
|
|
|
char const* start = string.characters_without_null_termination();
|
|
return parse_floating_point_completely<T>(start, start + str.length());
|
|
}
|
|
|
|
template Optional<double> convert_to_floating_point(StringView str, TrimWhitespace);
|
|
template Optional<float> convert_to_floating_point(StringView str, TrimWhitespace);
|
|
#endif
|
|
|
|
bool equals_ignoring_case(StringView a, StringView b)
|
|
{
|
|
if (a.length() != b.length())
|
|
return false;
|
|
for (size_t i = 0; i < a.length(); ++i) {
|
|
if (to_ascii_lowercase(a.characters_without_null_termination()[i]) != to_ascii_lowercase(b.characters_without_null_termination()[i]))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool ends_with(StringView str, StringView end, CaseSensitivity case_sensitivity)
|
|
{
|
|
if (end.is_empty())
|
|
return true;
|
|
if (str.is_empty())
|
|
return false;
|
|
if (end.length() > str.length())
|
|
return false;
|
|
|
|
if (case_sensitivity == CaseSensitivity::CaseSensitive)
|
|
return !memcmp(str.characters_without_null_termination() + (str.length() - end.length()), end.characters_without_null_termination(), end.length());
|
|
|
|
auto str_chars = str.characters_without_null_termination();
|
|
auto end_chars = end.characters_without_null_termination();
|
|
|
|
size_t si = str.length() - end.length();
|
|
for (size_t ei = 0; ei < end.length(); ++si, ++ei) {
|
|
if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(end_chars[ei]))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool starts_with(StringView str, StringView start, CaseSensitivity case_sensitivity)
|
|
{
|
|
if (start.is_empty())
|
|
return true;
|
|
if (str.is_empty())
|
|
return false;
|
|
if (start.length() > str.length())
|
|
return false;
|
|
if (str.characters_without_null_termination() == start.characters_without_null_termination())
|
|
return true;
|
|
|
|
if (case_sensitivity == CaseSensitivity::CaseSensitive)
|
|
return !memcmp(str.characters_without_null_termination(), start.characters_without_null_termination(), start.length());
|
|
|
|
auto str_chars = str.characters_without_null_termination();
|
|
auto start_chars = start.characters_without_null_termination();
|
|
|
|
size_t si = 0;
|
|
for (size_t starti = 0; starti < start.length(); ++si, ++starti) {
|
|
if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(start_chars[starti]))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool contains(StringView str, StringView needle, CaseSensitivity case_sensitivity)
|
|
{
|
|
if (str.is_null() || needle.is_null() || str.is_empty() || needle.length() > str.length())
|
|
return false;
|
|
if (needle.is_empty())
|
|
return true;
|
|
auto str_chars = str.characters_without_null_termination();
|
|
auto needle_chars = needle.characters_without_null_termination();
|
|
if (case_sensitivity == CaseSensitivity::CaseSensitive)
|
|
return memmem(str_chars, str.length(), needle_chars, needle.length()) != nullptr;
|
|
|
|
auto needle_first = to_ascii_lowercase(needle_chars[0]);
|
|
for (size_t si = 0; si < str.length(); si++) {
|
|
if (to_ascii_lowercase(str_chars[si]) != needle_first)
|
|
continue;
|
|
for (size_t ni = 0; si + ni < str.length(); ni++) {
|
|
if (to_ascii_lowercase(str_chars[si + ni]) != to_ascii_lowercase(needle_chars[ni])) {
|
|
if (ni > 0)
|
|
si += ni - 1;
|
|
break;
|
|
}
|
|
if (ni + 1 == needle.length())
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool is_whitespace(StringView str)
|
|
{
|
|
return all_of(str, is_ascii_space);
|
|
}
|
|
|
|
StringView trim(StringView str, StringView characters, TrimMode mode)
|
|
{
|
|
size_t substring_start = 0;
|
|
size_t substring_length = str.length();
|
|
|
|
if (mode == TrimMode::Left || mode == TrimMode::Both) {
|
|
for (size_t i = 0; i < str.length(); ++i) {
|
|
if (substring_length == 0)
|
|
return ""sv;
|
|
if (!characters.contains(str[i]))
|
|
break;
|
|
++substring_start;
|
|
--substring_length;
|
|
}
|
|
}
|
|
|
|
if (mode == TrimMode::Right || mode == TrimMode::Both) {
|
|
for (size_t i = str.length(); i > 0; --i) {
|
|
if (substring_length == 0)
|
|
return ""sv;
|
|
if (!characters.contains(str[i - 1]))
|
|
break;
|
|
--substring_length;
|
|
}
|
|
}
|
|
|
|
return str.substring_view(substring_start, substring_length);
|
|
}
|
|
|
|
StringView trim_whitespace(StringView str, TrimMode mode)
|
|
{
|
|
return trim(str, " \n\t\v\f\r"sv, mode);
|
|
}
|
|
|
|
Optional<size_t> find(StringView haystack, char needle, size_t start)
|
|
{
|
|
if (start >= haystack.length())
|
|
return {};
|
|
for (size_t i = start; i < haystack.length(); ++i) {
|
|
if (haystack[i] == needle)
|
|
return i;
|
|
}
|
|
return {};
|
|
}
|
|
|
|
Optional<size_t> find(StringView haystack, StringView needle, size_t start)
|
|
{
|
|
if (start > haystack.length())
|
|
return {};
|
|
auto index = AK::memmem_optional(
|
|
haystack.characters_without_null_termination() + start, haystack.length() - start,
|
|
needle.characters_without_null_termination(), needle.length());
|
|
return index.has_value() ? (*index + start) : index;
|
|
}
|
|
|
|
Optional<size_t> find_last(StringView haystack, char needle)
|
|
{
|
|
for (size_t i = haystack.length(); i > 0; --i) {
|
|
if (haystack[i - 1] == needle)
|
|
return i - 1;
|
|
}
|
|
return {};
|
|
}
|
|
|
|
Optional<size_t> find_last(StringView haystack, StringView needle)
|
|
{
|
|
for (size_t i = haystack.length(); i > 0; --i) {
|
|
auto value = StringUtils::find(haystack, needle, i - 1);
|
|
if (value.has_value())
|
|
return value;
|
|
}
|
|
|
|
return {};
|
|
}
|
|
|
|
Optional<size_t> find_last_not(StringView haystack, char needle)
|
|
{
|
|
for (size_t i = haystack.length(); i > 0; --i) {
|
|
if (haystack[i - 1] != needle)
|
|
return i - 1;
|
|
}
|
|
return {};
|
|
}
|
|
|
|
Vector<size_t> find_all(StringView haystack, StringView needle)
|
|
{
|
|
Vector<size_t> positions;
|
|
size_t current_position = 0;
|
|
while (current_position <= haystack.length()) {
|
|
auto maybe_position = AK::memmem_optional(
|
|
haystack.characters_without_null_termination() + current_position, haystack.length() - current_position,
|
|
needle.characters_without_null_termination(), needle.length());
|
|
if (!maybe_position.has_value())
|
|
break;
|
|
positions.append(current_position + *maybe_position);
|
|
current_position += *maybe_position + 1;
|
|
}
|
|
return positions;
|
|
}
|
|
|
|
Optional<size_t> find_any_of(StringView haystack, StringView needles, SearchDirection direction)
|
|
{
|
|
if (haystack.is_empty() || needles.is_empty())
|
|
return {};
|
|
if (direction == SearchDirection::Forward) {
|
|
for (size_t i = 0; i < haystack.length(); ++i) {
|
|
if (needles.contains(haystack[i]))
|
|
return i;
|
|
}
|
|
} else if (direction == SearchDirection::Backward) {
|
|
for (size_t i = haystack.length(); i > 0; --i) {
|
|
if (needles.contains(haystack[i - 1]))
|
|
return i - 1;
|
|
}
|
|
}
|
|
return {};
|
|
}
|
|
|
|
#ifndef KERNEL
|
|
DeprecatedString to_snakecase(StringView str)
|
|
{
|
|
auto should_insert_underscore = [&](auto i, auto current_char) {
|
|
if (i == 0)
|
|
return false;
|
|
auto previous_ch = str[i - 1];
|
|
if (is_ascii_lower_alpha(previous_ch) && is_ascii_upper_alpha(current_char))
|
|
return true;
|
|
if (i >= str.length() - 1)
|
|
return false;
|
|
auto next_ch = str[i + 1];
|
|
if (is_ascii_upper_alpha(current_char) && is_ascii_lower_alpha(next_ch))
|
|
return true;
|
|
return false;
|
|
};
|
|
|
|
StringBuilder builder;
|
|
for (size_t i = 0; i < str.length(); ++i) {
|
|
auto ch = str[i];
|
|
if (should_insert_underscore(i, ch))
|
|
builder.append('_');
|
|
builder.append_as_lowercase(ch);
|
|
}
|
|
return builder.to_deprecated_string();
|
|
}
|
|
|
|
DeprecatedString to_titlecase(StringView str)
|
|
{
|
|
StringBuilder builder;
|
|
bool next_is_upper = true;
|
|
|
|
for (auto ch : str) {
|
|
if (next_is_upper)
|
|
builder.append(to_ascii_uppercase(ch));
|
|
else
|
|
builder.append(to_ascii_lowercase(ch));
|
|
next_is_upper = ch == ' ';
|
|
}
|
|
|
|
return builder.to_deprecated_string();
|
|
}
|
|
|
|
DeprecatedString invert_case(StringView str)
|
|
{
|
|
StringBuilder builder(str.length());
|
|
|
|
for (auto ch : str) {
|
|
if (is_ascii_lower_alpha(ch))
|
|
builder.append(to_ascii_uppercase(ch));
|
|
else
|
|
builder.append(to_ascii_lowercase(ch));
|
|
}
|
|
|
|
return builder.to_deprecated_string();
|
|
}
|
|
|
|
DeprecatedString replace(StringView str, StringView needle, StringView replacement, ReplaceMode replace_mode)
|
|
{
|
|
if (str.is_empty())
|
|
return str;
|
|
|
|
Vector<size_t> positions;
|
|
if (replace_mode == ReplaceMode::All) {
|
|
positions = str.find_all(needle);
|
|
if (!positions.size())
|
|
return str;
|
|
} else {
|
|
auto pos = str.find(needle);
|
|
if (!pos.has_value())
|
|
return str;
|
|
positions.append(pos.value());
|
|
}
|
|
|
|
StringBuilder replaced_string;
|
|
size_t last_position = 0;
|
|
for (auto& position : positions) {
|
|
replaced_string.append(str.substring_view(last_position, position - last_position));
|
|
replaced_string.append(replacement);
|
|
last_position = position + needle.length();
|
|
}
|
|
replaced_string.append(str.substring_view(last_position, str.length() - last_position));
|
|
return replaced_string.to_deprecated_string();
|
|
}
|
|
|
|
ErrorOr<String> replace(String const& haystack, StringView needle, StringView replacement, ReplaceMode replace_mode)
|
|
{
|
|
if (haystack.is_empty())
|
|
return haystack;
|
|
|
|
// FIXME: Propagate Vector allocation failures (or do this without putting positions in a vector)
|
|
Vector<size_t> positions;
|
|
if (replace_mode == ReplaceMode::All) {
|
|
positions = haystack.bytes_as_string_view().find_all(needle);
|
|
if (!positions.size())
|
|
return haystack;
|
|
} else {
|
|
auto pos = haystack.bytes_as_string_view().find(needle);
|
|
if (!pos.has_value())
|
|
return haystack;
|
|
positions.append(pos.value());
|
|
}
|
|
|
|
StringBuilder replaced_string;
|
|
size_t last_position = 0;
|
|
for (auto& position : positions) {
|
|
replaced_string.append(haystack.bytes_as_string_view().substring_view(last_position, position - last_position));
|
|
replaced_string.append(replacement);
|
|
last_position = position + needle.length();
|
|
}
|
|
replaced_string.append(haystack.bytes_as_string_view().substring_view(last_position, haystack.bytes_as_string_view().length() - last_position));
|
|
return replaced_string.to_string();
|
|
}
|
|
#endif
|
|
|
|
// TODO: Benchmark against KMP (AK/MemMem.h) and switch over if it's faster for short strings too
|
|
size_t count(StringView str, StringView needle)
|
|
{
|
|
if (needle.is_empty())
|
|
return str.length();
|
|
|
|
size_t count = 0;
|
|
for (size_t i = 0; i < str.length() - needle.length() + 1; ++i) {
|
|
if (str.substring_view(i).starts_with(needle))
|
|
count++;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
}
|
|
|
|
}
|