2020-08-21 02:17:15 +03:00
|
|
|
/*
|
2022-01-29 20:40:19 +03:00
|
|
|
* Copyright (c) 2020-2022, the SerenityOS developers.
|
2020-08-21 02:17:15 +03:00
|
|
|
*
|
2021-04-22 11:24:48 +03:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
2020-08-21 02:17:15 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2020-12-05 03:59:09 +03:00
|
|
|
#include <AK/Array.h>
|
2020-08-21 02:17:15 +03:00
|
|
|
#include <AK/Assertions.h>
|
2020-12-05 03:59:09 +03:00
|
|
|
#include <AK/Span.h>
|
2020-08-21 02:17:15 +03:00
|
|
|
#include <AK/Types.h>
|
2020-12-05 03:59:09 +03:00
|
|
|
#include <AK/Vector.h>
|
2020-08-21 02:17:15 +03:00
|
|
|
|
|
|
|
namespace AK {
|
|
|
|
|
2021-11-21 00:09:22 +03:00
|
|
|
namespace Detail {
|
2022-04-01 20:58:27 +03:00
|
|
|
constexpr void const* bitap_bitwise(void const* haystack, size_t haystack_length, void const* needle, size_t needle_length)
|
2020-08-21 02:17:15 +03:00
|
|
|
{
|
2021-02-23 22:42:32 +03:00
|
|
|
VERIFY(needle_length < 32);
|
2020-08-21 02:17:15 +03:00
|
|
|
|
2022-07-14 05:53:36 +03:00
|
|
|
u32 lookup = 0xfffffffe;
|
2020-08-21 02:17:15 +03:00
|
|
|
|
|
|
|
constexpr size_t mask_length = (size_t)((u8)-1) + 1;
|
2022-07-14 05:53:36 +03:00
|
|
|
u32 needle_mask[mask_length];
|
2020-08-21 02:17:15 +03:00
|
|
|
|
|
|
|
for (size_t i = 0; i < mask_length; ++i)
|
|
|
|
needle_mask[i] = 0xffffffff;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < needle_length; ++i)
|
2022-04-01 20:58:27 +03:00
|
|
|
needle_mask[((u8 const*)needle)[i]] &= ~(0x00000001 << i);
|
2020-08-21 02:17:15 +03:00
|
|
|
|
|
|
|
for (size_t i = 0; i < haystack_length; ++i) {
|
2022-04-01 20:58:27 +03:00
|
|
|
lookup |= needle_mask[((u8 const*)haystack)[i]];
|
2020-08-21 02:17:15 +03:00
|
|
|
lookup <<= 1;
|
|
|
|
|
2021-11-21 00:09:22 +03:00
|
|
|
if (0 == (lookup & (0x00000001 << needle_length)))
|
2022-04-01 20:58:27 +03:00
|
|
|
return ((u8 const*)haystack) + i - needle_length + 1;
|
2020-08-21 02:17:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-05 03:59:09 +03:00
|
|
|
template<typename HaystackIterT>
|
2023-02-05 22:02:54 +03:00
|
|
|
inline Optional<size_t> memmem(HaystackIterT const& haystack_begin, HaystackIterT const& haystack_end, ReadonlyBytes needle)
|
2022-10-17 01:06:11 +03:00
|
|
|
requires(requires { (*haystack_begin).data(); (*haystack_begin).size(); })
|
2020-12-05 03:59:09 +03:00
|
|
|
{
|
|
|
|
auto prepare_kmp_partial_table = [&] {
|
|
|
|
Vector<int, 64> table;
|
|
|
|
table.resize(needle.size());
|
|
|
|
|
|
|
|
size_t position = 1;
|
|
|
|
int candidate = 0;
|
|
|
|
|
|
|
|
table[0] = -1;
|
|
|
|
while (position < needle.size()) {
|
|
|
|
if (needle[position] == needle[candidate]) {
|
|
|
|
table[position] = table[candidate];
|
|
|
|
} else {
|
|
|
|
table[position] = candidate;
|
|
|
|
do {
|
|
|
|
candidate = table[candidate];
|
|
|
|
} while (candidate >= 0 && needle[candidate] != needle[position]);
|
|
|
|
}
|
|
|
|
++position;
|
|
|
|
++candidate;
|
|
|
|
}
|
|
|
|
return table;
|
|
|
|
};
|
|
|
|
|
|
|
|
auto table = prepare_kmp_partial_table();
|
|
|
|
size_t total_haystack_index = 0;
|
|
|
|
size_t current_haystack_index = 0;
|
|
|
|
int needle_index = 0;
|
|
|
|
auto haystack_it = haystack_begin;
|
|
|
|
|
|
|
|
while (haystack_it != haystack_end) {
|
|
|
|
auto&& chunk = *haystack_it;
|
|
|
|
if (current_haystack_index >= chunk.size()) {
|
|
|
|
current_haystack_index = 0;
|
|
|
|
++haystack_it;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (needle[needle_index] == chunk[current_haystack_index]) {
|
|
|
|
++needle_index;
|
|
|
|
++current_haystack_index;
|
|
|
|
++total_haystack_index;
|
|
|
|
if ((size_t)needle_index == needle.size())
|
|
|
|
return total_haystack_index - needle_index;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
needle_index = table[needle_index];
|
|
|
|
if (needle_index < 0) {
|
|
|
|
++needle_index;
|
|
|
|
++current_haystack_index;
|
|
|
|
++total_haystack_index;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
2022-04-01 20:58:27 +03:00
|
|
|
// Locates the first occurrence of `needle` in `haystack` and reports it as a byte offset.
//
// Returns 0 for an empty needle, the offset of the first match otherwise,
// or an empty Optional when the needle does not occur in the haystack.
inline Optional<size_t> memmem_optional(void const* haystack, size_t haystack_length, void const* needle, size_t needle_length)
{
    // Trivial cases: nothing to look for, or the needle cannot possibly fit.
    if (needle_length == 0)
        return 0;

    if (needle_length > haystack_length)
        return {};

    // Same size: the only possible match is the entire haystack.
    if (needle_length == haystack_length) {
        if (__builtin_memcmp(haystack, needle, haystack_length) != 0)
            return {};
        return 0;
    }

    // Needles of 32 bytes or more do not fit the bitap state word; use KMP over a single chunk.
    if (needle_length >= 32) {
        Array<ReadonlyBytes, 1> spans { ReadonlyBytes { (u8 const*)haystack, haystack_length } };
        return memmem(spans.begin(), spans.end(), { (u8 const*)needle, needle_length });
    }

    // Short needle: bitap returns a pointer, which is converted back into an offset.
    auto const* match = Detail::bitap_bitwise(haystack, haystack_length, needle, needle_length);
    if (!match)
        return {};
    return static_cast<size_t>((FlatPtr)match - (FlatPtr)haystack);
}
|
2020-08-21 02:17:15 +03:00
|
|
|
|
2022-04-01 20:58:27 +03:00
|
|
|
inline void const* memmem(void const* haystack, size_t haystack_length, void const* needle, size_t needle_length)
|
2021-01-12 22:58:45 +03:00
|
|
|
{
|
|
|
|
auto offset = memmem_optional(haystack, haystack_length, needle, needle_length);
|
|
|
|
if (offset.has_value())
|
2022-04-01 20:58:27 +03:00
|
|
|
return ((u8 const*)haystack) + offset.value();
|
2020-12-05 03:59:09 +03:00
|
|
|
|
2021-01-12 22:58:45 +03:00
|
|
|
return nullptr;
|
2020-08-21 02:17:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|