mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-09-19 17:27:51 +03:00
LibRegex: Avoid excessive Vector copy when compiling regexps
Previously we would've copied the bytecode instead of moving the chunks around; now we use the fancy new DisjointChunks<T> abstraction to make that happen automagically. This decreases vector copies and uses of memmove() by nearly 10x :^)
This commit is contained in:
parent
7e75a16e6f
commit
bf0315ff8f
Notes:
sideshowbarker
2024-07-18 03:58:34 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/bf0315ff8f7 Pull-request: https://github.com/SerenityOS/serenity/pull/10019
@ -10,6 +10,7 @@
|
||||
#include "RegexMatch.h"
|
||||
#include "RegexOptions.h"
|
||||
|
||||
#include <AK/DisjointChunks.h>
|
||||
#include <AK/Format.h>
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/HashMap.h>
|
||||
@ -139,7 +140,9 @@ struct CompareTypeAndValuePair {
|
||||
|
||||
class OpCode;
|
||||
|
||||
class ByteCode : public Vector<ByteCodeValueType> {
|
||||
class ByteCode : public DisjointChunks<ByteCodeValueType> {
|
||||
using Base = DisjointChunks<ByteCodeValueType>;
|
||||
|
||||
public:
|
||||
ByteCode()
|
||||
{
|
||||
@ -150,6 +153,36 @@ public:
|
||||
virtual ~ByteCode() = default;
|
||||
|
||||
ByteCode& operator=(ByteCode&&) = default;
|
||||
ByteCode& operator=(Base&& value)
|
||||
{
|
||||
static_cast<Base&>(*this) = move(value);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename... Args>
|
||||
void empend(Args&&... args)
|
||||
{
|
||||
if (is_empty())
|
||||
Base::append({});
|
||||
Base::last_chunk().empend(forward<Args>(args)...);
|
||||
}
|
||||
template<typename T>
|
||||
void append(T&& value)
|
||||
{
|
||||
if (is_empty())
|
||||
Base::append({});
|
||||
Base::last_chunk().append(forward<T>(value));
|
||||
}
|
||||
template<typename T>
|
||||
void prepend(T&& value)
|
||||
{
|
||||
if (is_empty())
|
||||
return append(forward<T>(value));
|
||||
Base::first_chunk().prepend(forward<T>(value));
|
||||
}
|
||||
|
||||
void last_chunk() const = delete;
|
||||
void first_chunk() const = delete;
|
||||
|
||||
void insert_bytecode_compare_values(Vector<CompareTypeAndValuePair>&& pairs)
|
||||
{
|
||||
@ -309,7 +342,7 @@ public:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
void insert_bytecode_alternation(ByteCode left, ByteCode right)
|
||||
void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
|
||||
{
|
||||
|
||||
// FORKJUMP _ALT
|
||||
@ -320,7 +353,7 @@ public:
|
||||
// LABEL _END
|
||||
|
||||
// Optimisation: Eliminate extra work by unifying common pre-and-postfix exprs.
|
||||
Optimizer::append_alternation(*this, left, right);
|
||||
Optimizer::append_alternation(*this, move(left), move(right));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
@ -476,8 +509,7 @@ public:
|
||||
|
||||
bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label
|
||||
|
||||
for (auto& op : bytecode_to_repeat)
|
||||
bytecode.append(move(op));
|
||||
bytecode.extend(move(bytecode_to_repeat));
|
||||
// LABEL _END = bytecode.size()
|
||||
|
||||
bytecode_to_repeat = move(bytecode);
|
||||
|
@ -12,7 +12,7 @@ namespace regex {
|
||||
|
||||
class Optimizer {
|
||||
public:
|
||||
static void append_alternation(ByteCode& target, ByteCode& left, ByteCode& right);
|
||||
static void append_alternation(ByteCode& target, ByteCode&& left, ByteCode&& right);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -20,6 +20,8 @@ void Regex<Parser>::run_optimization_passes()
|
||||
// Rewrite fork loops as atomic groups
|
||||
// e.g. a*b -> (ATOMIC a*)b
|
||||
attempt_rewrite_loops_as_atomic_groups(split_basic_blocks());
|
||||
|
||||
parser_result.bytecode.flatten();
|
||||
}
|
||||
|
||||
template<typename Parser>
|
||||
@ -413,7 +415,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
||||
}
|
||||
}
|
||||
|
||||
void Optimizer::append_alternation(ByteCode& target, ByteCode& left, ByteCode& right)
|
||||
void Optimizer::append_alternation(ByteCode& target, ByteCode&& left, ByteCode&& right)
|
||||
{
|
||||
if (left.is_empty()) {
|
||||
target.extend(right);
|
||||
@ -433,7 +435,7 @@ void Optimizer::append_alternation(ByteCode& target, ByteCode& left, ByteCode& r
|
||||
if (left_size != right_size)
|
||||
break;
|
||||
|
||||
if (left.span().slice(state.instruction_position, left_size) == right.span().slice(state.instruction_position, right_size))
|
||||
if (left.spans().slice(state.instruction_position, left_size) == right.spans().slice(state.instruction_position, right_size))
|
||||
left_skip = state.instruction_position + left_size;
|
||||
else
|
||||
break;
|
||||
@ -441,34 +443,30 @@ void Optimizer::append_alternation(ByteCode& target, ByteCode& left, ByteCode& r
|
||||
state.instruction_position += left_size;
|
||||
}
|
||||
|
||||
// FIXME: Implement postfix unification too.
|
||||
size_t right_skip = 0;
|
||||
dbgln_if(REGEX_DEBUG, "Skipping {}/{} bytecode entries from {}/{}", left_skip, 0, left.size(), right.size());
|
||||
|
||||
if (left_skip)
|
||||
target.append(left.data(), left_skip);
|
||||
if (left_skip) {
|
||||
target.extend(left.release_slice(0, left_skip));
|
||||
right = right.release_slice(left_skip);
|
||||
}
|
||||
|
||||
dbgln_if(REGEX_DEBUG, "Skipping {}/{} bytecode entries from {}/{}", left_skip, right_skip, left.size(), right.size());
|
||||
|
||||
auto left_slice = left.span().slice(left_skip, left.size() - left_skip - right_skip);
|
||||
auto right_slice = right.span().slice(left_skip, right.size() - left_skip - right_skip);
|
||||
auto left_size = left.size();
|
||||
|
||||
target.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||
target.empend(right_slice.size() + 2); // Jump to the _ALT label
|
||||
target.empend(right.size() + (left_size ? 2 : 0)); // Jump to the _ALT label
|
||||
|
||||
target.append(right_slice.data(), right_slice.size());
|
||||
target.extend(move(right));
|
||||
|
||||
if (!left_slice.is_empty()) {
|
||||
if (left_size != 0) {
|
||||
target.empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
|
||||
target.empend(left_slice.size()); // Jump to the _END label
|
||||
target.empend(left.size()); // Jump to the _END label
|
||||
}
|
||||
|
||||
// LABEL _ALT = bytecode.size() + 2
|
||||
|
||||
target.append(left_slice.data(), left_slice.size());
|
||||
target.extend(move(left));
|
||||
|
||||
// LABEL _END = alternative_bytecode.size
|
||||
if (right_skip)
|
||||
target.append(left.span().slice_from_end(right_skip).data(), right_skip);
|
||||
}
|
||||
|
||||
template void Regex<PosixBasicParser>::run_optimization_passes();
|
||||
|
Loading…
Reference in New Issue
Block a user