mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-09-20 01:37:39 +03:00
LibRegex: Assign unique serial IDs to checkpoints
This makes the compiler assign a unique serial ID to each checkpoint instead of using the checkpoint's instruction position (IP) as its identifier. A future commit will use these IDs to replace the backing store of checkpoints with a vector.
This commit is contained in:
parent
06573cd46d
commit
2d6f50932b
Notes:
sideshowbarker
2024-07-16 23:51:07 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/2d6f50932b Pull-request: https://github.com/SerenityOS/serenity/pull/20014
@ -160,6 +160,7 @@ static bool restore_string_position(MatchInput const& input, MatchState& state)
|
|||||||
|
|
||||||
OwnPtr<OpCode> ByteCode::s_opcodes[(size_t)OpCodeId::Last + 1];
|
OwnPtr<OpCode> ByteCode::s_opcodes[(size_t)OpCodeId::Last + 1];
|
||||||
bool ByteCode::s_opcodes_initialized { false };
|
bool ByteCode::s_opcodes_initialized { false };
|
||||||
|
size_t ByteCode::s_next_checkpoint_serial_id { 0 };
|
||||||
|
|
||||||
void ByteCode::ensure_opcodes_initialized()
|
void ByteCode::ensure_opcodes_initialized()
|
||||||
{
|
{
|
||||||
@ -1063,15 +1064,14 @@ ALWAYS_INLINE ExecutionResult OpCode_ResetRepeat::execute(MatchInput const&, Mat
|
|||||||
|
|
||||||
ALWAYS_INLINE ExecutionResult OpCode_Checkpoint::execute(MatchInput const& input, MatchState& state) const
|
ALWAYS_INLINE ExecutionResult OpCode_Checkpoint::execute(MatchInput const& input, MatchState& state) const
|
||||||
{
|
{
|
||||||
input.checkpoints.set(state.instruction_position, state.string_position);
|
input.checkpoints.set(id(), state.string_position);
|
||||||
return ExecutionResult::Continue;
|
return ExecutionResult::Continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE ExecutionResult OpCode_JumpNonEmpty::execute(MatchInput const& input, MatchState& state) const
|
ALWAYS_INLINE ExecutionResult OpCode_JumpNonEmpty::execute(MatchInput const& input, MatchState& state) const
|
||||||
{
|
{
|
||||||
u64 current_position = state.string_position;
|
u64 current_position = state.string_position;
|
||||||
auto checkpoint_ip = state.instruction_position + size() + checkpoint();
|
auto checkpoint_position = input.checkpoints.find(checkpoint());
|
||||||
auto checkpoint_position = input.checkpoints.find(checkpoint_ip);
|
|
||||||
|
|
||||||
if (checkpoint_position != input.checkpoints.end() && checkpoint_position->value != current_position) {
|
if (checkpoint_position != input.checkpoints.end() && checkpoint_position->value != current_position) {
|
||||||
auto form = this->form();
|
auto form = this->form();
|
||||||
|
@ -397,12 +397,14 @@ public:
|
|||||||
// JUMP_NONEMPTY _C _START FORK
|
// JUMP_NONEMPTY _C _START FORK
|
||||||
|
|
||||||
// Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n)
|
// Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n)
|
||||||
|
auto checkpoint = s_next_checkpoint_serial_id++;
|
||||||
new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint);
|
new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint);
|
||||||
|
new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)checkpoint);
|
||||||
|
|
||||||
auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
|
auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
|
||||||
new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
|
new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
|
||||||
new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 1); // Jump to the last iteration
|
new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 2); // Jump to the last iteration
|
||||||
new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 1); // if _C is not empty.
|
new_bytecode.empend(checkpoint); // if _C is not empty.
|
||||||
new_bytecode.empend(jump_kind);
|
new_bytecode.empend(jump_kind);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -440,11 +442,13 @@ public:
|
|||||||
// REGEXP
|
// REGEXP
|
||||||
// JUMP_NONEMPTY _C _START FORKSTAY (FORKJUMP -> Greedy)
|
// JUMP_NONEMPTY _C _START FORKSTAY (FORKJUMP -> Greedy)
|
||||||
|
|
||||||
|
auto checkpoint = s_next_checkpoint_serial_id++;
|
||||||
|
bytecode_to_repeat.prepend((ByteCodeValueType)checkpoint);
|
||||||
bytecode_to_repeat.prepend((ByteCodeValueType)OpCodeId::Checkpoint);
|
bytecode_to_repeat.prepend((ByteCodeValueType)OpCodeId::Checkpoint);
|
||||||
|
|
||||||
bytecode_to_repeat.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
|
bytecode_to_repeat.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
|
||||||
bytecode_to_repeat.empend(-bytecode_to_repeat.size() - 3); // Jump to the _START label...
|
bytecode_to_repeat.empend(-bytecode_to_repeat.size() - 3); // Jump to the _START label...
|
||||||
bytecode_to_repeat.empend(-bytecode_to_repeat.size() - 2); // ...if _C is not empty
|
bytecode_to_repeat.empend(checkpoint); // ...if _C is not empty
|
||||||
|
|
||||||
if (greedy)
|
if (greedy)
|
||||||
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||||
@ -469,16 +473,17 @@ public:
|
|||||||
else
|
else
|
||||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
||||||
|
|
||||||
bytecode.empend(bytecode_to_repeat.size() + 1 + 4); // Jump to the _END label
|
bytecode.empend(bytecode_to_repeat.size() + 2 + 4); // Jump to the _END label
|
||||||
|
|
||||||
auto c_label = bytecode.size();
|
auto checkpoint = s_next_checkpoint_serial_id++;
|
||||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Checkpoint));
|
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Checkpoint));
|
||||||
|
bytecode.empend(static_cast<ByteCodeValueType>(checkpoint));
|
||||||
|
|
||||||
bytecode.extend(bytecode_to_repeat);
|
bytecode.extend(bytecode_to_repeat);
|
||||||
|
|
||||||
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::JumpNonEmpty));
|
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::JumpNonEmpty));
|
||||||
bytecode.empend(-bytecode.size() - 3); // Jump(...) to the _START label...
|
bytecode.empend(-bytecode.size() - 3); // Jump(...) to the _START label...
|
||||||
bytecode.empend(c_label - bytecode.size() - 2); // ...only if _C passes.
|
bytecode.empend(checkpoint); // ...only if _C passes.
|
||||||
bytecode.empend((ByteCodeValueType)OpCodeId::Jump);
|
bytecode.empend((ByteCodeValueType)OpCodeId::Jump);
|
||||||
// LABEL _END = bytecode.size()
|
// LABEL _END = bytecode.size()
|
||||||
|
|
||||||
@ -507,6 +512,8 @@ public:
|
|||||||
|
|
||||||
OpCode& get_opcode(MatchState& state) const;
|
OpCode& get_opcode(MatchState& state) const;
|
||||||
|
|
||||||
|
static void reset_checkpoint_serial_id() { s_next_checkpoint_serial_id = 0; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void insert_string(StringView view)
|
void insert_string(StringView view)
|
||||||
{
|
{
|
||||||
@ -519,6 +526,7 @@ private:
|
|||||||
ALWAYS_INLINE OpCode& get_opcode_by_id(OpCodeId id) const;
|
ALWAYS_INLINE OpCode& get_opcode_by_id(OpCodeId id) const;
|
||||||
static OwnPtr<OpCode> s_opcodes[(size_t)OpCodeId::Last + 1];
|
static OwnPtr<OpCode> s_opcodes[(size_t)OpCodeId::Last + 1];
|
||||||
static bool s_opcodes_initialized;
|
static bool s_opcodes_initialized;
|
||||||
|
static size_t s_next_checkpoint_serial_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ENUMERATE_EXECUTION_RESULTS \
|
#define ENUMERATE_EXECUTION_RESULTS \
|
||||||
@ -793,8 +801,9 @@ class OpCode_Checkpoint final : public OpCode {
|
|||||||
public:
|
public:
|
||||||
ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
|
ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
|
||||||
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Checkpoint; }
|
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Checkpoint; }
|
||||||
ALWAYS_INLINE size_t size() const override { return 1; }
|
ALWAYS_INLINE size_t size() const override { return 2; }
|
||||||
DeprecatedString arguments_string() const override { return DeprecatedString::empty(); }
|
ALWAYS_INLINE size_t id() const { return argument(0); }
|
||||||
|
DeprecatedString arguments_string() const override { return DeprecatedString::formatted("id={}", id()); }
|
||||||
};
|
};
|
||||||
|
|
||||||
class OpCode_JumpNonEmpty final : public OpCode {
|
class OpCode_JumpNonEmpty final : public OpCode {
|
||||||
@ -807,10 +816,10 @@ public:
|
|||||||
ALWAYS_INLINE OpCodeId form() const { return (OpCodeId)argument(2); }
|
ALWAYS_INLINE OpCodeId form() const { return (OpCodeId)argument(2); }
|
||||||
DeprecatedString arguments_string() const override
|
DeprecatedString arguments_string() const override
|
||||||
{
|
{
|
||||||
return DeprecatedString::formatted("{} offset={} [&{}], cp={} [&{}]",
|
return DeprecatedString::formatted("{} offset={} [&{}], cp={}",
|
||||||
opcode_id_name(form()),
|
opcode_id_name(form()),
|
||||||
offset(), state().instruction_position + size() + offset(),
|
offset(), state().instruction_position + size() + offset(),
|
||||||
checkpoint(), state().instruction_position + size() + checkpoint());
|
checkpoint());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -179,6 +179,8 @@ ALWAYS_INLINE void Parser::reset()
|
|||||||
|
|
||||||
Parser::Result Parser::parse(Optional<AllOptions> regex_options)
|
Parser::Result Parser::parse(Optional<AllOptions> regex_options)
|
||||||
{
|
{
|
||||||
|
ByteCode::reset_checkpoint_serial_id();
|
||||||
|
|
||||||
reset();
|
reset();
|
||||||
if (regex_options.has_value())
|
if (regex_options.has_value())
|
||||||
m_parser_state.regex_options = regex_options.value();
|
m_parser_state.regex_options = regex_options.value();
|
||||||
|
Loading…
Reference in New Issue
Block a user