mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-09-20 09:49:15 +03:00
LibRegex: Make Fork{Jump,Stay} non-recursive
This prevents very fork-heavy expressions (like `(aa)*`) from running out of stack space when matching very long strings.
This commit is contained in:
parent
a08870cc19
commit
5f342e4fa9
Notes:
sideshowbarker
2024-07-18 07:35:17 +09:00
Author: https://github.com/alimpfard
Commit: https://github.com/SerenityOS/serenity/commit/5f342e4fa96
Pull-request: https://github.com/SerenityOS/serenity/pull/9127
@ -469,6 +469,7 @@ struct MatchState {
|
|||||||
Vector<Match> matches;
|
Vector<Match> matches;
|
||||||
Vector<Vector<Match>> capture_group_matches;
|
Vector<Vector<Match>> capture_group_matches;
|
||||||
Vector<HashMap<String, Match>> named_capture_group_matches;
|
Vector<HashMap<String, Match>> named_capture_group_matches;
|
||||||
|
size_t recursion_level { 0 };
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MatchOutput {
|
struct MatchOutput {
|
||||||
|
@ -7,11 +7,12 @@
|
|||||||
#include <AK/Debug.h>
|
#include <AK/Debug.h>
|
||||||
#include <AK/String.h>
|
#include <AK/String.h>
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
|
#include <LibRegex/RegexMatcher.h>
|
||||||
|
#include <LibRegex/RegexParser.h>
|
||||||
|
|
||||||
#if REGEX_DEBUG
|
#if REGEX_DEBUG
|
||||||
# include <LibRegex/RegexDebug.h>
|
# include <LibRegex/RegexDebug.h>
|
||||||
#endif
|
#endif
|
||||||
#include <LibRegex/RegexMatcher.h>
|
|
||||||
#include <LibRegex/RegexParser.h>
|
|
||||||
|
|
||||||
namespace regex {
|
namespace regex {
|
||||||
|
|
||||||
@ -210,10 +211,10 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
|
|||||||
state.string_position = view_index;
|
state.string_position = view_index;
|
||||||
state.instruction_position = 0;
|
state.instruction_position = 0;
|
||||||
|
|
||||||
auto success = execute(input, state, temp_output, 0);
|
auto success = execute(input, state, temp_output);
|
||||||
// This success is acceptable only if it doesn't read anything from the input (input length is 0).
|
// This success is acceptable only if it doesn't read anything from the input (input length is 0).
|
||||||
if (state.string_position <= view_index) {
|
if (state.string_position <= view_index) {
|
||||||
if (success.value()) {
|
if (success.has_value() && success.value()) {
|
||||||
output = move(temp_output);
|
output = move(temp_output);
|
||||||
if (!match_count) {
|
if (!match_count) {
|
||||||
// Nothing was *actually* matched, so append an empty match.
|
// Nothing was *actually* matched, so append an empty match.
|
||||||
@ -241,7 +242,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
|
|||||||
state.string_position = view_index;
|
state.string_position = view_index;
|
||||||
state.instruction_position = 0;
|
state.instruction_position = 0;
|
||||||
|
|
||||||
auto success = execute(input, state, output, 0);
|
auto success = execute(input, state, output);
|
||||||
if (!success.has_value())
|
if (!success.has_value())
|
||||||
return { false, 0, {}, {}, {}, output.operations };
|
return { false, 0, {}, {}, {}, output.operations };
|
||||||
|
|
||||||
@ -334,14 +335,11 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Parser>
|
template<class Parser>
|
||||||
Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, MatchOutput& output, size_t recursion_level) const
|
Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, MatchOutput& output) const
|
||||||
{
|
{
|
||||||
if (recursion_level > c_max_recursion)
|
state.recursion_level = 0;
|
||||||
return false;
|
|
||||||
|
|
||||||
Vector<MatchState, 64> reversed_fork_low_prio_states;
|
Vector<MatchState, 64> states_to_try_next;
|
||||||
MatchState fork_high_prio_state;
|
|
||||||
Optional<bool> success;
|
|
||||||
|
|
||||||
auto& bytecode = m_pattern->parser_result.bytecode;
|
auto& bytecode = m_pattern->parser_result.bytecode;
|
||||||
|
|
||||||
@ -350,7 +348,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
|
|||||||
auto& opcode = bytecode.get_opcode(state);
|
auto& opcode = bytecode.get_opcode(state);
|
||||||
|
|
||||||
#if REGEX_DEBUG
|
#if REGEX_DEBUG
|
||||||
s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false);
|
s_regex_dbg.print_opcode("VM", opcode, state, state.recursion_level, false);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ExecutionResult result;
|
ExecutionResult result;
|
||||||
@ -369,33 +367,33 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
|
|||||||
|
|
||||||
switch (result) {
|
switch (result) {
|
||||||
case ExecutionResult::Fork_PrioLow:
|
case ExecutionResult::Fork_PrioLow:
|
||||||
reversed_fork_low_prio_states.append(state);
|
states_to_try_next.append(state);
|
||||||
|
states_to_try_next.last().instruction_position = state.fork_at_position;
|
||||||
continue;
|
continue;
|
||||||
case ExecutionResult::Fork_PrioHigh:
|
case ExecutionResult::Fork_PrioHigh:
|
||||||
fork_high_prio_state = state;
|
states_to_try_next.append(state);
|
||||||
fork_high_prio_state.instruction_position = fork_high_prio_state.fork_at_position;
|
state.instruction_position = state.fork_at_position;
|
||||||
success = execute(input, fork_high_prio_state, output, ++recursion_level);
|
++state.recursion_level;
|
||||||
if (!success.has_value())
|
|
||||||
return {};
|
|
||||||
|
|
||||||
if (success.value()) {
|
|
||||||
state = fork_high_prio_state;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
case ExecutionResult::Continue:
|
case ExecutionResult::Continue:
|
||||||
continue;
|
continue;
|
||||||
case ExecutionResult::Succeeded:
|
case ExecutionResult::Succeeded:
|
||||||
return true;
|
return true;
|
||||||
case ExecutionResult::Failed:
|
case ExecutionResult::Failed:
|
||||||
|
if (!states_to_try_next.is_empty()) {
|
||||||
|
state = states_to_try_next.take_last();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
case ExecutionResult::Failed_ExecuteLowPrioForks: {
|
case ExecutionResult::Failed_ExecuteLowPrioForks: {
|
||||||
Vector<MatchState> fork_low_prio_states;
|
if (states_to_try_next.is_empty()) {
|
||||||
fork_low_prio_states.ensure_capacity(reversed_fork_low_prio_states.size());
|
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||||
for (ssize_t i = reversed_fork_low_prio_states.size() - 1; i >= 0; i--)
|
return {};
|
||||||
fork_low_prio_states.unchecked_append(move(reversed_fork_low_prio_states[i]));
|
return false;
|
||||||
return execute_low_prio_forks(input, state, output, move(fork_low_prio_states), recursion_level + 1);
|
}
|
||||||
|
state = states_to_try_next.take_last();
|
||||||
|
++state.recursion_level;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -403,27 +401,6 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
|
|||||||
VERIFY_NOT_REACHED();
|
VERIFY_NOT_REACHED();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Parser>
|
|
||||||
ALWAYS_INLINE Optional<bool> Matcher<Parser>::execute_low_prio_forks(MatchInput const& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const
|
|
||||||
{
|
|
||||||
for (auto& state : states) {
|
|
||||||
|
|
||||||
state.instruction_position = state.fork_at_position;
|
|
||||||
dbgln_if(REGEX_DEBUG, "Forkstay... ip = {}, sp = {}", state.instruction_position, state.string_position);
|
|
||||||
auto success = execute(input, state, output, recursion_level);
|
|
||||||
if (!success.has_value())
|
|
||||||
return {};
|
|
||||||
if (success.value()) {
|
|
||||||
dbgln_if(REGEX_DEBUG, "Forkstay succeeded... ip = {}, sp = {}", state.instruction_position, state.string_position);
|
|
||||||
original_state = state;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
original_state.string_position = 0;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
template class Matcher<PosixBasicParser>;
|
template class Matcher<PosixBasicParser>;
|
||||||
template class Regex<PosixBasicParser>;
|
template class Regex<PosixBasicParser>;
|
||||||
|
|
||||||
|
@ -65,8 +65,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output, size_t recursion_level) const;
|
Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output) const;
|
||||||
ALWAYS_INLINE Optional<bool> execute_low_prio_forks(MatchInput const& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const;
|
|
||||||
|
|
||||||
Regex<Parser> const* m_pattern;
|
Regex<Parser> const* m_pattern;
|
||||||
typename ParserTraits<Parser>::OptionsType const m_regex_options;
|
typename ParserTraits<Parser>::OptionsType const m_regex_options;
|
||||||
|
Loading…
Reference in New Issue
Block a user