LibRegex+LibJS: Combine named and unnamed capture groups in MatchState

Combining these into one list helps reduce the size of MatchState, and
as a result, reduces the amount of memory consumed during execution of
very large regex matches.

Doing this also allows us to remove a few regex byte code instructions:
ClearNamedCaptureGroup, SaveLeftNamedCaptureGroup, and NamedReference.
Named groups now behave the same as unnamed groups for these operations.
Note that SaveRightNamedCaptureGroup still exists to cache the matched
group name.

This also removes the recursion level from the MatchState, as it can
exist as a local variable in Matcher::execute instead.
This commit is contained in:
Timothy Flynn 2021-08-14 16:28:54 -04:00 committed by Linus Groh
parent fea181bde3
commit f1ce998d73
Notes: sideshowbarker 2024-07-18 05:40:26 +09:00
9 changed files with 69 additions and 202 deletions

View File

@ -422,9 +422,11 @@ TEST_CASE(named_capture_group)
EXPECT_EQ(re.search(haystack, result, PosixFlags::Multiline), true);
EXPECT_EQ(result.count, 2u);
EXPECT_EQ(result.matches.at(0).view, "Opacity=255");
EXPECT_EQ(result.named_capture_group_matches.at(0).ensure("Test").view, "255");
EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "255");
EXPECT_EQ(result.capture_group_matches.at(0).at(0).capture_group_name, "Test");
EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0");
EXPECT_EQ(result.named_capture_group_matches.at(1).ensure("Test").view, "0");
EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "0");
EXPECT_EQ(result.capture_group_matches.at(1).at(0).capture_group_name, "Test");
}
TEST_CASE(ecma262_named_capture_group_with_dollar_sign)
@ -443,9 +445,11 @@ TEST_CASE(ecma262_named_capture_group_with_dollar_sign)
EXPECT_EQ(re.search(haystack, result, ECMAScriptFlags::Multiline), true);
EXPECT_EQ(result.count, 2u);
EXPECT_EQ(result.matches.at(0).view, "Opacity=255");
EXPECT_EQ(result.named_capture_group_matches.at(0).ensure("$Test$").view, "255");
EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "255");
EXPECT_EQ(result.capture_group_matches.at(0).at(0).capture_group_name, "$Test$");
EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0");
EXPECT_EQ(result.named_capture_group_matches.at(1).ensure("$Test$").view, "0");
EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "0");
EXPECT_EQ(result.capture_group_matches.at(1).at(0).capture_group_name, "$Test$");
}
TEST_CASE(a_star)

View File

@ -263,13 +263,16 @@ static Value regexp_builtin_exec(GlobalObject& global_object, RegExpObject& rege
return {};
}
auto* array = Array::create(global_object, result.n_capture_groups + 1);
auto* array = Array::create(global_object, result.n_named_capture_groups + 1);
if (vm.exception())
return {};
Vector<Optional<Match>> indices { Match::create(match) };
HashMap<String, Match> group_names;
bool has_groups = result.n_named_capture_groups != 0;
Object* groups_object = has_groups ? Object::create(global_object, nullptr) : nullptr;
for (size_t i = 0; i < result.n_capture_groups; ++i) {
auto capture_value = js_undefined();
auto& capture = result.capture_group_matches[0][i + 1];
@ -280,22 +283,15 @@ static Value regexp_builtin_exec(GlobalObject& global_object, RegExpObject& rege
indices.append(Match::create(capture));
}
array->create_data_property_or_throw(i + 1, capture_value);
}
bool has_groups = result.n_named_capture_groups > 0;
Value groups = js_undefined();
if (has_groups) {
auto groups_object = Object::create(global_object, nullptr);
for (auto& entry : result.named_capture_group_matches[0]) {
groups_object->create_data_property_or_throw(entry.key, js_string(vm, entry.value.view.u16_view()));
group_names.set(entry.key, Match::create(entry.value));
if (capture.capture_group_name.has_value()) {
auto group_name = capture.capture_group_name->to_string();
groups_object->create_data_property_or_throw(group_name, js_string(vm, capture.view.u16_view()));
group_names.set(move(group_name), Match::create(capture));
}
groups = move(groups_object);
}
Value groups = has_groups ? groups_object : js_undefined();
array->create_data_property_or_throw(vm.names.groups, groups);
if (has_indices) {

View File

@ -166,18 +166,12 @@ void ByteCode::ensure_opcodes_initialized()
case OpCodeId::ClearCaptureGroup:
s_opcodes[i] = make<OpCode_ClearCaptureGroup>();
break;
case OpCodeId::ClearNamedCaptureGroup:
s_opcodes[i] = make<OpCode_ClearNamedCaptureGroup>();
break;
case OpCodeId::SaveLeftCaptureGroup:
s_opcodes[i] = make<OpCode_SaveLeftCaptureGroup>();
break;
case OpCodeId::SaveRightCaptureGroup:
s_opcodes[i] = make<OpCode_SaveRightCaptureGroup>();
break;
case OpCodeId::SaveLeftNamedCaptureGroup:
s_opcodes[i] = make<OpCode_SaveLeftNamedCaptureGroup>();
break;
case OpCodeId::SaveRightNamedCaptureGroup:
s_opcodes[i] = make<OpCode_SaveRightNamedCaptureGroup>();
break;
@ -378,52 +372,26 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
return ExecutionResult::Continue;
}
ALWAYS_INLINE ExecutionResult OpCode_ClearNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const
{
if (input.match_index < state.capture_group_matches.size()) {
auto& group = state.named_capture_group_matches[input.match_index];
if (auto it = group.find(name()); it != group.end())
it->value.reset();
}
return ExecutionResult::Continue;
}
ALWAYS_INLINE ExecutionResult OpCode_SaveLeftNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const
{
if (input.match_index >= state.named_capture_group_matches.size()) {
state.named_capture_group_matches.ensure_capacity(input.match_index);
auto capacity = state.named_capture_group_matches.capacity();
for (size_t i = state.named_capture_group_matches.size(); i <= capacity; ++i)
state.named_capture_group_matches.empend();
}
state.named_capture_group_matches.at(input.match_index).ensure(name()).column = state.string_position;
return ExecutionResult::Continue;
}
ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state, MatchOutput&) const
{
StringView capture_group_name = name();
auto& match = state.capture_group_matches.at(input.match_index).at(id());
auto start_position = match.left_column;
if (state.string_position < start_position)
return ExecutionResult::Failed_ExecuteLowPrioForks;
if (state.named_capture_group_matches.at(input.match_index).contains(capture_group_name)) {
auto start_position = state.named_capture_group_matches.at(input.match_index).ensure(capture_group_name).column;
auto length = state.string_position - start_position;
auto length = state.string_position - start_position;
auto& map = state.named_capture_group_matches.at(input.match_index);
if (start_position < match.column)
return ExecutionResult::Continue;
if constexpr (REGEX_DEBUG) {
VERIFY(start_position + length <= input.view.length());
dbgln("Save named capture group with name={} and content='{}'", capture_group_name, input.view.substring_view(start_position, length));
}
VERIFY(start_position + length <= input.view.length());
VERIFY(start_position + length <= input.view.length());
auto view = input.view.substring_view(start_position, length);
if (input.regex_options & AllFlags::StringCopyMatches) {
map.set(capture_group_name, { view.to_string(), input.line, start_position, input.global_offset + start_position }); // create a copy of the original string
} else {
map.set(capture_group_name, { view, input.line, start_position, input.global_offset + start_position }); // take view to original string
}
auto view = input.view.substring_view(start_position, length);
if (input.regex_options & AllFlags::StringCopyMatches) {
match = { view.to_string(), name(), input.line, start_position, input.global_offset + start_position }; // create a copy of the original string
} else {
warnln("Didn't find corresponding capture group match for name={}, match_index={}", capture_group_name.to_string(), input.match_index);
match = { view, name(), input.line, start_position, input.global_offset + start_position }; // take view to original string
}
return ExecutionResult::Continue;
@ -543,24 +511,6 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
if (!compare_string(input, state, str, had_zero_length_match))
return ExecutionResult::Failed_ExecuteLowPrioForks;
} else if (compare_type == CharacterCompareType::NamedReference) {
auto ptr = (char const*)m_bytecode->at(offset++);
auto length = (size_t)m_bytecode->at(offset++);
StringView name { ptr, length };
auto group = state.named_capture_group_matches.at(input.match_index).get(name);
if (!group.has_value())
return ExecutionResult::Failed_ExecuteLowPrioForks;
auto str = group.value().view;
// We want to compare a string that is definitely longer than the available string
if (input.view.length() < state.string_position + str.length())
return ExecutionResult::Failed_ExecuteLowPrioForks;
if (!compare_string(input, state, str, had_zero_length_match))
return ExecutionResult::Failed_ExecuteLowPrioForks;
} else if (compare_type == CharacterCompareType::Property) {
auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
compare_property(input, state, property, current_inversion_state(), inverse_matched);
@ -869,10 +819,6 @@ Vector<String> const OpCode_Compare::variable_arguments_to_string(Optional<Match
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]));
}
}
} else if (compare_type == CharacterCompareType::NamedReference) {
auto ptr = (char const*)m_bytecode->at(offset++);
auto length = m_bytecode->at(offset++);
result.empend(String::formatted("name='{}'", StringView { ptr, (size_t)length }));
} else if (compare_type == CharacterCompareType::Reference) {
auto ref = m_bytecode->at(offset++);
result.empend(String::formatted("number={}", ref));

View File

@ -32,7 +32,6 @@ using ByteCodeValueType = u64;
__ENUMERATE_OPCODE(FailForks) \
__ENUMERATE_OPCODE(SaveLeftCaptureGroup) \
__ENUMERATE_OPCODE(SaveRightCaptureGroup) \
__ENUMERATE_OPCODE(SaveLeftNamedCaptureGroup) \
__ENUMERATE_OPCODE(SaveRightNamedCaptureGroup) \
__ENUMERATE_OPCODE(CheckBegin) \
__ENUMERATE_OPCODE(CheckEnd) \
@ -41,7 +40,6 @@ using ByteCodeValueType = u64;
__ENUMERATE_OPCODE(Restore) \
__ENUMERATE_OPCODE(GoBack) \
__ENUMERATE_OPCODE(ClearCaptureGroup) \
__ENUMERATE_OPCODE(ClearNamedCaptureGroup) \
__ENUMERATE_OPCODE(Exit)
// clang-format off
@ -65,7 +63,6 @@ enum class OpCodeId : ByteCodeValueType {
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(NamedReference) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory) \
__ENUMERATE_CHARACTER_COMPARE_TYPE(Script) \
@ -159,7 +156,6 @@ public:
VERIFY(value.type != CharacterCompareType::RangeExpressionDummy);
VERIFY(value.type != CharacterCompareType::Undefined);
VERIFY(value.type != CharacterCompareType::String);
VERIFY(value.type != CharacterCompareType::NamedReference);
arguments.append((ByteCodeValueType)value.type);
if (value.type != CharacterCompareType::Inverse && value.type != CharacterCompareType::AnyChar && value.type != CharacterCompareType::TemporaryInverse)
@ -187,13 +183,6 @@ public:
empend(index);
}
void insert_bytecode_clear_named_capture_group(StringView name)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::ClearNamedCaptureGroup));
empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
empend(name.length());
}
void insert_bytecode_compare_string(StringView view)
{
ByteCode bytecode;
@ -212,49 +201,24 @@ public:
extend(move(bytecode));
}
void insert_bytecode_compare_named_reference(StringView name)
{
ByteCode bytecode;
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
bytecode.empend(static_cast<u64>(1)); // number of arguments
ByteCode arguments;
arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::NamedReference));
arguments.empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
arguments.empend(name.length());
bytecode.empend(arguments.size()); // size of arguments
bytecode.extend(move(arguments));
extend(move(bytecode));
}
void insert_bytecode_group_capture_left(size_t capture_groups_count)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftCaptureGroup));
empend(capture_groups_count);
}
void insert_bytecode_group_capture_left(StringView const& name)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftNamedCaptureGroup));
empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
empend(name.length());
}
void insert_bytecode_group_capture_right(size_t capture_groups_count)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightCaptureGroup));
empend(capture_groups_count);
}
void insert_bytecode_group_capture_right(StringView const& name)
void insert_bytecode_group_capture_right(size_t capture_groups_count, StringView const& name)
{
empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
empend(name.length());
empend(capture_groups_count);
}
enum class LookAroundType {
@ -655,19 +619,6 @@ public:
String const arguments_string() const override { return String::formatted("id={}", id()); }
};
class OpCode_ClearNamedCaptureGroup final : public OpCode {
public:
ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ClearNamedCaptureGroup; }
ALWAYS_INLINE size_t size() const override { return 3; }
ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
ALWAYS_INLINE size_t length() const { return argument(1); }
String const arguments_string() const override
{
return String::formatted("name={}, length={}", name(), length());
}
};
class OpCode_SaveLeftCaptureGroup final : public OpCode {
public:
ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
@ -686,26 +637,14 @@ public:
String const arguments_string() const override { return String::formatted("id={}", id()); }
};
class OpCode_SaveLeftNamedCaptureGroup final : public OpCode {
public:
ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftNamedCaptureGroup; }
ALWAYS_INLINE size_t size() const override { return 3; }
ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
ALWAYS_INLINE size_t length() const { return argument(1); }
String const arguments_string() const override
{
return String::formatted("name={}, length={}", name(), length());
}
};
class OpCode_SaveRightNamedCaptureGroup final : public OpCode {
public:
ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; }
ALWAYS_INLINE size_t size() const override { return 3; }
ALWAYS_INLINE size_t size() const override { return 4; }
ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
ALWAYS_INLINE size_t length() const { return argument(1); }
ALWAYS_INLINE size_t id() const { return argument(2); }
String const arguments_string() const override
{
return String::formatted("name={}, length={}", name(), length());

View File

@ -442,11 +442,20 @@ public:
}
Match(String const string_, size_t const line_, size_t const column_, size_t const global_offset_)
: string(string_)
: string(move(string_))
, view(string.value().view())
, line(line_)
, column(column_)
, global_offset(global_offset_)
{
}
Match(RegexStringView const view_, StringView capture_group_name_, size_t const line_, size_t const column_, size_t const global_offset_)
: view(view_)
, capture_group_name(capture_group_name_)
, line(line_)
, column(column_)
, global_offset(global_offset_)
, left_column(column_)
{
}
@ -454,6 +463,7 @@ public:
void reset()
{
view = view.typed_null_view();
capture_group_name.clear();
line = 0;
column = 0;
global_offset = 0;
@ -461,6 +471,7 @@ public:
}
RegexStringView view { nullptr };
Optional<StringView> capture_group_name {};
size_t line { 0 };
size_t column { 0 };
size_t global_offset { 0 };
@ -494,8 +505,6 @@ struct MatchState {
size_t fork_at_position { 0 };
Vector<Match> matches;
Vector<Vector<Match>> capture_group_matches;
Vector<HashMap<String, Match>> named_capture_group_matches;
size_t recursion_level { 0 };
};
struct MatchOutput {

View File

@ -149,10 +149,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
if (c_match_preallocation_count) {
state.matches.ensure_capacity(c_match_preallocation_count);
state.capture_group_matches.ensure_capacity(c_match_preallocation_count);
state.named_capture_group_matches.ensure_capacity(c_match_preallocation_count);
auto& capture_groups_count = m_pattern->parser_result.capture_groups_count;
auto& named_capture_groups_count = m_pattern->parser_result.named_capture_groups_count;
for (size_t j = 0; j < c_match_preallocation_count; ++j) {
state.matches.empend();
@ -160,9 +157,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
state.capture_group_matches.at(j).ensure_capacity(capture_groups_count);
for (size_t k = 0; k < capture_groups_count; ++k)
state.capture_group_matches.at(j).unchecked_append({});
state.named_capture_group_matches.unchecked_append({});
state.named_capture_group_matches.at(j).ensure_capacity(named_capture_groups_count);
}
}
@ -315,15 +309,9 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
matches.template remove_all_matching([](auto& match) { return match.view.is_null(); });
}
output_copy.named_capture_group_matches = state.named_capture_group_matches;
// Make sure there are as many capture matches as there are actual matches.
if (output_copy.named_capture_group_matches.size() < match_count)
output_copy.named_capture_group_matches.resize(match_count);
output_copy.matches = state.matches;
} else {
output_copy.capture_group_matches.clear_with_capacity();
output_copy.named_capture_group_matches.clear_with_capacity();
}
return {
@ -331,7 +319,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
match_count,
move(output_copy.matches),
move(output_copy.capture_group_matches),
move(output_copy.named_capture_group_matches),
output.operations,
m_pattern->parser_result.capture_groups_count,
m_pattern->parser_result.named_capture_groups_count,
@ -399,9 +386,8 @@ private:
template<class Parser>
Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& state, MatchOutput& output) const
{
state.recursion_level = 0;
BumpAllocatedLinkedList<MatchState> states_to_try_next;
size_t recursion_level = 0;
auto& bytecode = m_pattern->parser_result.bytecode;
@ -410,7 +396,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
auto& opcode = bytecode.get_opcode(state);
#if REGEX_DEBUG
s_regex_dbg.print_opcode("VM", opcode, state, state.recursion_level, false);
s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false);
#endif
ExecutionResult result;
@ -435,7 +421,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
case ExecutionResult::Fork_PrioHigh:
states_to_try_next.append(state);
state.instruction_position = state.fork_at_position;
++state.recursion_level;
++recursion_level;
continue;
case ExecutionResult::Continue:
continue;
@ -454,7 +440,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
return false;
}
state = states_to_try_next.take_last();
++state.recursion_level;
++recursion_level;
continue;
}
}

View File

@ -31,7 +31,6 @@ struct RegexResult final {
size_t count { 0 };
Vector<Match> matches;
Vector<Vector<Match>> capture_group_matches;
Vector<HashMap<String, Match>> named_capture_group_matches;
size_t n_operations { 0 };
size_t n_capture_groups { 0 };
size_t n_named_capture_groups { 0 };

View File

@ -150,7 +150,6 @@ ALWAYS_INLINE void Parser::reset()
m_parser_state.capture_group_minimum_lengths.clear();
m_parser_state.capture_groups_count = 0;
m_parser_state.named_capture_groups_count = 0;
m_parser_state.named_capture_group_minimum_lengths.clear();
m_parser_state.named_capture_groups.clear();
}
@ -780,12 +779,8 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
}
}
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
if (capture_group_name.has_value())
bytecode.insert_bytecode_group_capture_left(capture_group_name.value());
else
bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count);
}
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group))
bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count);
ByteCode capture_group_bytecode;
@ -814,12 +809,12 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
if (capture_group_name.has_value()) {
bytecode.insert_bytecode_group_capture_right(capture_group_name.value());
bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count, capture_group_name.value());
++m_parser_state.named_capture_groups_count;
} else {
bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count);
++m_parser_state.capture_groups_count;
}
++m_parser_state.capture_groups_count;
}
should_parse_repetition_symbol = true;
break;
@ -1564,14 +1559,14 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
auto maybe_length = m_parser_state.named_capture_group_minimum_lengths.get(name);
if (!maybe_length.has_value()) {
auto maybe_capture_group = m_parser_state.named_capture_groups.get(name);
if (!maybe_capture_group.has_value()) {
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
match_length_minimum += maybe_length.value();
match_length_minimum += maybe_capture_group->minimum_length;
stack.insert_bytecode_compare_named_reference(name);
stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group->group_index } });
return true;
}
@ -2121,15 +2116,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
m_capture_groups_in_scope.last().empend(identifier);
};
auto clear_all_capture_groups_in_scope = [&] {
for (auto& entry : m_capture_groups_in_scope.last()) {
entry.visit(
[&](size_t index) {
stack.insert_bytecode_clear_capture_group(index);
},
[&](String const& name) {
stack.insert_bytecode_clear_named_capture_group(name);
});
}
for (auto& index : m_capture_groups_in_scope.last())
stack.insert_bytecode_clear_capture_group(index);
};
if (match(TokenType::Questionmark)) {
@ -2172,21 +2160,18 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
clear_all_capture_groups_in_scope();
exit_capture_group_scope();
register_capture_group_in_current_scope(name);
register_capture_group_in_current_scope(group_index);
consume(TokenType::RightParen, Error::MismatchingParen);
stack.insert_bytecode_group_capture_left(name);
stack.insert_bytecode_group_capture_left(group_index);
stack.extend(move(capture_group_bytecode));
stack.insert_bytecode_group_capture_right(name);
stack.insert_bytecode_group_capture_right(group_index);
stack.insert_bytecode_group_capture_right(group_index, name);
match_length_minimum += length;
m_parser_state.named_capture_group_minimum_lengths.set(name, length);
m_parser_state.capture_group_minimum_lengths.set(group_index, length);
m_parser_state.named_capture_groups.set(name, { group_index, length });
return true;
}

View File

@ -88,6 +88,11 @@ protected:
ALWAYS_INLINE bool done() const;
ALWAYS_INLINE bool set_error(Error error);
struct NamedCaptureGroup {
size_t group_index { 0 };
size_t minimum_length { 0 };
};
struct ParserState {
Lexer& lexer;
Token current_token;
@ -99,8 +104,7 @@ protected:
size_t match_length_minimum { 0 };
AllOptions regex_options;
HashMap<int, size_t> capture_group_minimum_lengths;
HashMap<FlyString, size_t> named_capture_group_minimum_lengths;
HashMap<size_t, FlyString> named_capture_groups;
HashMap<FlyString, NamedCaptureGroup> named_capture_groups;
explicit ParserState(Lexer& lexer)
: lexer(lexer)
@ -258,8 +262,7 @@ private:
// ECMA-262 basically requires that we clear the inner captures of a capture group before trying to match it,
// by requiring that (...)+ only contain the matches for the last iteration.
// To do that, we have to keep track of which capture groups are "in scope", so we can clear them as needed.
using CaptureGroup = Variant<size_t, String>;
Vector<Vector<CaptureGroup>> m_capture_groups_in_scope;
Vector<Vector<size_t>> m_capture_groups_in_scope;
};
using PosixExtended = PosixExtendedParser;