LibRegex: Bail out of atomic rewrite if a block doesn't contain compares

If a block jumps before performing a compare, we'd need to recursively
find the first of the jumped-to block. While this is doable, it's not
really worth spending the time as most such cases won't actually qualify
for atomic loop rewrite anyway.
Fixes an invalid rewrite when `.+` is followed by an alternation, e.g.
/.+(a|b|c)/.
This commit is contained in:
Ali Mohammad Pur 2023-02-15 10:14:13 +03:30 committed by Andreas Kling
parent af441bb939
commit 7f530c0753
Notes: sideshowbarker 2024-07-17 05:01:20 +09:00
2 changed files with 21 additions and 1 deletions

View File

@ -986,6 +986,8 @@ TEST_CASE(optimizer_atomic_groups)
Tuple { "a+"sv, ""sv, false },
// 'y' and [^x] have an overlap ('y'), the loop should not be rewritten here.
Tuple { "[^x]+y"sv, "ay"sv, true },
// .+ should not be rewritten here, as it's followed by something that would be matched by `.`.
Tuple { ".+(a|b|c)"sv, "xxa"sv, true },
};
for (auto& test : tests) {

View File

@ -303,10 +303,12 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
HashTable<size_t> active_capture_groups;
MatchState state;
auto has_seen_actionable_opcode = false;
for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
auto& opcode = bytecode.get_opcode(state);
switch (opcode.opcode_id()) {
case OpCodeId::Compare: {
has_seen_actionable_opcode = true;
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
if (repeated_values.is_empty() && any_of(compares, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
return AtomicRewritePreconditionResult::NotSatisfied;
@ -315,6 +317,7 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
}
case OpCodeId::CheckBegin:
case OpCodeId::CheckEnd:
has_seen_actionable_opcode = true;
if (repeated_values.is_empty())
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
break;
@ -330,6 +333,13 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
case OpCodeId::SaveLeftCaptureGroup:
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
break;
case OpCodeId::ForkJump:
case OpCodeId::ForkReplaceJump:
case OpCodeId::JumpNonEmpty:
// We could attempt to recursively resolve the follow set, but pretending that this just goes nowhere is faster.
if (!has_seen_actionable_opcode)
return AtomicRewritePreconditionResult::NotSatisfied;
break;
default:
break;
}
@ -375,6 +385,13 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
case OpCodeId::CheckBoundary:
// FIXME: What should we do with these? For now, consider them a failure.
return AtomicRewritePreconditionResult::NotSatisfied;
case OpCodeId::ForkJump:
case OpCodeId::ForkReplaceJump:
case OpCodeId::JumpNonEmpty:
// See note in the previous switch, same cases.
if (!following_block_has_at_least_one_compare)
return AtomicRewritePreconditionResult::NotSatisfied;
break;
default:
break;
}
@ -484,7 +501,8 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
if (is_an_eligible_jump(opcode, state.instruction_position, forking_block.start, AlternateForm::DirectLoopWithoutHeader)) {
// We've found RE0 (and RE1 is just the following block, if any), let's see if the precondition applies.
// if RE1 is empty, there's no first(RE1), so this is an automatic pass.
if (!fork_fallback_block.has_value() || fork_fallback_block->end == fork_fallback_block->start) {
if (!fork_fallback_block.has_value()
|| (fork_fallback_block->end == fork_fallback_block->start && block_satisfies_atomic_rewrite_precondition(bytecode, forking_block, *fork_fallback_block) != AtomicRewritePreconditionResult::NotSatisfied)) {
candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeader });
break;
}