mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-01 07:35:02 +03:00
LibRegex: Avoid rewriting a+
as a*
as part of atomic rewriting
The initial `ForkStay` is only needed if the looping block has a following block, if there's no following block or the following block does not attempt to match anything, we should not insert the ForkStay, otherwise we would be rewriting `a+` as `a*` by allowing the 'end' to be executed. Fixes #10952.
This commit is contained in:
parent
f10036b7c5
commit
387df06385
Notes:
sideshowbarker
2024-07-18 01:01:24 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/387df06385f Pull-request: https://github.com/SerenityOS/serenity/pull/10953 Issue: https://github.com/SerenityOS/serenity/issues/10952
@ -895,6 +895,7 @@ TEST_CASE(optimizer_atomic_groups)
|
|||||||
// Fork -> ForkReplace
|
// Fork -> ForkReplace
|
||||||
Tuple { "a*b"sv, "aaaaa"sv, false },
|
Tuple { "a*b"sv, "aaaaa"sv, false },
|
||||||
Tuple { "a+b"sv, "aaaaa"sv, false },
|
Tuple { "a+b"sv, "aaaaa"sv, false },
|
||||||
|
Tuple { "\\\\(\\d+)"sv, "\\\\"sv, false }, // Rewrite bug turning a+ to a*, see #10952.
|
||||||
// Alternative fuse
|
// Alternative fuse
|
||||||
Tuple { "(abcfoo|abcbar|abcbaz).*x"sv, "abcbarx"sv, true },
|
Tuple { "(abcfoo|abcbar|abcbaz).*x"sv, "abcbarx"sv, true },
|
||||||
Tuple { "(a|a)"sv, "a"sv, true },
|
Tuple { "(a|a)"sv, "a"sv, true },
|
||||||
|
@ -103,7 +103,12 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks()
|
|||||||
return block_boundaries;
|
return block_boundaries;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block)
|
enum class AtomicRewritePreconditionResult {
|
||||||
|
SatisfiedWithProperHeader,
|
||||||
|
SatisfiedWithEmptyHeader,
|
||||||
|
NotSatisfied,
|
||||||
|
};
|
||||||
|
static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block)
|
||||||
{
|
{
|
||||||
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
|
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
|
||||||
HashTable<size_t> active_capture_groups;
|
HashTable<size_t> active_capture_groups;
|
||||||
@ -114,21 +119,21 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||||||
case OpCodeId::Compare: {
|
case OpCodeId::Compare: {
|
||||||
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
|
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
|
||||||
if (repeated_values.is_empty() && any_of(compares, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
|
if (repeated_values.is_empty() && any_of(compares, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
|
||||||
return false;
|
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||||
repeated_values.append(move(compares));
|
repeated_values.append(move(compares));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCodeId::CheckBegin:
|
case OpCodeId::CheckBegin:
|
||||||
case OpCodeId::CheckEnd:
|
case OpCodeId::CheckEnd:
|
||||||
if (repeated_values.is_empty())
|
if (repeated_values.is_empty())
|
||||||
return true;
|
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
|
||||||
break;
|
break;
|
||||||
case OpCodeId::CheckBoundary:
|
case OpCodeId::CheckBoundary:
|
||||||
// FIXME: What should we do with these? for now, let's fail.
|
// FIXME: What should we do with these? for now, let's fail.
|
||||||
return false;
|
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||||
case OpCodeId::Restore:
|
case OpCodeId::Restore:
|
||||||
case OpCodeId::GoBack:
|
case OpCodeId::GoBack:
|
||||||
return false;
|
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||||
case OpCodeId::SaveRightCaptureGroup:
|
case OpCodeId::SaveRightCaptureGroup:
|
||||||
active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id());
|
active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id());
|
||||||
break;
|
break;
|
||||||
@ -144,6 +149,7 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||||||
dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size());
|
dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size());
|
||||||
dbgln_if(REGEX_DEBUG, "Found {} active capture groups", active_capture_groups.size());
|
dbgln_if(REGEX_DEBUG, "Found {} active capture groups", active_capture_groups.size());
|
||||||
|
|
||||||
|
bool following_block_has_at_least_one_compare = false;
|
||||||
// Find the first compare in the following block, it must NOT match any of the values in `repeated_values'.
|
// Find the first compare in the following block, it must NOT match any of the values in `repeated_values'.
|
||||||
for (state.instruction_position = following_block.start; state.instruction_position < following_block.end;) {
|
for (state.instruction_position = following_block.start; state.instruction_position < following_block.end;) {
|
||||||
auto& opcode = bytecode.get_opcode(state);
|
auto& opcode = bytecode.get_opcode(state);
|
||||||
@ -156,6 +162,7 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||||||
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
|
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
|
||||||
break;
|
break;
|
||||||
case OpCodeId::Compare: {
|
case OpCodeId::Compare: {
|
||||||
|
following_block_has_at_least_one_compare = true;
|
||||||
// We found a compare, let's see what it has.
|
// We found a compare, let's see what it has.
|
||||||
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
|
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
|
||||||
if (compares.is_empty())
|
if (compares.is_empty())
|
||||||
@ -165,27 +172,27 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||||||
return compare.type == CharacterCompareType::AnyChar
|
return compare.type == CharacterCompareType::AnyChar
|
||||||
|| (compare.type == CharacterCompareType::Reference && active_capture_groups.contains(compare.value));
|
|| (compare.type == CharacterCompareType::Reference && active_capture_groups.contains(compare.value));
|
||||||
}))
|
}))
|
||||||
return false;
|
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||||
|
|
||||||
for (auto& repeated_value : repeated_values) {
|
for (auto& repeated_value : repeated_values) {
|
||||||
// FIXME: This is too naive!
|
// FIXME: This is too naive!
|
||||||
if (any_of(repeated_value, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
|
if (any_of(repeated_value, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
|
||||||
return false;
|
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||||
|
|
||||||
for (auto& repeated_compare : repeated_value) {
|
for (auto& repeated_compare : repeated_value) {
|
||||||
// FIXME: This is too naive! it will miss _tons_ of cases since it doesn't check ranges!
|
// FIXME: This is too naive! it will miss _tons_ of cases since it doesn't check ranges!
|
||||||
if (any_of(compares, [&](auto& compare) { return compare.type == repeated_compare.type && compare.value == repeated_compare.value; }))
|
if (any_of(compares, [&](auto& compare) { return compare.type == repeated_compare.type && compare.value == repeated_compare.value; }))
|
||||||
return false;
|
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
|
||||||
}
|
}
|
||||||
case OpCodeId::CheckBegin:
|
case OpCodeId::CheckBegin:
|
||||||
case OpCodeId::CheckEnd:
|
case OpCodeId::CheckEnd:
|
||||||
return true; // Nothing can match the end!
|
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader; // Nothing can match the end!
|
||||||
case OpCodeId::CheckBoundary:
|
case OpCodeId::CheckBoundary:
|
||||||
// FIXME: What should we do with these? For now, consider them a failure.
|
// FIXME: What should we do with these? For now, consider them a failure.
|
||||||
return false;
|
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -193,7 +200,9 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||||||
state.instruction_position += opcode.size();
|
state.instruction_position += opcode.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
if (following_block_has_at_least_one_compare)
|
||||||
|
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
|
||||||
|
return AtomicRewritePreconditionResult::SatisfiedWithEmptyHeader;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Parser>
|
template<typename Parser>
|
||||||
@ -213,7 +222,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||||||
// -------------------------
|
// -------------------------
|
||||||
// bb1 | RE1
|
// bb1 | RE1
|
||||||
// can be rewritten as:
|
// can be rewritten as:
|
||||||
// loop.hdr | ForkStay bb1
|
// loop.hdr | ForkStay bb1 (if RE1 matches _something_, empty otherwise)
|
||||||
// -------------------------
|
// -------------------------
|
||||||
// bb0 | RE0
|
// bb0 | RE0
|
||||||
// | ForkReplaceX bb0
|
// | ForkReplaceX bb0
|
||||||
@ -240,8 +249,9 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||||||
// bb2 | RE1
|
// bb2 | RE1
|
||||||
|
|
||||||
enum class AlternateForm {
|
enum class AlternateForm {
|
||||||
DirectLoopWithoutHeader, // loop without proper header, a block forking to itself. i.e. the first form.
|
DirectLoopWithoutHeader, // loop without proper header, a block forking to itself. i.e. the first form.
|
||||||
DirectLoopWithHeader, // loop with proper header, i.e. the second form.
|
DirectLoopWithoutHeaderAndEmptyFollow, // loop without proper header, a block forking to itself. i.e. the first form but with RE1 being empty.
|
||||||
|
DirectLoopWithHeader, // loop with proper header, i.e. the second form.
|
||||||
};
|
};
|
||||||
struct CandidateBlock {
|
struct CandidateBlock {
|
||||||
Block forking_block;
|
Block forking_block;
|
||||||
@ -298,10 +308,15 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (block_satisfies_atomic_rewrite_precondition(bytecode, forking_block, *fork_fallback_block)) {
|
auto precondition = block_satisfies_atomic_rewrite_precondition(bytecode, forking_block, *fork_fallback_block);
|
||||||
|
if (precondition == AtomicRewritePreconditionResult::SatisfiedWithProperHeader) {
|
||||||
candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeader });
|
candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeader });
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (precondition == AtomicRewritePreconditionResult::SatisfiedWithEmptyHeader) {
|
||||||
|
candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeaderAndEmptyFollow });
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Check if the last instruction in the last block is a direct jump to this block
|
// Check if the last instruction in the last block is a direct jump to this block
|
||||||
@ -316,7 +331,8 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||||||
Optional<Block> block_following_fork_fallback;
|
Optional<Block> block_following_fork_fallback;
|
||||||
if (i + 2 < basic_blocks.size())
|
if (i + 2 < basic_blocks.size())
|
||||||
block_following_fork_fallback = basic_blocks[i + 2];
|
block_following_fork_fallback = basic_blocks[i + 2];
|
||||||
if (!block_following_fork_fallback.has_value() || block_satisfies_atomic_rewrite_precondition(bytecode, *fork_fallback_block, *block_following_fork_fallback)) {
|
if (!block_following_fork_fallback.has_value()
|
||||||
|
|| block_satisfies_atomic_rewrite_precondition(bytecode, *fork_fallback_block, *block_following_fork_fallback) != AtomicRewritePreconditionResult::NotSatisfied) {
|
||||||
candidate_blocks.append({ forking_block, {}, AlternateForm::DirectLoopWithHeader });
|
candidate_blocks.append({ forking_block, {}, AlternateForm::DirectLoopWithHeader });
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user