1
1
mirror of https://github.com/mawww/kakoune.git synced 2024-11-11 01:37:41 +03:00

Use a dedicated VM op for dot (`.`) when the DotMatchesNewLine flag is not set

This commit is contained in:
Olivier Perret 2018-06-24 12:13:35 +02:00
parent b5ee1db1c4
commit 67655de947
2 changed files with 29 additions and 9 deletions

View File

@ -23,6 +23,7 @@ struct ParsedRegex
{
Literal,
AnyChar,
AnyCharExceptNewLine,
Class,
CharacterType,
Sequence,
@ -296,14 +297,7 @@ private:
if (m_flags & Flags::DotMatchesNewLine)
return new_node(ParsedRegex::AnyChar);
else
{
CharacterClass c;
c.negative = true;
c.ranges.push_back({ '\n', '\n' });
auto class_id = m_parsed_regex.character_classes.size();
m_parsed_regex.character_classes.push_back(std::move(c));
return new_node(ParsedRegex::Class, class_id);
}
return new_node(ParsedRegex::AnyCharExceptNewLine);
case '(':
{
auto captures = [this, it = (++m_pos).base()]() mutable {
@ -611,7 +605,8 @@ private:
ForEachChild<>::apply(m_parsed_regex, index, [this](NodeIndex child_index) {
auto& child = get_node(child_index);
if (child.op != ParsedRegex::Literal and child.op != ParsedRegex::Class and
child.op != ParsedRegex::CharacterType and child.op != ParsedRegex::AnyChar)
child.op != ParsedRegex::CharacterType and child.op != ParsedRegex::AnyChar and
child.op != ParsedRegex::AnyCharExceptNewLine)
parse_error("Lookaround can only contain literals, any chars or character classes");
if (child.quantifier.type != ParsedRegex::Quantifier::One)
parse_error("Quantifiers cannot be used in lookarounds");
@ -714,6 +709,9 @@ private:
case ParsedRegex::AnyChar:
push_inst(CompiledRegex::AnyChar);
break;
case ParsedRegex::AnyCharExceptNewLine:
push_inst(CompiledRegex::AnyCharExceptNewLine);
break;
case ParsedRegex::Class:
push_inst(CompiledRegex::Class, node.value);
break;
@ -887,6 +885,8 @@ private:
: character.value);
else if (character.op == ParsedRegex::AnyChar)
m_program.lookarounds.push_back(0xF000);
else if (character.op == ParsedRegex::AnyCharExceptNewLine)
m_program.lookarounds.push_back(0xF001);
else if (character.op == ParsedRegex::Class)
m_program.lookarounds.push_back(0xF0001 + character.value);
else if (character.op == ParsedRegex::CharacterType)
@ -930,6 +930,13 @@ private:
for (auto& b : start_desc.map)
b = true;
return node.quantifier.allows_none();
case ParsedRegex::AnyCharExceptNewLine:
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
{
if (cp != '\n')
start_desc.map[cp] = true;
}
return node.quantifier.allows_none();
case ParsedRegex::Class:
{
auto& character_class = m_parsed_regex.character_classes[node.value];
@ -1044,6 +1051,9 @@ String dump_regex(const CompiledRegex& program)
case CompiledRegex::AnyChar:
res += "any char\n";
break;
case CompiledRegex::AnyCharExceptNewLine:
res += "anything but newline\n";
break;
case CompiledRegex::Jump:
res += format("jump {}\n", inst.param);
break;

View File

@ -59,6 +59,7 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
Literal,
Literal_IgnoreCase,
AnyChar,
AnyCharExceptNewLine,
Class,
CharacterType,
Jump,
@ -322,6 +323,10 @@ private:
return StepResult::Failed;
case CompiledRegex::AnyChar:
return StepResult::Consumed;
case CompiledRegex::AnyCharExceptNewLine:
if (pos != config.end and *pos != '\n')
return StepResult::Consumed;
return StepResult::Failed;
case CompiledRegex::Jump:
thread.inst = static_cast<int16_t>(inst.param);
break;
@ -529,6 +534,11 @@ private:
const Codepoint ref = *it;
if (ref == 0xF000)
{} // any character matches
else if (ref == 0xF001)
{
if (cp == '\n')
return false;
}
else if (ref > 0xF0000 and ref < 0xF8000)
{
if (not is_character_class(m_program.character_classes[ref - 0xF0001], cp))