1
1
mirror of https://github.com/mawww/kakoune.git synced 2024-12-22 19:11:40 +03:00

Regex: support more than two children in alternations

Avoid deeply nested alternations; parse them flattened, as a single alternation node with one child per alternative.
This commit is contained in:
Maxime Coste 2017-10-09 21:04:28 +08:00
parent fb5243f710
commit 23b3a221eb

View File

@ -14,7 +14,7 @@ namespace Kakoune
struct ParsedRegex struct ParsedRegex
{ {
enum Op enum Op : char
{ {
Literal, Literal,
AnyChar, AnyChar,
@ -36,7 +36,7 @@ struct ParsedRegex
struct Quantifier struct Quantifier
{ {
enum Type enum Type : char
{ {
One, One,
Optional, Optional,
@ -63,17 +63,18 @@ struct ParsedRegex
}; };
}; };
struct AstNode;
using AstNodePtr = std::unique_ptr<AstNode>;
struct AstNode struct AstNode
{ {
Op op; Op op;
bool ignore_case;
Codepoint value; Codepoint value;
Quantifier quantifier; Quantifier quantifier;
bool ignore_case; Vector<AstNodePtr> children;
Vector<std::unique_ptr<AstNode>> children;
}; };
using AstNodePtr = std::unique_ptr<AstNode>;
AstNodePtr ast; AstNodePtr ast;
size_t capture_count; size_t capture_count;
Vector<std::function<bool (Codepoint)>> matchers; Vector<std::function<bool (Codepoint)>> matchers;
@ -112,11 +113,15 @@ private:
return node; return node;
} }
++m_pos;
AstNodePtr res = new_node(ParsedRegex::Alternation); AstNodePtr res = new_node(ParsedRegex::Alternation);
res->children.push_back(std::move(node));
res->children.push_back(disjunction());
res->value = capture; res->value = capture;
res->children.push_back(std::move(node));
do
{
++m_pos;
res->children.push_back(alternative());
}
while (not at_end() and *m_pos == '|');
return res; return res;
} }
@ -459,7 +464,7 @@ private:
AstNodePtr new_node(ParsedRegex::Op op, Codepoint value = -1, AstNodePtr new_node(ParsedRegex::Op op, Codepoint value = -1,
ParsedRegex::Quantifier quantifier = {ParsedRegex::Quantifier::One}) ParsedRegex::Quantifier quantifier = {ParsedRegex::Quantifier::One})
{ {
return AstNodePtr{new ParsedRegex::AstNode{op, value, quantifier, m_ignore_case, {}}}; return AstNodePtr{new ParsedRegex::AstNode{op, m_ignore_case, value, quantifier, {}}};
} }
bool at_end() const { return m_pos == m_regex.end(); } bool at_end() const { return m_pos == m_regex.end(); }
@ -569,17 +574,23 @@ private:
case ParsedRegex::Alternation: case ParsedRegex::Alternation:
{ {
auto& children = node->children; auto& children = node->children;
kak_assert(children.size() == 2); kak_assert(children.size() > 1);
auto split_pos = push_inst(CompiledRegex::Split_PrioritizeParent); const auto split_pos = m_program.instructions.size();
for (int i = 0; i < children.size() - 1; ++i)
compile_node(children[m_forward ? 0 : 1]); push_inst(CompiledRegex::Split_PrioritizeParent);
auto left_pos = push_inst(CompiledRegex::Jump);
goto_inner_end_offsets.push_back(left_pos);
auto right_pos = compile_node(children[m_forward ? 1 : 0]);
m_program.instructions[split_pos].param = right_pos;
for (int i = 0; i < children.size(); ++i)
{
auto node = compile_node(children[i]);
if (i > 0)
m_program.instructions[split_pos + i - 1].param = node;
if (i < children.size() - 1)
{
auto jump = push_inst(CompiledRegex::Jump);
goto_inner_end_offsets.push_back(jump);
}
}
break; break;
} }
case ParsedRegex::LookAhead: case ParsedRegex::LookAhead:
@ -795,6 +806,7 @@ private:
return false; return false;
} }
[[gnu::noinline]]
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
{ {
bool accepted[start_chars_count] = {}; bool accepted[start_chars_count] = {};