mirror of
https://github.com/mawww/kakoune.git
synced 2024-12-22 19:11:40 +03:00
Regex: support more than two children in alternations
Avoid deeply nested alternations: parse all branches as children of a single, flat Alternation node.
This commit is contained in:
parent
fb5243f710
commit
23b3a221eb
@ -14,7 +14,7 @@ namespace Kakoune
|
|||||||
|
|
||||||
struct ParsedRegex
|
struct ParsedRegex
|
||||||
{
|
{
|
||||||
enum Op
|
enum Op : char
|
||||||
{
|
{
|
||||||
Literal,
|
Literal,
|
||||||
AnyChar,
|
AnyChar,
|
||||||
@ -36,7 +36,7 @@ struct ParsedRegex
|
|||||||
|
|
||||||
struct Quantifier
|
struct Quantifier
|
||||||
{
|
{
|
||||||
enum Type
|
enum Type : char
|
||||||
{
|
{
|
||||||
One,
|
One,
|
||||||
Optional,
|
Optional,
|
||||||
@ -63,17 +63,18 @@ struct ParsedRegex
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct AstNode;
|
||||||
|
using AstNodePtr = std::unique_ptr<AstNode>;
|
||||||
|
|
||||||
struct AstNode
|
struct AstNode
|
||||||
{
|
{
|
||||||
Op op;
|
Op op;
|
||||||
|
bool ignore_case;
|
||||||
Codepoint value;
|
Codepoint value;
|
||||||
Quantifier quantifier;
|
Quantifier quantifier;
|
||||||
bool ignore_case;
|
Vector<AstNodePtr> children;
|
||||||
Vector<std::unique_ptr<AstNode>> children;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
using AstNodePtr = std::unique_ptr<AstNode>;
|
|
||||||
|
|
||||||
AstNodePtr ast;
|
AstNodePtr ast;
|
||||||
size_t capture_count;
|
size_t capture_count;
|
||||||
Vector<std::function<bool (Codepoint)>> matchers;
|
Vector<std::function<bool (Codepoint)>> matchers;
|
||||||
@ -112,11 +113,15 @@ private:
|
|||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
++m_pos;
|
|
||||||
AstNodePtr res = new_node(ParsedRegex::Alternation);
|
AstNodePtr res = new_node(ParsedRegex::Alternation);
|
||||||
res->children.push_back(std::move(node));
|
|
||||||
res->children.push_back(disjunction());
|
|
||||||
res->value = capture;
|
res->value = capture;
|
||||||
|
res->children.push_back(std::move(node));
|
||||||
|
do
|
||||||
|
{
|
||||||
|
++m_pos;
|
||||||
|
res->children.push_back(alternative());
|
||||||
|
}
|
||||||
|
while (not at_end() and *m_pos == '|');
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -459,7 +464,7 @@ private:
|
|||||||
AstNodePtr new_node(ParsedRegex::Op op, Codepoint value = -1,
|
AstNodePtr new_node(ParsedRegex::Op op, Codepoint value = -1,
|
||||||
ParsedRegex::Quantifier quantifier = {ParsedRegex::Quantifier::One})
|
ParsedRegex::Quantifier quantifier = {ParsedRegex::Quantifier::One})
|
||||||
{
|
{
|
||||||
return AstNodePtr{new ParsedRegex::AstNode{op, value, quantifier, m_ignore_case, {}}};
|
return AstNodePtr{new ParsedRegex::AstNode{op, m_ignore_case, value, quantifier, {}}};
|
||||||
}
|
}
|
||||||
|
|
||||||
bool at_end() const { return m_pos == m_regex.end(); }
|
bool at_end() const { return m_pos == m_regex.end(); }
|
||||||
@ -569,17 +574,23 @@ private:
|
|||||||
case ParsedRegex::Alternation:
|
case ParsedRegex::Alternation:
|
||||||
{
|
{
|
||||||
auto& children = node->children;
|
auto& children = node->children;
|
||||||
kak_assert(children.size() == 2);
|
kak_assert(children.size() > 1);
|
||||||
|
|
||||||
auto split_pos = push_inst(CompiledRegex::Split_PrioritizeParent);
|
const auto split_pos = m_program.instructions.size();
|
||||||
|
for (int i = 0; i < children.size() - 1; ++i)
|
||||||
compile_node(children[m_forward ? 0 : 1]);
|
push_inst(CompiledRegex::Split_PrioritizeParent);
|
||||||
auto left_pos = push_inst(CompiledRegex::Jump);
|
|
||||||
goto_inner_end_offsets.push_back(left_pos);
|
|
||||||
|
|
||||||
auto right_pos = compile_node(children[m_forward ? 1 : 0]);
|
|
||||||
m_program.instructions[split_pos].param = right_pos;
|
|
||||||
|
|
||||||
|
for (int i = 0; i < children.size(); ++i)
|
||||||
|
{
|
||||||
|
auto node = compile_node(children[i]);
|
||||||
|
if (i > 0)
|
||||||
|
m_program.instructions[split_pos + i - 1].param = node;
|
||||||
|
if (i < children.size() - 1)
|
||||||
|
{
|
||||||
|
auto jump = push_inst(CompiledRegex::Jump);
|
||||||
|
goto_inner_end_offsets.push_back(jump);
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ParsedRegex::LookAhead:
|
case ParsedRegex::LookAhead:
|
||||||
@ -795,6 +806,7 @@ private:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[gnu::noinline]]
|
||||||
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
|
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
|
||||||
{
|
{
|
||||||
bool accepted[start_chars_count] = {};
|
bool accepted[start_chars_count] = {};
|
||||||
|
Loading…
Reference in New Issue
Block a user