1
1
mirror of https://github.com/mawww/kakoune.git synced 2024-12-19 09:21:30 +03:00

Regex: Introduce RegexExecFlags::PrevAvailable

Rework assertion code as well.
This commit is contained in:
Maxime Coste 2017-10-08 09:22:24 +08:00
parent 73e177ec59
commit 9ec376135b
4 changed files with 33 additions and 13 deletions

View File

@@ -31,7 +31,6 @@ void option_from_string(StringView str, Regex& re)
re = Regex{str}; re = Regex{str};
} }
void regex_mismatch(const Regex& re) void regex_mismatch(const Regex& re)
{ {
write_to_debug_buffer(format("regex mismatch for '{}'", re.str())); write_to_debug_buffer(format("regex mismatch for '{}'", re.str()));

View File

@@ -133,6 +133,8 @@ inline RegexExecFlags convert_flags(RegexConstant::match_flag_type flags)
res |= RegexExecFlags::NotInitialNull; res |= RegexExecFlags::NotInitialNull;
if (flags & RegexConstant::match_any) if (flags & RegexConstant::match_any)
res |= RegexExecFlags::AnyMatch; res |= RegexExecFlags::AnyMatch;
if (flags & RegexConstant::match_prev_avail)
res |= RegexExecFlags::PrevAvailable;
return res; return res;
} }
@@ -261,7 +263,7 @@ private:
if (m_results.size() and m_results[0].first == m_results[0].second) if (m_results.size() and m_results[0].first == m_results[0].second)
additional_flags |= RegexConstant::match_not_initial_null; additional_flags |= RegexConstant::match_not_initial_null;
if (m_begin != m_next_begin) if (m_begin != m_next_begin)
additional_flags |= RegexConstant::match_not_bob; additional_flags |= RegexConstant::match_not_bob | RegexConstant::match_prev_avail;
if (not regex_search(m_next_begin, m_end, m_results, *m_regex, if (not regex_search(m_next_begin, m_end, m_results, *m_regex,
m_flags | additional_flags)) m_flags | additional_flags))

View File

@@ -1041,6 +1041,11 @@ auto test_regex = UnitTest{[]{
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f"); kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
} }
{
TestVM<> vm{R"((?<!f).)"};
kak_assert(vm.exec("f"));
}
{ {
TestVM<> vm{R"((?!foo)...)"}; TestVM<> vm{R"((?!foo)...)"};
kak_assert(not vm.exec("foo")); kak_assert(not vm.exec("foo"));
@@ -1110,6 +1115,12 @@ auto test_regex = UnitTest{[]{
TestVM<> vm{R"(()*)"}; TestVM<> vm{R"(()*)"};
kak_assert(not vm.exec(" ")); kak_assert(not vm.exec(" "));
} }
{
TestVM<> vm{R"(\b(?<!-)(a|b|)(?!-)\b)"};
kak_assert(vm.exec("# foo bar", RegexExecFlags::Search));
kak_assert(*vm.captures()[0] == '#');
}
}}; }};
} }

View File

@@ -80,6 +80,7 @@ enum class RegexExecFlags
NotInitialNull = 1 << 6, NotInitialNull = 1 << 6,
AnyMatch = 1 << 7, AnyMatch = 1 << 7,
NoSaves = 1 << 8, NoSaves = 1 << 8,
PrevAvailable = 1 << 9,
}; };
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; } constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
@@ -125,8 +126,11 @@ public:
bool exec(Iterator begin, Iterator end, RegexExecFlags flags) bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
{ {
const bool forward = direction == MatchDirection::Forward; const bool forward = direction == MatchDirection::Forward;
m_begin = Utf8It{utf8::iterator<Iterator>{forward ? begin : end, begin, end}}; const bool prev_avail = flags & RegexExecFlags::PrevAvailable;
m_end = Utf8It{utf8::iterator<Iterator>{forward ? end : begin, begin, end}}; m_begin = Utf8It{utf8::iterator<Iterator>{forward ? begin : end,
prev_avail ? begin-1 : begin, end}};
m_end = Utf8It{utf8::iterator<Iterator>{forward ? end : begin,
prev_avail ? begin-1 : begin, end}};
m_flags = flags; m_flags = flags;
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end) if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
@@ -314,8 +318,8 @@ private:
case CompiledRegex::NegativeLookBehind: case CompiledRegex::NegativeLookBehind:
{ {
auto ref = m_program.lookarounds.begin() + inst.param; auto ref = m_program.lookarounds.begin() + inst.param;
for (auto it = pos-1; *ref != -1 and it >= m_begin; --it, ++ref) for (auto it = pos; *ref != -1 and it > m_begin; --it, ++ref)
if (*it != *ref) if (*(it-1) != *ref)
break; break;
if ((inst.op == CompiledRegex::LookBehind and *ref != -1) or if ((inst.op == CompiledRegex::LookBehind and *ref != -1) or
(inst.op == CompiledRegex::NegativeLookBehind and *ref == -1)) (inst.op == CompiledRegex::NegativeLookBehind and *ref == -1))
@@ -400,21 +404,25 @@ private:
bool is_line_start(const Utf8It& pos) const bool is_line_start(const Utf8It& pos) const
{ {
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin)
*(pos-1) == '\n'; return not (m_flags & RegexExecFlags::NotBeginOfLine);
return *(pos-1) == '\n';
} }
bool is_line_end(const Utf8It& pos) const bool is_line_end(const Utf8It& pos) const
{ {
return (pos == m_end and not (m_flags & RegexExecFlags::NotEndOfLine)) or if (pos == m_end)
*pos == '\n'; return not (m_flags & RegexExecFlags::NotEndOfLine);
return *pos == '\n';
} }
bool is_word_boundary(const Utf8It& pos) const bool is_word_boundary(const Utf8It& pos) const
{ {
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfWord)) or if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin)
(pos == m_end and not (m_flags & RegexExecFlags::NotEndOfWord)) or return not (m_flags & RegexExecFlags::NotBeginOfWord);
is_word(*(pos-1)) != is_word(*pos); if (pos == m_end)
return not (m_flags & RegexExecFlags::NotEndOfWord);
return is_word(*(pos-1)) != is_word(*pos);
} }
static const Iterator& get_base(const utf8::iterator<Iterator>& it) { return it.base(); } static const Iterator& get_base(const utf8::iterator<Iterator>& it) { return it.base(); }