From 236751cb84e9bdbf46f114f4f456760101618c06 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Fri, 6 Oct 2017 19:30:46 +0800 Subject: [PATCH] Regex: Make ThreadedRegexVM a proper class, define a proper interface --- src/regex_impl.cc | 24 ++++++------- src/regex_impl.hh | 92 ++++++++++++++++++++++++++--------------------- 2 files changed, 63 insertions(+), 53 deletions(-) diff --git a/src/regex_impl.cc b/src/regex_impl.cc index de00ce870..17241064c 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -931,7 +931,7 @@ auto test_regex = UnitTest{[]{ { TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"}; kak_assert(vm.exec("fooquxbarbaz")); - kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "qux"); + kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "qux"); kak_assert(not vm.exec("fooquxbarbaze")); kak_assert(not vm.exec("quxbar")); kak_assert(not vm.exec("blahblah")); @@ -942,7 +942,7 @@ auto test_regex = UnitTest{[]{ { TestVM vm{R"(.*\b(foo|bar)\b.*)"}; kak_assert(vm.exec("qux foo baz")); - kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "foo"); + kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "foo"); kak_assert(not vm.exec("quxfoobaz")); kak_assert(vm.exec("bar")); kak_assert(not vm.exec("foobar")); @@ -988,11 +988,11 @@ auto test_regex = UnitTest{[]{ { TestVM vm{R"(f.*a(.*o))"}; kak_assert(vm.exec("blahfoobarfoobaz", false, true)); - kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "foobarfoo"); - kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "rfoo"); + kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foobarfoo"); + kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "rfoo"); kak_assert(vm.exec("mais que fais la police", false, true)); - kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "fais la po"); - kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == " po"); + kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "fais la po"); + kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == " po"); } { @@ -1006,13 +1006,13 @@ auto test_regex = UnitTest{[]{ { TestVM vm{R"((a{3,5})a+)"}; kak_assert(vm.exec("aaaaaa", true, true)); - kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaaaa"); + kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaaaa"); } { TestVM vm{R"((a{3,5}?)a+)"}; kak_assert(vm.exec("aaaaaa", true, true)); - kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaa"); + kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaa"); } { @@ -1052,20 +1052,20 @@ auto test_regex = UnitTest{[]{ { TestVM vm{R"(foo\Kbar)"}; kak_assert(vm.exec("foobar", true, true)); - kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "bar"); + kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar"); kak_assert(not vm.exec("bar", true, true)); } { TestVM vm{R"((fo+?).*)"}; kak_assert(vm.exec("foooo", true, true)); - kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "fo"); + kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "fo"); } { TestVM vm{R"((?=foo).)"}; kak_assert(vm.exec("barfoo", false, true)); - kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "f"); + kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f"); } { @@ -1111,7 +1111,7 @@ auto test_regex = UnitTest{[]{ { TestVM vm{R"($)"}; kak_assert(vm.exec("foo\n", false, true)); - kak_assert(*vm.m_captures->pos[0] == '\n'); + kak_assert(*vm.captures()[0] == '\n'); } }}; diff --git a/src/regex_impl.hh b/src/regex_impl.hh index 9e81a7018..84b90fa7d 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -66,12 +66,14 @@ enum class RegexExecFlags constexpr bool with_bit_ops(Meta::Type) { return true; } template -struct ThreadedRegexVM +class ThreadedRegexVM { +public: ThreadedRegexVM(const CompiledRegex& program) : m_program{program} { kak_assert(m_program); } ThreadedRegexVM(const ThreadedRegexVM&) = delete; + ThreadedRegexVM& operator=(const ThreadedRegexVM&) = delete; ~ThreadedRegexVM() { @@ -83,6 +85,52 @@ struct ThreadedRegexVM } } + bool exec(Iterator begin, Iterator end, RegexExecFlags flags) + { + m_begin = begin; + m_end = end; + m_flags = flags; + + if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end) + return false; + + Vector current_threads, next_threads; + + const bool no_saves = (m_flags & RegexExecFlags::NoSaves); + Utf8It start{m_begin, m_begin, m_end}; + + const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr; + + if (flags & RegexExecFlags::Search) + to_next_start(start, end, start_chars); + + if (exec_from(start, no_saves ? nullptr : new_saves(nullptr), + current_threads, next_threads)) + return true; + + if (not (flags & RegexExecFlags::Search)) + return false; + + do + { + to_next_start(++start, end, start_chars); + if (exec_from(start, no_saves ? nullptr : new_saves(nullptr), + current_threads, next_threads)) + return true; + } + while (start != end); + + return false; + } + + ArrayView captures() const + { + if (m_captures) + return { m_captures->pos, m_program.save_count }; + return {}; + } + +private: struct Saves { int refcount; @@ -325,44 +373,6 @@ struct ThreadedRegexVM ++start; } - bool exec(Iterator begin, Iterator end, RegexExecFlags flags) - { - m_begin = begin; - m_end = end; - m_flags = flags; - - if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end) - return false; - - Vector current_threads, next_threads; - - const bool no_saves = (m_flags & RegexExecFlags::NoSaves); - Utf8It start{m_begin, m_begin, m_end}; - - const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr; - - if (flags & RegexExecFlags::Search) - to_next_start(start, end, start_chars); - - if (exec_from(start, no_saves ? nullptr : new_saves(nullptr), - current_threads, next_threads)) - return true; - - if (not (flags & RegexExecFlags::Search)) - return false; - - do - { - to_next_start(++start, end, start_chars); - if (exec_from(start, no_saves ? nullptr : new_saves(nullptr), - current_threads, next_threads)) - return true; - } - while (start != end); - - return false; - } - bool is_line_start(const Utf8It& pos) const { return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or @@ -409,7 +419,7 @@ bool regex_match(It begin, It end, Vector& captures, const CompiledRegex& re ThreadedRegexVM vm{re}; if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search))) { - std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures)); + std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures)); return true; } return false; @@ -430,7 +440,7 @@ bool regex_search(It begin, It end, Vector& captures, const CompiledRegex& r ThreadedRegexVM vm{re}; if (vm.exec(begin, end, flags | RegexExecFlags::Search)) { - std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures)); + std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures)); return true; } return false;