From 11abd544c623d1efb39b0f1761f31474a902a90f Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sat, 7 Oct 2017 14:25:14 +0800 Subject: [PATCH] Regex: avoid infinite loops --- src/regex_impl.cc | 5 +++++ src/regex_impl.hh | 20 ++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/regex_impl.cc b/src/regex_impl.cc index 7e122c81e..57c7d2be6 100644 --- a/src/regex_impl.cc +++ b/src/regex_impl.cc @@ -1126,6 +1126,11 @@ auto test_regex = UnitTest{[]{ kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "oober"); kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "ber"); } + + { + TestVM<> vm{R"(()*)"}; + kak_assert(not vm.exec(" ")); + } }}; } diff --git a/src/regex_impl.hh b/src/regex_impl.hh index 6b4d13697..a6980ff56 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -123,6 +123,7 @@ public: return false; Vector current_threads, next_threads; + std::unique_ptr inst_processed{new bool[m_program.bytecode.size()]}; const bool no_saves = (m_flags & RegexExecFlags::NoSaves); Utf8It start{m_begin}; @@ -133,7 +134,7 @@ public: to_next_start(start, m_end, start_chars); if (exec_from(start, no_saves ? nullptr : new_saves(nullptr), - current_threads, next_threads)) + current_threads, next_threads, inst_processed.get())) return true; if (not (flags & RegexExecFlags::Search)) @@ -143,7 +144,7 @@ public: { to_next_start(++start, m_end, start_chars); if (exec_from(start, no_saves ? nullptr : new_saves(nullptr), - current_threads, next_threads)) + current_threads, next_threads, inst_processed.get())) return true; } while (start != m_end); @@ -206,12 +207,17 @@ private: using Utf8It = typename ChooseUtf8It::Type; enum class StepResult { Consumed, Matched, Failed }; - StepResult step(const Utf8It& pos, Thread& thread, Vector& threads) + StepResult step(const Utf8It& pos, Thread& thread, Vector& threads, bool* inst_processed) { const auto prog_start = m_program.bytecode.data(); const auto prog_end = prog_start + m_program.bytecode.size(); while (true) { + const auto inst_offset = thread.inst - prog_start; + if (inst_processed[inst_offset]) + return StepResult::Failed; + inst_processed[inst_offset] = true; + const Codepoint cp = pos == m_end ? 0 : *pos; const CompiledRegex::Op op = (CompiledRegex::Op)*thread.inst++; switch (op) @@ -325,7 +331,7 @@ private: return StepResult::Failed; } - bool exec_from(const Utf8It& start, Saves* initial_saves, Vector& current_threads, Vector& next_threads) + bool exec_from(const Utf8It& start, Saves* initial_saves, Vector& current_threads, Vector& next_threads, bool* inst_processed) { current_threads.push_back({m_program.bytecode.data(), initial_saves}); next_threads.clear(); @@ -333,11 +339,12 @@ private: bool found_match = false; for (Utf8It pos = start; pos != m_end; ++pos) { + memset(inst_processed, 0, m_program.bytecode.size() * sizeof(bool));; while (not current_threads.empty()) { auto thread = current_threads.back(); current_threads.pop_back(); - switch (step(pos, thread, current_threads)) + switch (step(pos, thread, current_threads, inst_processed)) { case StepResult::Matched: if (not (m_flags & RegexExecFlags::Search) or // We are not at end, this is not a full match @@ -375,12 +382,13 @@ private: if (found_match) return true; + memset(inst_processed, 0, m_program.bytecode.size() * sizeof(bool));; // Step remaining threads to see if they match without consuming anything else while (not current_threads.empty()) { auto thread = current_threads.back(); current_threads.pop_back(); - if (step(m_end, thread, current_threads) == StepResult::Matched) + if (step(m_end, thread, current_threads, inst_processed) == StepResult::Matched) { release_saves(m_captures); m_captures = thread.saves;