Store instruction pointers directly in ThreadedRegexVM::Thread

The previous tradeoff of having a very small Thread struct is not necessary anymore as we do not memcpy Threads on swap_next since d708b77186. This requires offsets to be used instead of indices for jump/split ops.
2024-07-14 16:10:24 +03:00 · 2024-06-15 14:03:10 +10:00 · 2024-06-15 14:03:10 +10:00 · c4684d0d84
commit c4684d0d84
parent c84942c2ac
2 changed files with 42 additions and 31 deletions
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@ -706,7 +706,7 @@ private:
    {
        auto& node = get_node(index);

-        const uint32_t start_pos = (uint32_t)m_program.instructions.size();
+        const OpIndex start_pos = op_count();
        const bool ignore_case = node.ignore_case;

        const bool save = (node.op == ParsedRegex::Alternation or node.op == ParsedRegex::Sequence) and
@ -746,14 +746,14 @@ private:
                    if (child != index+1)
                        push_inst(CompiledRegex::Split);
                }
-                auto split_pos = m_program.instructions.size();
+                auto split_pos = op_count();

                const auto end = node.children_end;
                for (auto child : Children<>{m_parsed_regex, index})
                {
                    auto node = compile_node<direction>(child);
                    if (child != index+1)
-                        m_program.instructions[--split_pos].param.split = CompiledRegex::Param::Split{.target = node, .prioritize_parent = true};
+                        m_program.instructions[--split_pos].param.split = CompiledRegex::Param::Split{.offset = offset(node, split_pos), .prioritize_parent = true};
                    if (get_node(child).children_end != end)
                    {
                        auto jump = push_inst(CompiledRegex::Jump);
@ -801,8 +801,8 @@ private:
                break;
        }

-        for (auto& offset : goto_inner_end_offsets)
-            m_program.instructions[offset].param.jump_target = m_program.instructions.size();
+        for (auto& index : goto_inner_end_offsets)
+            m_program.instructions[index].param.jump_offset = offset(op_count(), index);

        if (save)
            push_inst(CompiledRegex::Save, {.save_index=int16_t(node.value * 2 + (forward ? 1 : 0))});
@ -810,19 +810,29 @@ private:
        return start_pos;
    }

+    OpIndex op_count() const
+    {
+        return static_cast<OpIndex>(m_program.instructions.size());
+    }
+
+    static OpIndex offset(OpIndex to, OpIndex from)
+    {
+        return static_cast<OpIndex>(to - from);
+    }
+
    template<RegexMode direction>
    OpIndex compile_node(ParsedRegex::NodeIndex index)
    {
        auto& node = get_node(index);

-        const OpIndex start_pos = (OpIndex)m_program.instructions.size();
+        const OpIndex start_pos = op_count();
        Vector<OpIndex> goto_ends;

        auto& quantifier = node.quantifier;

        if (quantifier.allows_none())
        {
-            auto split_pos = push_inst(CompiledRegex::Split, {.split={.target=0, .prioritize_parent=quantifier.greedy}});
+            auto split_pos = push_inst(CompiledRegex::Split, {.split={.offset=0, .prioritize_parent=quantifier.greedy}});
            goto_ends.push_back(split_pos);
        }

@ -832,18 +842,18 @@ private:
            inner_pos = compile_node_inner<direction>(index);

        if (quantifier.allows_infinite_repeat())
-            push_inst(CompiledRegex::Split, {.split = {.target=inner_pos, .prioritize_parent=not quantifier.greedy}});
+            push_inst(CompiledRegex::Split, {.split = {.offset=offset(inner_pos, op_count()), .prioritize_parent=not quantifier.greedy}});
        // Write the node as an optional match for the min -> max counts
        else for (int i = std::max((int16_t)1, quantifier.min); // STILL UGLY !
                  i < quantifier.max; ++i)
        {
-            auto split_pos = push_inst(CompiledRegex::Split, {.split={.target=0, .prioritize_parent=quantifier.greedy}});
+            auto split_pos = push_inst(CompiledRegex::Split, {.split={.offset=0, .prioritize_parent=quantifier.greedy}});
            goto_ends.push_back(split_pos);
            compile_node_inner<direction>(index);
        }

-        for (auto offset : goto_ends)
-            m_program.instructions[offset].param.split.target = m_program.instructions.size();
+        for (auto index : goto_ends)
+            m_program.instructions[index].param.split.offset = offset(op_count(), index);

        return start_pos;
    }
@ -851,7 +861,7 @@ private:
    OpIndex push_inst(CompiledRegex::Op op, CompiledRegex::Param param = {})
    {
        constexpr auto max_instructions = std::numeric_limits<OpIndex>::max();
-        const auto res = m_program.instructions.size();
+        const auto res = op_count();
        if (res >= max_instructions)
            throw regex_error(format("regex compiled to more than {} instructions", max_instructions));
        m_program.instructions.push_back({ op, 0, param });
@ -1031,7 +1041,7 @@ private:
        {
            auto& inst = m_program.instructions[i];
            if (is_jump(inst.op))
-                m_program.instructions[inst.param.jump_target].last_step = 0xffff; // tag as jump target
+                m_program.instructions[i + inst.param.jump_offset].last_step = 0xffff; // tag as jump target
        }

        for (auto block_begin = begin; block_begin < end; )
@ -1071,11 +1081,11 @@ private:
 String dump_regex(const CompiledRegex& program)
 {
    String res;
-    int count = 0;
+    int index = 0;
    for (auto& inst : program.instructions)
    {
        char buf[20];
-        format_to(buf, " {:03}     ", count++);
+        format_to(buf, " {:03}     ", index);
        res += buf;
        switch (inst.op)
        {
@ -1095,13 +1105,13 @@ String dump_regex(const CompiledRegex& program)
                res += format("character type {}\n", to_underlying(inst.param.character_type));
                break;
            case CompiledRegex::Jump:
-                res += format("jump {}\n", inst.param.jump_target);
+                res += format("jump {} ({:03})\n", inst.param.jump_offset, index + inst.param.jump_offset);
                break;
            case CompiledRegex::Split:
            {
-                res += format("split (prioritize {}) {}\n",
+                res += format("split (prioritize {}) {} ({:03})\n",
                              (inst.param.split.prioritize_parent) ? "parent" : "child",
-                              inst.param.split.target);
+                              inst.param.split.offset, index + inst.param.split.offset);
                break;
            }
            case CompiledRegex::Save:
@ -1135,6 +1145,7 @@ String dump_regex(const CompiledRegex& program)
            case CompiledRegex::Match:
                res += "match\n";
        }
+        ++index;
    }
    auto dump_start_desc = [&](const CompiledRegex::StartDesc& desc, StringView name) {
        res += name + " start desc: [";
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@ -106,11 +106,11 @@ struct CompiledRegex : UseMemoryDomain<MemoryDomain::Regex>
        } literal;
        int16_t character_class_index;
        CharacterType character_type;
-        int16_t jump_target;
+        int16_t jump_offset;
        int16_t save_index;
        struct Split
        {
-            int16_t target;
+            int16_t offset;
            bool prioritize_parent : 1;
        } split;
        bool line_start;
@ -351,10 +351,10 @@ private:
            --saves.refcount;
    };

-    struct alignas(int32_t) Thread
+    struct Thread
    {
-        int16_t inst;
-        int16_t saves;
+        const CompiledRegex::Instruction* inst;
+        int saves;
    };

    using StartDesc = CompiledRegex::StartDesc;
@ -370,7 +370,7 @@ private:

    // Steps a thread until it consumes the current character, matches or fail
    [[gnu::always_inline]]
-    void step_thread(const CompiledRegex::Instruction* instructions, const Iterator& pos, Codepoint cp,
+    void step_thread(const Iterator& pos, Codepoint cp,
                     uint16_t current_step, Thread thread, const ExecConfig& config)
    {
        auto failed = [this, &thread]() {
@ -382,7 +382,7 @@ private:

        while (true)
        {
-            auto& inst = instructions[thread.inst++];
+            auto& inst = *thread.inst++;
            // if this instruction was already executed for this step in another thread,
            // then this thread is redundant and can be dropped
            if (inst.last_step == current_step)
@ -424,11 +424,11 @@ private:
                        return failed();
                    return is_ctype(inst.param.character_type, cp) ? consumed() : failed();
                case CompiledRegex::Jump:
-                    thread.inst = inst.param.jump_target;
+                    thread.inst = &inst + inst.param.jump_offset;
                    break;
                case CompiledRegex::Split:
-                    if (auto target = inst.param.split.target;
-                        instructions[target].last_step != current_step)
+                    if (auto* target = &inst + inst.param.split.offset;
+                        target->last_step != current_step)
                    {
                        if (thread.saves >= 0)
                            ++m_saves[thread.saves].refcount;
@ -478,7 +478,8 @@ private:
        m_captures = -1;
        m_threads.ensure_initial_capacity();

-        const int16_t first_inst = forward ? 0 : m_program.first_backward_inst;
+        ConstArrayView<CompiledRegex::Instruction> insts{m_program.instructions};
+        const auto* first_inst = insts.begin() + (forward ? 0 : m_program.first_backward_inst);
        m_threads.push_current({first_inst, -1});

        const auto* start_desc = (forward ? m_program.forward_start_desc : m_program.backward_start_desc).get();
@ -486,7 +487,6 @@ private:

        constexpr bool search = mode & RegexMode::Search;
        constexpr bool any_match = mode & RegexMode::AnyMatch;
-        ConstArrayView<CompiledRegex::Instruction> insts{m_program.instructions};
        uint16_t current_step = -1;
        m_found_match = false;
        while (true) // Iterate on all codepoints and once at the end
@ -506,7 +506,7 @@ private:
            Codepoint cp = codepoint(next, config);

            while (not m_threads.current_is_empty())
-                step_thread(insts.pointer(), pos, cp, current_step, m_threads.pop_current(), config);
+                step_thread(pos, cp, current_step, m_threads.pop_current(), config);

            if (pos == config.end or
                (m_threads.next_is_empty() and (not search or m_found_match)) or