From ec4d29849dc1d0357c73690722aea1a7802dd0bc Mon Sep 17 00:00:00 2001
From: Tim Ledbetter <timledbetter@gmail.com>
Date: Thu, 13 Jun 2024 19:43:08 +0100
Subject: [PATCH] LibWeb: Support finding text split across multiple text nodes

Previously, the find-in-page function failed to find text that was
split across multiple text nodes. For example, given the markup
`WH<span>F`, the query `WHF` would not be matched.


Such text is now matched by traversing all of the document's text
nodes, constructing a complete string to query against, and keeping
track of the offsets at which that string is split across nodes.
---
 Userland/Libraries/LibWeb/DOM/Document.cpp | 70 ++++++++++++++++------
 1 file changed, 53 insertions(+), 17 deletions(-)
diff --git a/Userland/Libraries/LibWeb/DOM/Document.cpp b/Userland/Libraries/LibWeb/DOM/Document.cpp
index 3343662ea97..b9b4bec9a4c 100644
--- a/Userland/Libraries/LibWeb/DOM/Document.cpp
+++ b/Userland/Libraries/LibWeb/DOM/Document.cpp
@@ -5113,29 +5113,65 @@ Vector<JS::Handle<DOM::Range>> Document::find_matching_text(String const& query,
     if (!document_element() || !document_element()->layout_node())
         return {};
 
-    Vector<JS::Handle<DOM::Range>> matches;
+    struct TextPositionNode {
+        DOM::Text& dom_node;
+        size_t start_offset { 0 };
+    };
+
+    StringBuilder builder;
+    Vector<TextPositionNode> text_positions;
+    size_t current_start_position = 0;
+    String current_node_text;
     document_element()->layout_node()->for_each_in_inclusive_subtree_of_type<Layout::TextNode>([&](auto const& text_node) {
-        auto const& text = text_node.text_for_rendering();
-        size_t offset = 0;
-        while (true) {
-            auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
-                ? text.find_byte_offset_ignoring_case(query, offset)
-                : text.find_byte_offset(query, offset);
-            if (!match_index.has_value())
-                break;
-
-            auto range = create_range();
-            auto& dom_node = const_cast<DOM::Text&>(text_node.dom_node());
-            (void)range->set_start(dom_node, match_index.value());
-            (void)range->set_end(dom_node, match_index.value() + query.code_points().length());
-
-            matches.append(range);
-            offset = match_index.value() + 1;
+        auto& dom_node = const_cast<DOM::Text&>(text_node.dom_node());
+        if (text_positions.is_empty()) {
+            text_positions.empend(dom_node);
+        } else {
+            current_start_position += current_node_text.bytes_as_string_view().length();
+            text_positions.empend(dom_node, current_start_position);
         }
 
+        current_node_text = text_node.text_for_rendering();
+        builder.append(current_node_text);
         return TraversalDecision::Continue;
     });
 
+    if (text_positions.is_empty())
+        return {};
+
+    size_t offset = 0;
+    auto* match_start_position = &text_positions[0];
+    auto text = builder.to_string_without_validation();
+    Vector<JS::Handle<DOM::Range>> matches;
+    while (true) {
+        auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
+            ? text.find_byte_offset_ignoring_case(query, offset)
+            : text.find_byte_offset(query, offset);
+        if (!match_index.has_value())
+            break;
+
+        size_t i = 0;
+        for (; i < text_positions.size() && match_index.value() > text_positions[i].start_offset; ++i)
+            match_start_position = &text_positions[i];
+
+        auto range = create_range();
+        auto start_position = match_index.value() - match_start_position->start_offset;
+        auto& start_dom_node = match_start_position->dom_node;
+        (void)range->set_start(start_dom_node, start_position);
+
+        auto* match_end_position = match_start_position;
+        for (; i < text_positions.size() && match_index.value() + query.bytes_as_string_view().length() > text_positions[i].start_offset; ++i)
+            match_end_position = &text_positions[i];
+
+        auto& end_dom_node = match_end_position->dom_node;
+        auto end_position = match_index.value() - match_end_position->start_offset + query.bytes_as_string_view().length();
+        (void)range->set_end(end_dom_node, end_position);
+
+        matches.append(range);
+        offset = match_index.value() + query.bytes_as_string_view().length() + 1;
+        match_start_position = match_end_position;
+    }
+
     return matches;
 }