From 07cbe2daa4fa7fa0e4d8272067a4d26a9b6c4f8a Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Fri, 18 Oct 2019 22:50:44 +0200 Subject: [PATCH] LibHTML: Preserve UTF-8 codepoints when collapsing whitespace This is extremely awkward and I'm sure there are many better ways to achieve this... --- Libraries/LibHTML/Layout/LayoutText.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Libraries/LibHTML/Layout/LayoutText.cpp b/Libraries/LibHTML/Layout/LayoutText.cpp index 9aff6a269d6..bf631a9c24d 100644 --- a/Libraries/LibHTML/Layout/LayoutText.cpp +++ b/Libraries/LibHTML/Layout/LayoutText.cpp @@ -141,16 +141,19 @@ void LayoutText::split_into_lines(LayoutBlock& container) } // Collapse whitespace into single spaces - auto& raw_text = node().data(); - StringBuilder builder(raw_text.length()); - for (int i = 0; i < raw_text.length(); ++i) { - if (!isspace(raw_text[i])) { - builder.append(raw_text[i]); + auto utf8_view = Utf8View(node().data()); + StringBuilder builder(node().data().length()); + for (auto it = utf8_view.begin(); it != utf8_view.end(); ++it) { + if (!isspace(*it)) { + builder.append(utf8_view.as_string().characters_without_null_termination() + utf8_view.byte_offset_of(it), it.codepoint_length_in_bytes()); } else { builder.append(' '); - while (i < raw_text.length() && isspace(raw_text[i])) - ++i; - --i; + auto prev = it; + while (it != utf8_view.end() && isspace(*it)) { + prev = it; + ++it; + } + it = prev; } } m_text_for_rendering = builder.to_string();