From 8d7a1e5654035dc4363824ee4071c0214b5f0485 Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Sat, 23 Mar 2024 11:33:26 +0100
Subject: [PATCH] LibWeb: Skip some redundant UTF-8 validation in CSS tokenizer

If we're just adding code points to a StringBuilder, there's no need to
revalidate the result.
---
 AK/FlyString.cpp                                   | 5 +++++
 AK/FlyString.h                                     | 1 +
 AK/StringBuilder.cpp                               | 5 +++++
 AK/StringBuilder.h                                 | 2 ++
 Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 4 ++--
 5 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/AK/FlyString.cpp b/AK/FlyString.cpp
index ad947190c4a..43d1129e155 100644
--- a/AK/FlyString.cpp
+++ b/AK/FlyString.cpp
@@ -31,6 +31,11 @@ ErrorOr<FlyString> FlyString::from_utf8(StringView string)
     return FlyString { TRY(String::from_utf8(string)) };
 }
 
+FlyString FlyString::from_utf8_without_validation(ReadonlyBytes string)
+{
+    return FlyString { String::from_utf8_without_validation(string) };
+}
+
 FlyString::FlyString(String const& string)
 {
     if (string.is_short_string()) {
diff --git a/AK/FlyString.h b/AK/FlyString.h
index f91fe1d2ccc..319079bfedb 100644
--- a/AK/FlyString.h
+++ b/AK/FlyString.h
@@ -23,6 +23,7 @@ public:
     FlyString() = default;
 
     static ErrorOr<FlyString> from_utf8(StringView);
+    static FlyString from_utf8_without_validation(ReadonlyBytes);
     template<typename T>
     requires(IsOneOf<RemoveCVReference<T>, ByteString, DeprecatedFlyString, FlyString, String>)
     static ErrorOr<String> from_utf8(T&&) = delete;
diff --git a/AK/StringBuilder.cpp b/AK/StringBuilder.cpp
index 70d4ec10fdc..cdf8d2f5f62 100644
--- a/AK/StringBuilder.cpp
+++ b/AK/StringBuilder.cpp
@@ -161,6 +161,11 @@ String StringBuilder::to_string_without_validation() const
     return String::from_utf8_without_validation(string_view().bytes());
 }
 
+FlyString StringBuilder::to_fly_string_without_validation() const
+{
+    return FlyString::from_utf8_without_validation(string_view().bytes());
+}
+
 ErrorOr<FlyString> StringBuilder::to_fly_string() const
 {
     return FlyString::from_utf8(string_view());
diff --git a/AK/StringBuilder.h b/AK/StringBuilder.h
index f710503a386..923adfedb62 100644
--- a/AK/StringBuilder.h
+++ b/AK/StringBuilder.h
@@ -75,6 +75,8 @@ public:
 
     [[nodiscard]] String to_string_without_validation() const;
     ErrorOr<String> to_string() const;
+
+    [[nodiscard]] FlyString to_fly_string_without_validation() const;
     ErrorOr<FlyString> to_fly_string() const;
 
     [[nodiscard]] ErrorOr<ByteBuffer> to_byte_buffer() const;
diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
index 02e95117850..5a154e28034 100644
--- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
@@ -238,7 +238,7 @@ ErrorOr<Vector<Token>> Tokenizer::tokenize(StringView input, StringView encoding
             }
             return {};
         }));
-        return builder.to_string();
+        return builder.to_string_without_validation();
     };
 
     Tokenizer tokenizer { TRY(filter_code_points(input, encoding)) };
@@ -623,7 +623,7 @@ ErrorOr<FlyString> Tokenizer::consume_an_ident_sequence()
         break;
     }
 
-    return result.to_fly_string();
+    return result.to_fly_string_without_validation();
 }
 
 // https://www.w3.org/TR/css-syntax-3/#consume-url-token