AK+Everywhere: Make UTF-8 and UTF-32 to UTF-16 converters fallible

These conversions can fail to allocate the underlying storage needed to
hold the UTF-16 data. Propagate those allocation errors to the callers.
This commit is contained in:
Timothy Flynn 2023-01-06 13:19:34 -05:00 committed by Linus Groh
parent d8044c5358
commit 1edb96376b
Notes: sideshowbarker 2024-07-17 18:46:30 +09:00
13 changed files with 46 additions and 35 deletions

View File

@ -5,6 +5,7 @@
*/ */
#include <AK/CharacterTypes.h> #include <AK/CharacterTypes.h>
#include <AK/Concepts.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
#include <AK/StringView.h> #include <AK/StringView.h>
#include <AK/Utf16View.h> #include <AK/Utf16View.h>
@ -20,45 +21,46 @@ static constexpr u16 low_surrogate_max = 0xdfff;
static constexpr u32 replacement_code_point = 0xfffd; static constexpr u32 replacement_code_point = 0xfffd;
static constexpr u32 first_supplementary_plane_code_point = 0x10000; static constexpr u32 first_supplementary_plane_code_point = 0x10000;
template<typename UtfViewType> template<OneOf<Utf8View, Utf32View> UtfViewType>
static Utf16Data to_utf16_impl(UtfViewType const& view) static ErrorOr<Utf16Data> to_utf16_impl(UtfViewType const& view)
requires(IsSame<UtfViewType, Utf8View> || IsSame<UtfViewType, Utf32View>)
{ {
Utf16Data utf16_data; Utf16Data utf16_data;
utf16_data.ensure_capacity(view.length()); TRY(utf16_data.try_ensure_capacity(view.length()));
for (auto code_point : view) for (auto code_point : view)
code_point_to_utf16(utf16_data, code_point); TRY(code_point_to_utf16(utf16_data, code_point));
return utf16_data; return utf16_data;
} }
Utf16Data utf8_to_utf16(StringView utf8_view) ErrorOr<Utf16Data> utf8_to_utf16(StringView utf8_view)
{ {
return to_utf16_impl(Utf8View { utf8_view }); return to_utf16_impl(Utf8View { utf8_view });
} }
Utf16Data utf8_to_utf16(Utf8View const& utf8_view) ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const& utf8_view)
{ {
return to_utf16_impl(utf8_view); return to_utf16_impl(utf8_view);
} }
Utf16Data utf32_to_utf16(Utf32View const& utf32_view) ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const& utf32_view)
{ {
return to_utf16_impl(utf32_view); return to_utf16_impl(utf32_view);
} }
void code_point_to_utf16(Utf16Data& string, u32 code_point) ErrorOr<void> code_point_to_utf16(Utf16Data& string, u32 code_point)
{ {
VERIFY(is_unicode(code_point)); VERIFY(is_unicode(code_point));
if (code_point < first_supplementary_plane_code_point) { if (code_point < first_supplementary_plane_code_point) {
string.append(static_cast<u16>(code_point)); TRY(string.try_append(static_cast<u16>(code_point)));
} else { } else {
code_point -= first_supplementary_plane_code_point; code_point -= first_supplementary_plane_code_point;
string.append(static_cast<u16>(high_surrogate_min | (code_point >> 10))); TRY(string.try_append(static_cast<u16>(high_surrogate_min | (code_point >> 10))));
string.append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff))); TRY(string.try_append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff))));
} }
return {};
} }
bool Utf16View::is_high_surrogate(u16 code_unit) bool Utf16View::is_high_surrogate(u16 code_unit)

View File

@ -7,6 +7,7 @@
#pragma once #pragma once
#include <AK/DeprecatedString.h> #include <AK/DeprecatedString.h>
#include <AK/Error.h>
#include <AK/Format.h> #include <AK/Format.h>
#include <AK/Forward.h> #include <AK/Forward.h>
#include <AK/Optional.h> #include <AK/Optional.h>
@ -18,10 +19,10 @@ namespace AK {
using Utf16Data = Vector<u16, 1>; using Utf16Data = Vector<u16, 1>;
Utf16Data utf8_to_utf16(StringView); ErrorOr<Utf16Data> utf8_to_utf16(StringView);
Utf16Data utf8_to_utf16(Utf8View const&); ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const&);
Utf16Data utf32_to_utf16(Utf32View const&); ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const&);
void code_point_to_utf16(Utf16Data&, u32); ErrorOr<void> code_point_to_utf16(Utf16Data&, u32);
class Utf16View; class Utf16View;

View File

@ -14,7 +14,7 @@
TEST_CASE(decode_ascii) TEST_CASE(decode_ascii)
{ {
auto string = AK::utf8_to_utf16("Hello World!11"sv); auto string = MUST(AK::utf8_to_utf16("Hello World!11"sv));
Utf16View view { string }; Utf16View view { string };
size_t valid_code_units = 0; size_t valid_code_units = 0;
@ -33,7 +33,7 @@ TEST_CASE(decode_ascii)
TEST_CASE(decode_utf8) TEST_CASE(decode_utf8)
{ {
auto string = AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv); auto string = MUST(AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv));
Utf16View view { string }; Utf16View view { string };
size_t valid_code_units = 0; size_t valid_code_units = 0;
@ -54,7 +54,7 @@ TEST_CASE(encode_utf8)
{ {
{ {
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"); DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
auto string = AK::utf8_to_utf16(utf8_string); auto string = MUST(AK::utf8_to_utf16(utf8_string));
Utf16View view { string }; Utf16View view { string };
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string); EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string); EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string);
@ -91,7 +91,7 @@ TEST_CASE(decode_utf16)
TEST_CASE(iterate_utf16) TEST_CASE(iterate_utf16)
{ {
auto string = AK::utf8_to_utf16("Привет 😀"sv); auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
Utf16View view { string }; Utf16View view { string };
auto iterator = view.begin(); auto iterator = view.begin();
@ -263,7 +263,7 @@ TEST_CASE(decode_invalid_utf16)
TEST_CASE(substring_view) TEST_CASE(substring_view)
{ {
auto string = AK::utf8_to_utf16("Привет 😀"sv); auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
{ {
Utf16View view { string }; Utf16View view { string };
view = view.substring_view(7, 2); view = view.substring_view(7, 2);

View File

@ -754,7 +754,7 @@ TEST_CASE(ECMA262_unicode_match)
for (auto& test : tests) { for (auto& test : tests) {
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options); Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);
auto subject = AK::utf8_to_utf16(test.subject); auto subject = MUST(AK::utf8_to_utf16(test.subject));
Utf16View view { subject }; Utf16View view { subject };
if constexpr (REGEX_DEBUG) { if constexpr (REGEX_DEBUG) {
@ -868,7 +868,7 @@ TEST_CASE(ECMA262_property_match)
for (auto& test : tests) { for (auto& test : tests) {
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options); Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);
auto subject = AK::utf8_to_utf16(test.subject); auto subject = MUST(AK::utf8_to_utf16(test.subject));
Utf16View view { subject }; Utf16View view { subject };
if constexpr (REGEX_DEBUG) { if constexpr (REGEX_DEBUG) {

View File

@ -142,8 +142,8 @@ public:
return 0; return 0;
} }
case UTF16: { case UTF16: {
auto utf16_view = Utf16View(utf8_to_utf16(m_values.at(index.row()))); auto utf16_data = utf8_to_utf16(m_values.at(index.row())).release_value_but_fixme_should_propagate_errors();
if (utf16_view.validate()) if (Utf16View utf16_view { utf16_data }; utf16_view.validate())
return static_cast<i32>(utf16_view.length_in_code_units() * 2); return static_cast<i32>(utf16_view.length_in_code_units() * 2);
return 0; return 0;
} }

View File

@ -490,7 +490,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
{ {
auto string = TRY(vm.argument(0).to_string(vm)); auto string = TRY(vm.argument(0).to_string(vm));
StringBuilder escaped; StringBuilder escaped;
for (auto code_point : utf8_to_utf16(string)) { for (auto code_point : TRY_OR_THROW_OOM(vm, utf8_to_utf16(string))) {
if (code_point < 256) { if (code_point < 256) {
if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(static_cast<char>(code_point))) if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(static_cast<char>(code_point)))
escaped.append(code_point); escaped.append(code_point);

View File

@ -89,7 +89,11 @@ ErrorOr<DeprecatedString, ParseRegexPatternError> parse_regex_pattern(StringView
if (unicode && unicode_sets) if (unicode && unicode_sets)
return ParseRegexPatternError { DeprecatedString::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v') }; return ParseRegexPatternError { DeprecatedString::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v') };
auto utf16_pattern = AK::utf8_to_utf16(pattern); auto utf16_pattern_result = AK::utf8_to_utf16(pattern);
if (utf16_pattern_result.is_error())
return ParseRegexPatternError { "Out of memory"sv };
auto utf16_pattern = utf16_pattern_result.release_value();
Utf16View utf16_pattern_view { utf16_pattern }; Utf16View utf16_pattern_view { utf16_pattern };
StringBuilder builder; StringBuilder builder;

View File

@ -123,7 +123,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
if (code_point < 0 || code_point > 0x10FFFF) if (code_point < 0 || code_point > 0x10FFFF)
return vm.throw_completion<RangeError>(ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects()); return vm.throw_completion<RangeError>(ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects());
AK::code_point_to_utf16(string, static_cast<u32>(code_point)); TRY_OR_THROW_OOM(vm, code_point_to_utf16(string, static_cast<u32>(code_point)));
} }
return PrimitiveString::create(vm, Utf16String(move(string))); return PrimitiveString::create(vm, Utf16String(move(string)));

View File

@ -33,7 +33,7 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16Data string)
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string) NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string)
{ {
return create(AK::utf8_to_utf16(string)); return create(AK::utf8_to_utf16(string).release_value_but_fixme_should_propagate_errors());
} }
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view) NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)

View File

@ -280,7 +280,7 @@ public:
return RegexStringView { Utf32View { data.data(), data.size() } }; return RegexStringView { Utf32View { data.data(), data.size() } };
}, },
[&](Utf16View) { [&](Utf16View) {
optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() }); optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() }).release_value_but_fixme_should_propagate_errors();
return RegexStringView { Utf16View { optional_utf16_storage } }; return RegexStringView { Utf16View { optional_utf16_storage } };
}); });

View File

@ -34,8 +34,8 @@ DeprecatedString strip_and_collapse_whitespace(StringView string)
// https://infra.spec.whatwg.org/#code-unit-prefix // https://infra.spec.whatwg.org/#code-unit-prefix
bool is_code_unit_prefix(StringView potential_prefix, StringView input) bool is_code_unit_prefix(StringView potential_prefix, StringView input)
{ {
auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix); auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors();
auto input_utf16 = utf8_to_utf16(input); auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors();
// 1. Let i be 0. // 1. Let i be 0.
size_t i = 0; size_t i = 0;

View File

@ -5,6 +5,8 @@
*/ */
#include <AK/Utf16View.h> #include <AK/Utf16View.h>
#include <LibJS/Runtime/Completion.h>
#include <LibJS/Runtime/Utf16String.h>
#include <LibWeb/DOM/Document.h> #include <LibWeb/DOM/Document.h>
#include <LibWeb/SVG/SVGTextContentElement.h> #include <LibWeb/SVG/SVGTextContentElement.h>
@ -17,9 +19,10 @@ SVGTextContentElement::SVGTextContentElement(DOM::Document& document, DOM::Quali
} }
// https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars // https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars
int SVGTextContentElement::get_number_of_chars() const WebIDL::ExceptionOr<int> SVGTextContentElement::get_number_of_chars() const
{ {
return AK::utf8_to_utf16(child_text_content()).size(); auto chars = TRY_OR_THROW_OOM(vm(), utf8_to_utf16(child_text_content()));
return static_cast<int>(chars.size());
} }
} }

View File

@ -7,6 +7,7 @@
#pragma once #pragma once
#include <LibWeb/SVG/SVGGraphicsElement.h> #include <LibWeb/SVG/SVGGraphicsElement.h>
#include <LibWeb/WebIDL/ExceptionOr.h>
namespace Web::SVG { namespace Web::SVG {
@ -15,7 +16,7 @@ class SVGTextContentElement : public SVGGraphicsElement {
WEB_PLATFORM_OBJECT(SVGTextContentElement, SVGGraphicsElement); WEB_PLATFORM_OBJECT(SVGTextContentElement, SVGGraphicsElement);
public: public:
int get_number_of_chars() const; WebIDL::ExceptionOr<int> get_number_of_chars() const;
protected: protected:
SVGTextContentElement(DOM::Document&, DOM::QualifiedName); SVGTextContentElement(DOM::Document&, DOM::QualifiedName);