mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-01 15:43:36 +03:00
AK+Everywhere: Make UTF-8 and UTF-32 to UTF-16 converters fallible
These could fail to allocate the underlying storage needed to store the UTF-16 data. Propagate these errors.
This commit is contained in:
parent
d8044c5358
commit
1edb96376b
Notes:
sideshowbarker
2024-07-17 18:46:30 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/1edb96376b Pull-request: https://github.com/SerenityOS/serenity/pull/16895 Reviewed-by: https://github.com/linusg
@ -5,6 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <AK/CharacterTypes.h>
|
#include <AK/CharacterTypes.h>
|
||||||
|
#include <AK/Concepts.h>
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
#include <AK/Utf16View.h>
|
#include <AK/Utf16View.h>
|
||||||
@ -20,45 +21,46 @@ static constexpr u16 low_surrogate_max = 0xdfff;
|
|||||||
static constexpr u32 replacement_code_point = 0xfffd;
|
static constexpr u32 replacement_code_point = 0xfffd;
|
||||||
static constexpr u32 first_supplementary_plane_code_point = 0x10000;
|
static constexpr u32 first_supplementary_plane_code_point = 0x10000;
|
||||||
|
|
||||||
template<typename UtfViewType>
|
template<OneOf<Utf8View, Utf32View> UtfViewType>
|
||||||
static Utf16Data to_utf16_impl(UtfViewType const& view)
|
static ErrorOr<Utf16Data> to_utf16_impl(UtfViewType const& view)
|
||||||
requires(IsSame<UtfViewType, Utf8View> || IsSame<UtfViewType, Utf32View>)
|
|
||||||
{
|
{
|
||||||
Utf16Data utf16_data;
|
Utf16Data utf16_data;
|
||||||
utf16_data.ensure_capacity(view.length());
|
TRY(utf16_data.try_ensure_capacity(view.length()));
|
||||||
|
|
||||||
for (auto code_point : view)
|
for (auto code_point : view)
|
||||||
code_point_to_utf16(utf16_data, code_point);
|
TRY(code_point_to_utf16(utf16_data, code_point));
|
||||||
|
|
||||||
return utf16_data;
|
return utf16_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
Utf16Data utf8_to_utf16(StringView utf8_view)
|
ErrorOr<Utf16Data> utf8_to_utf16(StringView utf8_view)
|
||||||
{
|
{
|
||||||
return to_utf16_impl(Utf8View { utf8_view });
|
return to_utf16_impl(Utf8View { utf8_view });
|
||||||
}
|
}
|
||||||
|
|
||||||
Utf16Data utf8_to_utf16(Utf8View const& utf8_view)
|
ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const& utf8_view)
|
||||||
{
|
{
|
||||||
return to_utf16_impl(utf8_view);
|
return to_utf16_impl(utf8_view);
|
||||||
}
|
}
|
||||||
|
|
||||||
Utf16Data utf32_to_utf16(Utf32View const& utf32_view)
|
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const& utf32_view)
|
||||||
{
|
{
|
||||||
return to_utf16_impl(utf32_view);
|
return to_utf16_impl(utf32_view);
|
||||||
}
|
}
|
||||||
|
|
||||||
void code_point_to_utf16(Utf16Data& string, u32 code_point)
|
ErrorOr<void> code_point_to_utf16(Utf16Data& string, u32 code_point)
|
||||||
{
|
{
|
||||||
VERIFY(is_unicode(code_point));
|
VERIFY(is_unicode(code_point));
|
||||||
|
|
||||||
if (code_point < first_supplementary_plane_code_point) {
|
if (code_point < first_supplementary_plane_code_point) {
|
||||||
string.append(static_cast<u16>(code_point));
|
TRY(string.try_append(static_cast<u16>(code_point)));
|
||||||
} else {
|
} else {
|
||||||
code_point -= first_supplementary_plane_code_point;
|
code_point -= first_supplementary_plane_code_point;
|
||||||
string.append(static_cast<u16>(high_surrogate_min | (code_point >> 10)));
|
TRY(string.try_append(static_cast<u16>(high_surrogate_min | (code_point >> 10))));
|
||||||
string.append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff)));
|
TRY(string.try_append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Utf16View::is_high_surrogate(u16 code_unit)
|
bool Utf16View::is_high_surrogate(u16 code_unit)
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <AK/DeprecatedString.h>
|
#include <AK/DeprecatedString.h>
|
||||||
|
#include <AK/Error.h>
|
||||||
#include <AK/Format.h>
|
#include <AK/Format.h>
|
||||||
#include <AK/Forward.h>
|
#include <AK/Forward.h>
|
||||||
#include <AK/Optional.h>
|
#include <AK/Optional.h>
|
||||||
@ -18,10 +19,10 @@ namespace AK {
|
|||||||
|
|
||||||
using Utf16Data = Vector<u16, 1>;
|
using Utf16Data = Vector<u16, 1>;
|
||||||
|
|
||||||
Utf16Data utf8_to_utf16(StringView);
|
ErrorOr<Utf16Data> utf8_to_utf16(StringView);
|
||||||
Utf16Data utf8_to_utf16(Utf8View const&);
|
ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const&);
|
||||||
Utf16Data utf32_to_utf16(Utf32View const&);
|
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const&);
|
||||||
void code_point_to_utf16(Utf16Data&, u32);
|
ErrorOr<void> code_point_to_utf16(Utf16Data&, u32);
|
||||||
|
|
||||||
class Utf16View;
|
class Utf16View;
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
|
|
||||||
TEST_CASE(decode_ascii)
|
TEST_CASE(decode_ascii)
|
||||||
{
|
{
|
||||||
auto string = AK::utf8_to_utf16("Hello World!11"sv);
|
auto string = MUST(AK::utf8_to_utf16("Hello World!11"sv));
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
|
|
||||||
size_t valid_code_units = 0;
|
size_t valid_code_units = 0;
|
||||||
@ -33,7 +33,7 @@ TEST_CASE(decode_ascii)
|
|||||||
|
|
||||||
TEST_CASE(decode_utf8)
|
TEST_CASE(decode_utf8)
|
||||||
{
|
{
|
||||||
auto string = AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv);
|
auto string = MUST(AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv));
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
|
|
||||||
size_t valid_code_units = 0;
|
size_t valid_code_units = 0;
|
||||||
@ -54,7 +54,7 @@ TEST_CASE(encode_utf8)
|
|||||||
{
|
{
|
||||||
{
|
{
|
||||||
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
|
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
|
||||||
auto string = AK::utf8_to_utf16(utf8_string);
|
auto string = MUST(AK::utf8_to_utf16(utf8_string));
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string);
|
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string);
|
||||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string);
|
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string);
|
||||||
@ -91,7 +91,7 @@ TEST_CASE(decode_utf16)
|
|||||||
|
|
||||||
TEST_CASE(iterate_utf16)
|
TEST_CASE(iterate_utf16)
|
||||||
{
|
{
|
||||||
auto string = AK::utf8_to_utf16("Привет 😀"sv);
|
auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
auto iterator = view.begin();
|
auto iterator = view.begin();
|
||||||
|
|
||||||
@ -263,7 +263,7 @@ TEST_CASE(decode_invalid_utf16)
|
|||||||
|
|
||||||
TEST_CASE(substring_view)
|
TEST_CASE(substring_view)
|
||||||
{
|
{
|
||||||
auto string = AK::utf8_to_utf16("Привет 😀"sv);
|
auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
|
||||||
{
|
{
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
view = view.substring_view(7, 2);
|
view = view.substring_view(7, 2);
|
||||||
|
@ -754,7 +754,7 @@ TEST_CASE(ECMA262_unicode_match)
|
|||||||
for (auto& test : tests) {
|
for (auto& test : tests) {
|
||||||
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);
|
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);
|
||||||
|
|
||||||
auto subject = AK::utf8_to_utf16(test.subject);
|
auto subject = MUST(AK::utf8_to_utf16(test.subject));
|
||||||
Utf16View view { subject };
|
Utf16View view { subject };
|
||||||
|
|
||||||
if constexpr (REGEX_DEBUG) {
|
if constexpr (REGEX_DEBUG) {
|
||||||
@ -868,7 +868,7 @@ TEST_CASE(ECMA262_property_match)
|
|||||||
for (auto& test : tests) {
|
for (auto& test : tests) {
|
||||||
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);
|
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);
|
||||||
|
|
||||||
auto subject = AK::utf8_to_utf16(test.subject);
|
auto subject = MUST(AK::utf8_to_utf16(test.subject));
|
||||||
Utf16View view { subject };
|
Utf16View view { subject };
|
||||||
|
|
||||||
if constexpr (REGEX_DEBUG) {
|
if constexpr (REGEX_DEBUG) {
|
||||||
|
@ -142,8 +142,8 @@ public:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
case UTF16: {
|
case UTF16: {
|
||||||
auto utf16_view = Utf16View(utf8_to_utf16(m_values.at(index.row())));
|
auto utf16_data = utf8_to_utf16(m_values.at(index.row())).release_value_but_fixme_should_propagate_errors();
|
||||||
if (utf16_view.validate())
|
if (Utf16View utf16_view { utf16_data }; utf16_view.validate())
|
||||||
return static_cast<i32>(utf16_view.length_in_code_units() * 2);
|
return static_cast<i32>(utf16_view.length_in_code_units() * 2);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -490,7 +490,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
|
|||||||
{
|
{
|
||||||
auto string = TRY(vm.argument(0).to_string(vm));
|
auto string = TRY(vm.argument(0).to_string(vm));
|
||||||
StringBuilder escaped;
|
StringBuilder escaped;
|
||||||
for (auto code_point : utf8_to_utf16(string)) {
|
for (auto code_point : TRY_OR_THROW_OOM(vm, utf8_to_utf16(string))) {
|
||||||
if (code_point < 256) {
|
if (code_point < 256) {
|
||||||
if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(static_cast<char>(code_point)))
|
if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(static_cast<char>(code_point)))
|
||||||
escaped.append(code_point);
|
escaped.append(code_point);
|
||||||
|
@ -89,7 +89,11 @@ ErrorOr<DeprecatedString, ParseRegexPatternError> parse_regex_pattern(StringView
|
|||||||
if (unicode && unicode_sets)
|
if (unicode && unicode_sets)
|
||||||
return ParseRegexPatternError { DeprecatedString::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v') };
|
return ParseRegexPatternError { DeprecatedString::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v') };
|
||||||
|
|
||||||
auto utf16_pattern = AK::utf8_to_utf16(pattern);
|
auto utf16_pattern_result = AK::utf8_to_utf16(pattern);
|
||||||
|
if (utf16_pattern_result.is_error())
|
||||||
|
return ParseRegexPatternError { "Out of memory"sv };
|
||||||
|
|
||||||
|
auto utf16_pattern = utf16_pattern_result.release_value();
|
||||||
Utf16View utf16_pattern_view { utf16_pattern };
|
Utf16View utf16_pattern_view { utf16_pattern };
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
|
|
||||||
|
@ -123,7 +123,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
|
|||||||
if (code_point < 0 || code_point > 0x10FFFF)
|
if (code_point < 0 || code_point > 0x10FFFF)
|
||||||
return vm.throw_completion<RangeError>(ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects());
|
return vm.throw_completion<RangeError>(ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects());
|
||||||
|
|
||||||
AK::code_point_to_utf16(string, static_cast<u32>(code_point));
|
TRY_OR_THROW_OOM(vm, code_point_to_utf16(string, static_cast<u32>(code_point)));
|
||||||
}
|
}
|
||||||
|
|
||||||
return PrimitiveString::create(vm, Utf16String(move(string)));
|
return PrimitiveString::create(vm, Utf16String(move(string)));
|
||||||
|
@ -33,7 +33,7 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16Data string)
|
|||||||
|
|
||||||
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string)
|
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string)
|
||||||
{
|
{
|
||||||
return create(AK::utf8_to_utf16(string));
|
return create(AK::utf8_to_utf16(string).release_value_but_fixme_should_propagate_errors());
|
||||||
}
|
}
|
||||||
|
|
||||||
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
|
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
|
||||||
|
@ -280,7 +280,7 @@ public:
|
|||||||
return RegexStringView { Utf32View { data.data(), data.size() } };
|
return RegexStringView { Utf32View { data.data(), data.size() } };
|
||||||
},
|
},
|
||||||
[&](Utf16View) {
|
[&](Utf16View) {
|
||||||
optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() });
|
optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() }).release_value_but_fixme_should_propagate_errors();
|
||||||
return RegexStringView { Utf16View { optional_utf16_storage } };
|
return RegexStringView { Utf16View { optional_utf16_storage } };
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -34,8 +34,8 @@ DeprecatedString strip_and_collapse_whitespace(StringView string)
|
|||||||
// https://infra.spec.whatwg.org/#code-unit-prefix
|
// https://infra.spec.whatwg.org/#code-unit-prefix
|
||||||
bool is_code_unit_prefix(StringView potential_prefix, StringView input)
|
bool is_code_unit_prefix(StringView potential_prefix, StringView input)
|
||||||
{
|
{
|
||||||
auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix);
|
auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors();
|
||||||
auto input_utf16 = utf8_to_utf16(input);
|
auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors();
|
||||||
|
|
||||||
// 1. Let i be 0.
|
// 1. Let i be 0.
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <AK/Utf16View.h>
|
#include <AK/Utf16View.h>
|
||||||
|
#include <LibJS/Runtime/Completion.h>
|
||||||
|
#include <LibJS/Runtime/Utf16String.h>
|
||||||
#include <LibWeb/DOM/Document.h>
|
#include <LibWeb/DOM/Document.h>
|
||||||
#include <LibWeb/SVG/SVGTextContentElement.h>
|
#include <LibWeb/SVG/SVGTextContentElement.h>
|
||||||
|
|
||||||
@ -17,9 +19,10 @@ SVGTextContentElement::SVGTextContentElement(DOM::Document& document, DOM::Quali
|
|||||||
}
|
}
|
||||||
|
|
||||||
// https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars
|
// https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars
|
||||||
int SVGTextContentElement::get_number_of_chars() const
|
WebIDL::ExceptionOr<int> SVGTextContentElement::get_number_of_chars() const
|
||||||
{
|
{
|
||||||
return AK::utf8_to_utf16(child_text_content()).size();
|
auto chars = TRY_OR_THROW_OOM(vm(), utf8_to_utf16(child_text_content()));
|
||||||
|
return static_cast<int>(chars.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <LibWeb/SVG/SVGGraphicsElement.h>
|
#include <LibWeb/SVG/SVGGraphicsElement.h>
|
||||||
|
#include <LibWeb/WebIDL/ExceptionOr.h>
|
||||||
|
|
||||||
namespace Web::SVG {
|
namespace Web::SVG {
|
||||||
|
|
||||||
@ -15,7 +16,7 @@ class SVGTextContentElement : public SVGGraphicsElement {
|
|||||||
WEB_PLATFORM_OBJECT(SVGTextContentElement, SVGGraphicsElement);
|
WEB_PLATFORM_OBJECT(SVGTextContentElement, SVGGraphicsElement);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
int get_number_of_chars() const;
|
WebIDL::ExceptionOr<int> get_number_of_chars() const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
SVGTextContentElement(DOM::Document&, DOM::QualifiedName);
|
SVGTextContentElement(DOM::Document&, DOM::QualifiedName);
|
||||||
|
Loading…
Reference in New Issue
Block a user