LibJS: Replace Vector<u16> usage in PrimitiveString with Utf16String

This commit does not go out of its way to reduce copying of the string
data yet, but is a minimum set of changes to compile LibJS after making
PrimitiveString hold a Utf16String.
This commit is contained in:
Timothy Flynn 2021-08-09 09:06:45 -04:00 committed by Andreas Kling
parent 02e7dceb96
commit c1e99fca1a
Notes: sideshowbarker 2024-07-18 07:08:37 +09:00
11 changed files with 80 additions and 87 deletions

View File

@ -29,6 +29,7 @@
#include <LibJS/Runtime/PropertyName.h>
#include <LibJS/Runtime/ProxyObject.h>
#include <LibJS/Runtime/Reference.h>
#include <LibJS/Runtime/Utf16String.h>
namespace JS {
@ -576,7 +577,7 @@ String get_substitution(GlobalObject& global_object, Utf16View const& matched, U
auto replace_string = replacement.to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View replace_view { replace_string };
auto replace_view = replace_string.view();
StringBuilder result;

View File

@ -17,7 +17,7 @@ PrimitiveString::PrimitiveString(String string)
{
}
PrimitiveString::PrimitiveString(Vector<u16> string)
PrimitiveString::PrimitiveString(Utf16String string)
: m_utf16_string(move(string))
, m_has_utf16_string(true)
{
@ -30,16 +30,16 @@ PrimitiveString::~PrimitiveString()
String const& PrimitiveString::string() const
{
if (!m_has_utf8_string) {
m_utf8_string = utf16_string_view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
m_utf8_string = m_utf16_string.to_utf8();
m_has_utf8_string = true;
}
return m_utf8_string;
}
Vector<u16> const& PrimitiveString::utf16_string() const
Utf16String const& PrimitiveString::utf16_string() const
{
if (!m_has_utf16_string) {
m_utf16_string = AK::utf8_to_utf16(m_utf8_string);
m_utf16_string = Utf16String(m_utf8_string);
m_has_utf16_string = true;
}
return m_utf16_string;
@ -47,24 +47,12 @@ Vector<u16> const& PrimitiveString::utf16_string() const
Utf16View PrimitiveString::utf16_string_view() const
{
return Utf16View { utf16_string() };
return utf16_string().view();
}
PrimitiveString* js_string(Heap& heap, Utf16View const& view)
{
if (view.is_empty())
return &heap.vm().empty_string();
if (view.length_in_code_units() == 1) {
u16 code_unit = view.code_unit_at(0);
if (is_ascii(code_unit))
return &heap.vm().single_ascii_character_string(static_cast<u8>(code_unit));
}
Vector<u16> string;
string.ensure_capacity(view.length_in_code_units());
string.append(view.data(), view.length_in_code_units());
return js_string(heap, move(string));
return js_string(heap, Utf16String(view));
}
PrimitiveString* js_string(VM& vm, Utf16View const& view)
@ -72,13 +60,13 @@ PrimitiveString* js_string(VM& vm, Utf16View const& view)
return js_string(vm.heap(), view);
}
PrimitiveString* js_string(Heap& heap, Vector<u16> string)
PrimitiveString* js_string(Heap& heap, Utf16String string)
{
if (string.is_empty())
return &heap.vm().empty_string();
if (string.size() == 1) {
u16 code_unit = string.at(0);
if (string.length_in_code_units() == 1) {
u16 code_unit = string.code_unit_at(0);
if (is_ascii(code_unit))
return &heap.vm().single_ascii_character_string(static_cast<u8>(code_unit));
}
@ -86,7 +74,7 @@ PrimitiveString* js_string(Heap& heap, Vector<u16> string)
return heap.allocate_without_global_object<PrimitiveString>(move(string));
}
PrimitiveString* js_string(VM& vm, Vector<u16> string)
PrimitiveString* js_string(VM& vm, Utf16String string)
{
return js_string(vm.heap(), move(string));
}

View File

@ -7,15 +7,15 @@
#pragma once
#include <AK/String.h>
#include <AK/Vector.h>
#include <LibJS/Heap/Cell.h>
#include <LibJS/Runtime/Utf16String.h>
namespace JS {
class PrimitiveString final : public Cell {
public:
explicit PrimitiveString(String);
explicit PrimitiveString(Vector<u16>);
explicit PrimitiveString(Utf16String);
virtual ~PrimitiveString();
PrimitiveString(PrimitiveString const&) = delete;
@ -23,7 +23,7 @@ public:
String const& string() const;
Vector<u16> const& utf16_string() const;
Utf16String const& utf16_string() const;
Utf16View utf16_string_view() const;
private:
@ -32,15 +32,15 @@ private:
mutable String m_utf8_string;
mutable bool m_has_utf8_string { false };
mutable Vector<u16> m_utf16_string;
mutable Utf16String m_utf16_string;
mutable bool m_has_utf16_string { false };
};
PrimitiveString* js_string(Heap&, Utf16View const&);
PrimitiveString* js_string(VM&, Utf16View const&);
PrimitiveString* js_string(Heap&, Vector<u16>);
PrimitiveString* js_string(VM&, Vector<u16>);
PrimitiveString* js_string(Heap&, Utf16String);
PrimitiveString* js_string(VM&, Utf16String);
PrimitiveString* js_string(Heap&, String);
PrimitiveString* js_string(VM&, String);

View File

@ -18,6 +18,7 @@
#include <LibJS/Runtime/RegExpPrototype.h>
#include <LibJS/Runtime/RegExpStringIterator.h>
#include <LibJS/Runtime/StringPrototype.h>
#include <LibJS/Runtime/Utf16String.h>
#include <LibJS/Token.h>
namespace JS {
@ -414,7 +415,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::exec)
auto string = vm.argument(0).to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View string_view { string };
auto string_view = string.view();
return regexp_builtin_exec(global_object, *regexp_object, string_view);
}
@ -429,7 +430,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::test)
auto string = vm.argument(0).to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View string_view { string };
auto string_view = string.view();
auto match = regexp_exec(global_object, *regexp_object, string_view);
if (vm.exception())
@ -472,7 +473,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
auto string = vm.argument(0).to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View string_view { string };
auto string_view = string.view();
auto global_value = regexp_object->get(vm.names.global);
if (vm.exception())
@ -597,7 +598,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_replace)
auto string = string_value.to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View string_view { string };
auto string_view = string.view();
if (!replace_value.is_function()) {
auto replace_string = replace_value.to_string(global_object);
@ -672,7 +673,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_replace)
auto matched = matched_value.to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View matched_view { matched };
auto matched_length = matched.length_in_code_units();
auto position_value = result.get(vm.names.index);
if (vm.exception())
@ -711,7 +712,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_replace)
if (replace_value.is_function()) {
MarkedValueList replacer_args(vm.heap());
replacer_args.append(js_string(vm, matched_view));
replacer_args.append(js_string(vm, move(matched)));
replacer_args.extend(move(captures));
replacer_args.append(Value(position));
replacer_args.append(js_string(vm, string_view));
@ -734,7 +735,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_replace)
return {};
}
replacement = get_substitution(global_object, matched_view, string_view, position, captures, named_captures_object, replace_value);
replacement = get_substitution(global_object, matched.view(), string_view, position, captures, named_captures_object, replace_value);
if (vm.exception())
return {};
}
@ -748,7 +749,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_replace)
builder.append(replacement);
accumulated_result = builder.build();
next_source_position = position + matched_view.length_in_code_units();
next_source_position = position + matched_length;
}
}
@ -774,7 +775,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_search)
auto string = vm.argument(0).to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View string_view { string };
auto string_view = string.view();
auto previous_last_index = regexp_object->get(vm.names.lastIndex);
if (vm.exception())
@ -822,7 +823,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_split)
auto string = vm.argument(0).to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View string_view { string };
auto string_view = string.view();
auto* constructor = species_constructor(global_object, *regexp_object, *global_object.regexp_constructor());
if (vm.exception())

View File

@ -10,12 +10,12 @@
namespace JS {
// 22.2.7.1 CreateRegExpStringIterator ( R, S, global, fullUnicode ), https://tc39.es/ecma262/#sec-createregexpstringiterator
RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, Vector<u16> string, bool global, bool unicode)
RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, Utf16String string, bool global, bool unicode)
{
return global_object.heap().allocate<RegExpStringIterator>(global_object, *global_object.regexp_string_iterator_prototype(), regexp_object, move(string), global, unicode);
}
RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode)
RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, Utf16String string, bool global, bool unicode)
: Object(prototype)
, m_regexp_object(regexp_object)
, m_string(move(string))

View File

@ -8,6 +8,7 @@
#include <AK/Utf16View.h>
#include <LibJS/Runtime/Object.h>
#include <LibJS/Runtime/Utf16String.h>
namespace JS {
@ -15,13 +16,13 @@ class RegExpStringIterator final : public Object {
JS_OBJECT(RegExpStringIterator, Object);
public:
static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, Vector<u16> string, bool global, bool unicode);
static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, Utf16String string, bool global, bool unicode);
explicit RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode);
explicit RegExpStringIterator(Object& prototype, Object& regexp_object, Utf16String string, bool global, bool unicode);
virtual ~RegExpStringIterator() override = default;
Object& regexp_object() { return m_regexp_object; }
Utf16View string() const { return Utf16View { m_string }; }
Utf16View string() const { return m_string.view(); }
bool global() const { return m_global; }
bool unicode() const { return m_unicode; }
@ -32,7 +33,7 @@ private:
virtual void visit_edges(Cell::Visitor&) override;
Object& m_regexp_object;
Vector<u16> m_string;
Utf16String m_string;
bool m_global { false };
bool m_unicode { false };
bool m_done { false };

View File

@ -9,6 +9,7 @@
#include <LibJS/Runtime/RegExpPrototype.h>
#include <LibJS/Runtime/RegExpStringIterator.h>
#include <LibJS/Runtime/RegExpStringIteratorPrototype.h>
#include <LibJS/Runtime/Utf16String.h>
namespace JS {

View File

@ -13,6 +13,7 @@
#include <LibJS/Runtime/GlobalObject.h>
#include <LibJS/Runtime/StringConstructor.h>
#include <LibJS/Runtime/StringObject.h>
#include <LibJS/Runtime/Utf16String.h>
namespace JS {
@ -135,7 +136,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_char_code)
string.append(code_unit);
}
return js_string(vm, move(string));
return js_string(vm, Utf16String(move(string)));
}
// 22.1.2.2 String.fromCodePoint ( ...codePoints ), https://tc39.es/ecma262/#sec-string.fromcodepoint
@ -161,7 +162,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
AK::code_point_to_utf16(string, static_cast<u32>(code_point));
}
return js_string(vm, move(string));
return js_string(vm, Utf16String(move(string)));
}
}

View File

@ -19,6 +19,7 @@
#include <LibJS/Runtime/StringIterator.h>
#include <LibJS/Runtime/StringObject.h>
#include <LibJS/Runtime/StringPrototype.h>
#include <LibJS/Runtime/Utf16String.h>
#include <LibJS/Runtime/Value.h>
#include <LibUnicode/CharacterTypes.h>
#include <string.h>
@ -33,7 +34,7 @@ static Optional<String> ak_string_from(VM& vm, GlobalObject& global_object)
return this_value.to_string(global_object);
}
static Vector<u16> utf16_string_from(VM& vm, GlobalObject& global_object)
static Utf16String utf16_string_from(VM& vm, GlobalObject& global_object)
{
auto this_value = require_object_coercible(global_object, vm.this_value(global_object));
if (vm.exception())
@ -186,7 +187,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::char_at)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
if (position < 0 || position >= utf16_string_view.length_in_code_units())
return js_string(vm, String::empty());
@ -203,7 +204,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::char_code_at)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
if (position < 0 || position >= utf16_string_view.length_in_code_units())
return js_nan();
@ -220,7 +221,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::code_point_at)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
if (position < 0 || position >= utf16_string_view.length_in_code_units())
return js_undefined();
@ -283,10 +284,10 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::starts_with)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto string_length = utf16_string_view.length_in_code_units();
Utf16View utf16_search_view { search_string };
auto utf16_search_view = search_string.view();
auto search_length = utf16_search_view.length_in_code_units();
size_t start = 0;
@ -329,10 +330,10 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::ends_with)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto string_length = utf16_string_view.length_in_code_units();
Utf16View utf16_search_view { search_string };
auto utf16_search_view = search_string.view();
auto search_length = utf16_search_view.length_in_code_units();
size_t end = string_length;
@ -365,8 +366,8 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::index_of)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
Utf16View utf16_search_view { search_string };
auto utf16_string_view = string.view();
auto utf16_search_view = search_string.view();
size_t start = 0;
if (vm.argument_count() > 1) {
@ -448,15 +449,14 @@ static Value pad_string(GlobalObject& global_object, String const& string, PadPl
{
auto& vm = global_object.vm();
auto utf16_string = AK::utf8_to_utf16(string);
Utf16View utf16_string_view { utf16_string };
auto string_length = utf16_string_view.length_in_code_units();
Utf16String utf16_string(string);
auto string_length = utf16_string.length_in_code_units();
auto max_length = vm.argument(0).to_length(global_object);
if (vm.exception())
return {};
if (max_length <= string_length)
return js_string(vm, utf16_string_view);
return js_string(vm, move(utf16_string));
String fill_string = " ";
if (!vm.argument(1).is_undefined()) {
@ -464,11 +464,11 @@ static Value pad_string(GlobalObject& global_object, String const& string, PadPl
if (vm.exception())
return {};
if (fill_string.is_empty())
return js_string(vm, utf16_string_view);
return js_string(vm, move(utf16_string));
}
auto utf16_fill_string = AK::utf8_to_utf16(fill_string);
Utf16View utf16_fill_view { utf16_fill_string };
Utf16String utf16_fill_string(fill_string);
auto utf16_fill_view = utf16_fill_string.view();
auto fill_code_units = utf16_fill_view.length_in_code_units();
auto fill_length = max_length - string_length;
@ -558,7 +558,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::substring)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto string_length = static_cast<double>(utf16_string_view.length_in_code_units());
auto start = vm.argument(0).to_integer_or_infinity(global_object);
@ -588,7 +588,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::substr)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto size = utf16_string_view.length_in_code_units();
auto int_start = vm.argument(0).to_integer_or_infinity(global_object);
@ -637,8 +637,8 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::includes)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
Utf16View utf16_search_view { search_string };
auto utf16_string_view = string.view();
auto utf16_search_view = search_string.view();
size_t start = 0;
if (!vm.argument(1).is_undefined()) {
@ -659,7 +659,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::slice)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto string_length = static_cast<double>(utf16_string_view.length_in_code_units());
auto int_start = vm.argument(0).to_integer_or_infinity(global_object);
@ -730,10 +730,10 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::split)
if (limit == 0)
return array;
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto string_length = utf16_string_view.length_in_code_units();
Utf16View utf16_separator_view { separator };
auto utf16_separator_view = separator.view();
auto separator_length = utf16_separator_view.length_in_code_units();
if (separator_argument.is_undefined()) {
@ -782,10 +782,10 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::last_index_of)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto string_length = utf16_string_view.length_in_code_units();
Utf16View utf16_search_view { search_string };
auto utf16_search_view = search_string.view();
auto search_length = utf16_search_view.length_in_code_units();
auto position = vm.argument(1).to_number(global_object);
@ -822,7 +822,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::at)
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto length = utf16_string_view.length_in_code_units();
auto relative_index = vm.argument(0).to_integer_or_infinity(global_object);
@ -873,7 +873,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match)
auto string = this_object.to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto rx = regexp_create(global_object, regexp, js_undefined());
if (!rx)
@ -916,7 +916,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match_all)
auto string = this_object.to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto rx = regexp_create(global_object, regexp, js_string(vm, "g"));
if (!rx)
@ -951,13 +951,13 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace)
auto replace_string = replace_value.to_utf16_string(global_object);
if (vm.exception())
return {};
replace_value = js_string(vm, Utf16View { replace_string });
replace_value = js_string(vm, move(replace_string));
if (vm.exception())
return {};
}
Utf16View utf16_string_view { string };
Utf16View utf16_search_view { search_string };
auto utf16_string_view = string.view();
auto utf16_search_view = search_string.view();
Optional<size_t> position = string_index_of(utf16_string_view, utf16_search_view, 0);
if (!position.has_value())
@ -1042,15 +1042,15 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace_all)
auto replace_string = replace_value.to_utf16_string(global_object);
if (vm.exception())
return {};
replace_value = js_string(vm, Utf16View { replace_string });
replace_value = js_string(vm, move(replace_string));
if (vm.exception())
return {};
}
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto string_length = utf16_string_view.length_in_code_units();
Utf16View utf16_search_view { search_string };
auto utf16_search_view = search_string.view();
auto search_length = utf16_search_view.length_in_code_units();
Vector<size_t> match_positions;
@ -1114,7 +1114,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::search)
auto string = this_object.to_utf16_string(global_object);
if (vm.exception())
return {};
Utf16View utf16_string_view { string };
auto utf16_string_view = string.view();
auto rx = regexp_create(global_object, regexp, js_undefined());
if (!rx)

View File

@ -8,7 +8,6 @@
#include <AK/AllOf.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/Utf16View.h>
#include <AK/Utf8View.h>
#include <LibCrypto/BigInt/SignedBigInteger.h>
#include <LibCrypto/NumberTheory/ModularFunctions.h>
@ -363,7 +362,7 @@ String Value::to_string(GlobalObject& global_object, bool legacy_null_to_empty_s
}
}
Vector<u16> Value::to_utf16_string(GlobalObject& global_object) const
Utf16String Value::to_utf16_string(GlobalObject& global_object) const
{
if (m_type == Type::String)
return m_value.as_string->utf16_string();
@ -372,7 +371,7 @@ Vector<u16> Value::to_utf16_string(GlobalObject& global_object) const
if (global_object.vm().exception())
return {};
return AK::utf8_to_utf16(utf8_string);
return Utf16String(utf8_string);
}
// 7.1.2 ToBoolean ( argument ), https://tc39.es/ecma262/#sec-toboolean

View File

@ -18,6 +18,7 @@
#include <LibJS/Forward.h>
#include <LibJS/Runtime/BigInt.h>
#include <LibJS/Runtime/PrimitiveString.h>
#include <LibJS/Runtime/Utf16String.h>
#include <math.h>
// 2 ** 53 - 1
@ -246,7 +247,7 @@ public:
u64 encoded() const { return m_value.encoded; }
String to_string(GlobalObject&, bool legacy_null_to_empty_string = false) const;
Vector<u16> to_utf16_string(GlobalObject&) const;
Utf16String to_utf16_string(GlobalObject&) const;
PrimitiveString* to_primitive_string(GlobalObject&);
Value to_primitive(GlobalObject&, PreferredType preferred_type = PreferredType::Default) const;
Object* to_object(GlobalObject&) const;