LibJS: Start implementing Intl.Segmenter

This commit is contained in:
Idan Horowitz 2022-01-29 23:47:29 +02:00 committed by Linus Groh
parent 88c5992e0b
commit a3bc06bb23
Notes: sideshowbarker 2024-07-17 19:58:51 +09:00
13 changed files with 334 additions and 9 deletions

View File

@ -115,6 +115,9 @@ set(SOURCES
Runtime/Intl/RelativeTimeFormat.cpp
Runtime/Intl/RelativeTimeFormatConstructor.cpp
Runtime/Intl/RelativeTimeFormatPrototype.cpp
Runtime/Intl/Segmenter.cpp
Runtime/Intl/SegmenterConstructor.cpp
Runtime/Intl/SegmenterPrototype.cpp
Runtime/IteratorOperations.cpp
Runtime/IteratorPrototype.cpp
Runtime/JSONObject.cpp

View File

@ -67,15 +67,16 @@
__JS_ENUMERATE(Float32Array, float32_array, Float32ArrayPrototype, Float32ArrayConstructor, float) \
__JS_ENUMERATE(Float64Array, float64_array, Float64ArrayPrototype, Float64ArrayConstructor, double)
#define JS_ENUMERATE_INTL_OBJECTS \
__JS_ENUMERATE(Collator, collator, CollatorPrototype, CollatorConstructor) \
__JS_ENUMERATE(DateTimeFormat, date_time_format, DateTimeFormatPrototype, DateTimeFormatConstructor) \
__JS_ENUMERATE(DisplayNames, display_names, DisplayNamesPrototype, DisplayNamesConstructor) \
__JS_ENUMERATE(ListFormat, list_format, ListFormatPrototype, ListFormatConstructor) \
__JS_ENUMERATE(Locale, locale, LocalePrototype, LocaleConstructor) \
__JS_ENUMERATE(NumberFormat, number_format, NumberFormatPrototype, NumberFormatConstructor) \
__JS_ENUMERATE(PluralRules, plural_rules, PluralRulesPrototype, PluralRulesConstructor) \
__JS_ENUMERATE(RelativeTimeFormat, relative_time_format, RelativeTimeFormatPrototype, RelativeTimeFormatConstructor)
#define JS_ENUMERATE_INTL_OBJECTS \
__JS_ENUMERATE(Collator, collator, CollatorPrototype, CollatorConstructor) \
__JS_ENUMERATE(DateTimeFormat, date_time_format, DateTimeFormatPrototype, DateTimeFormatConstructor) \
__JS_ENUMERATE(DisplayNames, display_names, DisplayNamesPrototype, DisplayNamesConstructor) \
__JS_ENUMERATE(ListFormat, list_format, ListFormatPrototype, ListFormatConstructor) \
__JS_ENUMERATE(Locale, locale, LocalePrototype, LocaleConstructor) \
__JS_ENUMERATE(NumberFormat, number_format, NumberFormatPrototype, NumberFormatConstructor) \
__JS_ENUMERATE(PluralRules, plural_rules, PluralRulesPrototype, PluralRulesConstructor) \
__JS_ENUMERATE(RelativeTimeFormat, relative_time_format, RelativeTimeFormatPrototype, RelativeTimeFormatConstructor) \
__JS_ENUMERATE(Segmenter, segmenter, SegmenterPrototype, SegmenterConstructor)
#define JS_ENUMERATE_TEMPORAL_OBJECTS \
__JS_ENUMERATE(Calendar, calendar, CalendarPrototype, CalendarConstructor) \

View File

@ -240,6 +240,7 @@ namespace JS {
P(getYear) \
P(global) \
P(globalThis) \
P(granularity) \
P(group) \
P(groupBy) \
P(groupByToMap) \

View File

@ -67,6 +67,8 @@
#include <LibJS/Runtime/Intl/PluralRulesPrototype.h>
#include <LibJS/Runtime/Intl/RelativeTimeFormatConstructor.h>
#include <LibJS/Runtime/Intl/RelativeTimeFormatPrototype.h>
#include <LibJS/Runtime/Intl/SegmenterConstructor.h>
#include <LibJS/Runtime/Intl/SegmenterPrototype.h>
#include <LibJS/Runtime/IteratorPrototype.h>
#include <LibJS/Runtime/JSONObject.h>
#include <LibJS/Runtime/MapConstructor.h>

View File

@ -16,6 +16,7 @@
#include <LibJS/Runtime/Intl/NumberFormatConstructor.h>
#include <LibJS/Runtime/Intl/PluralRulesConstructor.h>
#include <LibJS/Runtime/Intl/RelativeTimeFormatConstructor.h>
#include <LibJS/Runtime/Intl/SegmenterConstructor.h>
namespace JS::Intl {
@ -43,6 +44,7 @@ void Intl::initialize(GlobalObject& global_object)
define_direct_property(vm.names.NumberFormat, global_object.intl_number_format_constructor(), attr);
define_direct_property(vm.names.PluralRules, global_object.intl_plural_rules_constructor(), attr);
define_direct_property(vm.names.RelativeTimeFormat, global_object.intl_relative_time_format_constructor(), attr);
define_direct_property(vm.names.Segmenter, global_object.intl_segmenter_constructor(), attr);
define_native_function(vm.names.getCanonicalLocales, get_canonical_locales, 1, attr);
}

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibJS/Runtime/Intl/Segmenter.h>
namespace JS::Intl {
// 18 Segmenter Objects, https://tc39.es/ecma402/#segmenter-objects
// Constructs a Segmenter, forwarding the given prototype to the Object base class.
// Nothing else is initialized here: [[Locale]] and [[SegmenterGranularity]] keep
// their in-class defaults until set_locale() / set_segmenter_granularity() are called.
Segmenter::Segmenter(Object& prototype)
: Object(prototype)
{
}
// Stores the [[SegmenterGranularity]] slot from its string spelling.
// Exactly "grapheme", "word" and "sentence" are recognized; any other input is
// treated as a caller bug and ends in VERIFY_NOT_REACHED().
void Segmenter::set_segmenter_granularity(StringView segmenter_granularity)
{
    if (segmenter_granularity == "grapheme"sv) {
        m_segmenter_granularity = SegmenterGranularity::Grapheme;
        return;
    }

    if (segmenter_granularity == "word"sv) {
        m_segmenter_granularity = SegmenterGranularity::Word;
        return;
    }

    if (segmenter_granularity == "sentence"sv) {
        m_segmenter_granularity = SegmenterGranularity::Sentence;
        return;
    }

    // None of the known spellings matched.
    VERIFY_NOT_REACHED();
}
// Returns the string spelling of the current [[SegmenterGranularity]] slot:
// "grapheme", "word" or "sentence". A value outside the enum's three
// enumerators ends in VERIFY_NOT_REACHED().
StringView Segmenter::segmenter_granularity_string() const
{
    if (m_segmenter_granularity == SegmenterGranularity::Grapheme)
        return "grapheme"sv;

    if (m_segmenter_granularity == SegmenterGranularity::Word)
        return "word"sv;

    if (m_segmenter_granularity == SegmenterGranularity::Sentence)
        return "sentence"sv;

    VERIFY_NOT_REACHED();
}
}

View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/String.h>
#include <LibJS/Runtime/Object.h>
namespace JS::Intl {
// Backing object for Intl.Segmenter instances. It stores the two internal slots
// declared below: [[Locale]] and [[SegmenterGranularity]].
class Segmenter final : public Object {
JS_OBJECT(Segmenter, Object);
public:
// The values the [[SegmenterGranularity]] slot can hold.
enum class SegmenterGranularity {
Grapheme,
Word,
Sentence,
};
explicit Segmenter(Object& prototype);
virtual ~Segmenter() override = default;
// [[Locale]] accessors; the setter takes the string by value and moves it into the member.
String const& locale() const { return m_locale; }
void set_locale(String locale) { m_locale = move(locale); }
// [[SegmenterGranularity]] accessors: the enum getter is inline, while the
// StringView-based setter and the string getter are only declared here.
SegmenterGranularity segmenter_granularity() const { return m_segmenter_granularity; }
void set_segmenter_granularity(StringView);
StringView segmenter_granularity_string() const;
private:
String m_locale; // [[Locale]]
SegmenterGranularity m_segmenter_granularity { SegmenterGranularity::Grapheme }; // [[SegmenterGranularity]]
};
}

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibJS/Runtime/AbstractOperations.h>
#include <LibJS/Runtime/GlobalObject.h>
#include <LibJS/Runtime/Intl/AbstractOperations.h>
#include <LibJS/Runtime/Intl/Segmenter.h>
#include <LibJS/Runtime/Intl/SegmenterConstructor.h>
#include <LibJS/Runtime/Temporal/AbstractOperations.h>
namespace JS::Intl {
// 18.1 The Intl.Segmenter Constructor, https://tc39.es/ecma402/#sec-intl-segmenter-constructor
// Builds the constructor function object by handing NativeFunction the string form of
// vm().names.Segmenter and the global object's function_prototype().
// Own properties ("prototype", "length") are added later, in initialize().
SegmenterConstructor::SegmenterConstructor(GlobalObject& global_object)
: NativeFunction(vm().names.Segmenter.as_string(), *global_object.function_prototype())
{
}
// Runs the base NativeFunction initialization and then installs the constructor's
// own "prototype" and "length" properties, in that order.
void SegmenterConstructor::initialize(GlobalObject& global_object)
{
    NativeFunction::initialize(global_object);

    auto const& names = vm().names;

    // 18.2.1 Intl.Segmenter.prototype, https://tc39.es/ecma402/#sec-intl.segmenter.prototype
    // The attribute argument is 0, i.e. no attribute flags are set for "prototype".
    define_direct_property(names.prototype, global_object.intl_segmenter_prototype(), 0);

    // "length" is 0 and carries only the Configurable attribute.
    define_direct_property(names.length, Value(0), Attribute::Configurable);
}
// 18.1.1 Intl.Segmenter ( [ locales [ , options ] ] ), https://tc39.es/ecma402/#sec-intl.segmenter
// Entry point used when Intl.Segmenter is invoked as a plain function.
// It unconditionally returns a TypeError throw completion
// (ErrorType::ConstructorWithoutNew).
ThrowCompletionOr<Value> SegmenterConstructor::call()
{
    auto& vm = this->vm();
    auto& global_object = this->global_object();

    // 1. If NewTarget is undefined, throw a TypeError exception.
    return vm.throw_completion<TypeError>(global_object, ErrorType::ConstructorWithoutNew, "Intl.Segmenter");
}
// 18.1.1 Intl.Segmenter ( [ locales [ , options ] ] ), https://tc39.es/ecma402/#sec-intl.segmenter
// Implements the construction path of Intl.Segmenter (spec steps 2-14 below).
// Reads `locales` and `options` from VM arguments 0 and 1, creates a Segmenter via
// ordinary_create_from_constructor using `new_target`, then fills in its locale and
// granularity from the resolved locale and the "granularity" option.
// Returns the new Segmenter. Every TRY(...) below returns the callee's throw
// completion to the caller unchanged.
ThrowCompletionOr<Object*> SegmenterConstructor::construct(FunctionObject& new_target)
{
auto& vm = this->vm();
auto& global_object = this->global_object();
auto locales = vm.argument(0);
auto options_value = vm.argument(1);
// 2. Let internalSlotsList be « [[InitializedSegmenter]], [[Locale]], [[SegmenterGranularity]] ».
// 3. Let segmenter be ? OrdinaryCreateFromConstructor(NewTarget, "%Segmenter.prototype%", internalSlotsList).
auto* segmenter = TRY(ordinary_create_from_constructor<Segmenter>(global_object, new_target, &GlobalObject::intl_segmenter_prototype));
// 4. Let requestedLocales be ? CanonicalizeLocaleList(locales).
auto requested_locales = TRY(canonicalize_locale_list(global_object, locales));
// 5. Set options to ? GetOptionsObject(options).
// NOTE: The GetOptionsObject implementation used here lives in the Temporal namespace.
auto* options = TRY(Temporal::get_options_object(global_object, options_value));
// 6. Let opt be a new Record.
LocaleOptions opt {};
// 7. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
auto matcher = TRY(get_option(global_object, *options, vm.names.localeMatcher, Value::Type::String, { "lookup"sv, "best fit"sv }, "best fit"sv));
// 8. Set opt.[[localeMatcher]] to matcher.
opt.locale_matcher = matcher;
// 9. Let localeData be %Segmenter%.[[LocaleData]].
// 10. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]], requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]], localeData).
// NOTE(review): the trailing `{}` presumably stands for an empty relevant-extension-keys list; confirm against resolve_locale().
auto result = resolve_locale(requested_locales, opt, {});
// 11. Set segmenter.[[Locale]] to r.[[locale]].
segmenter->set_locale(move(result.locale));
// 12. Let granularity be ? GetOption(options, "granularity", "string", « "grapheme", "word", "sentence" », "grapheme").
auto granularity = TRY(get_option(global_object, *options, vm.names.granularity, Value::Type::String, { "grapheme"sv, "word"sv, "sentence"sv }, "grapheme"sv));
// 13. Set segmenter.[[SegmenterGranularity]] to granularity.
// The setter hits VERIFY_NOT_REACHED() for any string other than the three listed in step 12.
segmenter->set_segmenter_granularity(granularity.as_string().string());
// 14. Return segmenter.
return segmenter;
}
}

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <LibJS/Runtime/NativeFunction.h>
namespace JS::Intl {
// Native function object for the Intl.Segmenter constructor.
class SegmenterConstructor final : public NativeFunction {
JS_OBJECT(SegmenterConstructor, NativeFunction);
public:
explicit SegmenterConstructor(GlobalObject&);
virtual void initialize(GlobalObject&) override;
virtual ~SegmenterConstructor() override = default;
// Overrides of the NativeFunction entry points: call() takes no arguments,
// construct() receives the new_target function object.
virtual ThrowCompletionOr<Value> call() override;
virtual ThrowCompletionOr<Object*> construct(FunctionObject& new_target) override;
private:
// Always reports true. Presumably this is what marks the function as usable with `new`; confirm in NativeFunction.
virtual bool has_constructor() const override { return true; }
};
}

View File

@ -0,0 +1,29 @@
/*
* Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibJS/Runtime/GlobalObject.h>
#include <LibJS/Runtime/Intl/Segmenter.h>
#include <LibJS/Runtime/Intl/SegmenterPrototype.h>
namespace JS::Intl {
// 18.3 Properties of the Intl.Segmenter Prototype Object, https://tc39.es/ecma402/#sec-properties-of-intl-segmenter-prototype-object
// Constructs the prototype object, passing the global object's object_prototype()
// to the PrototypeObject base. Properties are installed later, in initialize().
SegmenterPrototype::SegmenterPrototype(GlobalObject& global_object)
: PrototypeObject(*global_object.object_prototype())
{
}
// Runs Object::initialize and then defines the prototype's @@toStringTag property.
void SegmenterPrototype::initialize(GlobalObject& global_object)
{
    Object::initialize(global_object);

    auto& vm = this->vm();
    auto& to_string_tag = *vm.well_known_symbol_to_string_tag();

    // 18.3.2 Intl.Segmenter.prototype [ @@toStringTag ], https://tc39.es/ecma402/#sec-intl.segmenter.prototype-@@tostringtag
    define_direct_property(to_string_tag, js_string(vm, "Intl.Segmenter"), Attribute::Configurable);
}
}

View File

@ -0,0 +1,23 @@
/*
* Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <LibJS/Runtime/Intl/Segmenter.h>
#include <LibJS/Runtime/PrototypeObject.h>
namespace JS::Intl {
// Prototype object for Intl.Segmenter, built on the
// PrototypeObject<SegmenterPrototype, Segmenter> template.
class SegmenterPrototype final : public PrototypeObject<SegmenterPrototype, Segmenter> {
JS_PROTOTYPE_OBJECT(SegmenterPrototype, Segmenter, Segmenter);
public:
explicit SegmenterPrototype(GlobalObject&);
// Installs the prototype's properties (defined in the matching .cpp file).
virtual void initialize(GlobalObject&) override;
virtual ~SegmenterPrototype() override = default;
};
}

View File

@ -0,0 +1,3 @@
// The prototype's @@toStringTag must be the string "Intl.Segmenter".
test("basic functionality", () => {
    const toStringTag = Intl.Segmenter.prototype[Symbol.toStringTag];
    expect(toStringTag).toBe("Intl.Segmenter");
});

View File

@ -0,0 +1,65 @@
// Failure cases of the Intl.Segmenter constructor: each call must throw the
// listed error type with the listed message.
describe("errors", () => {
    test("called without new", () => {
        expect(() => Intl.Segmenter()).toThrowWithMessage(
            TypeError,
            "Intl.Segmenter constructor must be called with 'new'"
        );
    });

    test("structurally invalid tag", () => {
        expect(() => new Intl.Segmenter("root")).toThrowWithMessage(
            RangeError,
            "root is not a structurally valid language tag"
        );

        expect(() => new Intl.Segmenter("en-")).toThrowWithMessage(
            RangeError,
            "en- is not a structurally valid language tag"
        );

        expect(() => new Intl.Segmenter("Latn")).toThrowWithMessage(
            RangeError,
            "Latn is not a structurally valid language tag"
        );

        expect(() => new Intl.Segmenter("en-u-aa-U-aa")).toThrowWithMessage(
            RangeError,
            "en-u-aa-U-aa is not a structurally valid language tag"
        );
    });

    test("options is an invalid type", () => {
        expect(() => new Intl.Segmenter("en", null)).toThrowWithMessage(
            TypeError,
            "Options is not an object"
        );
    });

    test("localeMatcher option is invalid", () => {
        expect(() => new Intl.Segmenter("en", { localeMatcher: "hello!" })).toThrowWithMessage(
            RangeError,
            "hello! is not a valid value for option localeMatcher"
        );
    });

    test("granularity option is invalid", () => {
        expect(() => new Intl.Segmenter("en", { granularity: "hello!" })).toThrowWithMessage(
            RangeError,
            "hello! is not a valid value for option granularity"
        );
    });
});
// Success cases: the constructor's length, and every accepted value of the
// localeMatcher and granularity options.
describe("normal behavior", () => {
    test("length is 0", () => {
        expect(Intl.Segmenter).toHaveLength(0);
    });

    test("all valid localeMatcher options", () => {
        for (const localeMatcher of ["lookup", "best fit"]) {
            expect(() => new Intl.Segmenter("en", { localeMatcher })).not.toThrow();
        }
    });

    test("all valid granularity options", () => {
        for (const granularity of ["grapheme", "word", "sentence"]) {
            expect(() => new Intl.Segmenter("en", { granularity })).not.toThrow();
        }
    });
});