From c85ab623c055aef73185400a5ba9006c8f8a87d5 Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Sun, 18 Jul 2021 05:15:10 +0430 Subject: [PATCH] LibJS: Use a Utf8View on the subject if the regex has the unicode flag This makes LibRegex behave (more) correctly with regard to matching Unicode code points. --- Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp index 5881153ce98..c3c18e9251f 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp @@ -240,7 +240,12 @@ static Value regexp_builtin_exec(GlobalObject& global_object, RegExpObject& rege } regex.start_offset = last_index; - result = regex.match(string); + // FIXME: JavaScript strings are UTF-16, update this if the backing storage + // encoding changes for spec compliance reasons. + if (unicode) + result = regex.match(Utf8View { string }); + else + result = regex.match(string); if (result.success) break;