mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-08 20:32:56 +03:00
LibRegex: Fix ECMA-262 parsing of invalid identity escapes
* Only alphabetic (A-Z, a-z) characters may be escaped with \c. The loop currently parsing \c includes code points between the upper/lower case groups.
* In Unicode mode, all invalid identity escapes should cause a parser error, even in browser-extended mode.
* Avoid an infinite loop when parsing the pattern "\c" on its own.
This commit is contained in:
parent
51b3fb5532
commit
e887314472
Notes:
sideshowbarker
2024-07-18 05:40:56 +09:00
Author: https://github.com/trflynn89
Commit: https://github.com/SerenityOS/serenity/commit/e887314472b
Pull-request: https://github.com/SerenityOS/serenity/pull/9367
Reviewed-by: https://github.com/linusg ✅
@ -525,6 +525,18 @@ TEST_CASE(ECMA262_parse)
|
||||
{ "\\\\p{1}", regex::Error::NoError, ECMAScriptFlags::Unicode },
|
||||
{ "\\\\p{AsCiI}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
{ "\\\\p{ASCII}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
{ "\\c", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\c", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
{ "[\\c]", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "[\\c]", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
{ "\\c`", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\c`", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
{ "[\\c`]", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "[\\c`]", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
{ "\\A", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\A", regex::Error::InvalidCharacterClass, ECMAScriptFlags::Unicode },
|
||||
{ "[\\A]", regex::Error::NoError, ECMAScriptFlags::BrowserExtended },
|
||||
{ "[\\A]", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
};
|
||||
|
||||
for (auto& test : tests) {
|
||||
@ -579,6 +591,7 @@ TEST_CASE(ECMA262_match)
|
||||
{ "\\05", "\5", true, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\455", "\45""5", true, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\314", "\314", true, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\c", "\\c", true, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\cf", "\06", true, ECMAScriptFlags::BrowserExtended },
|
||||
{ "\\c1", "\\c1", true, ECMAScriptFlags::BrowserExtended },
|
||||
{ "[\\c1]", "\x11", true, ECMAScriptFlags::BrowserExtended },
|
||||
|
@ -17,6 +17,7 @@
|
||||
namespace regex {
|
||||
|
||||
static constexpr size_t s_maximum_repetition_count = 1024 * 1024;
|
||||
static constexpr auto s_alphabetic_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"sv;
|
||||
|
||||
ALWAYS_INLINE bool Parser::set_error(Error error)
|
||||
{
|
||||
@ -1400,7 +1401,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
|
||||
|
||||
// CharacterEscape > ControlLetter
|
||||
if (try_skip("c")) {
|
||||
for (auto c = 'A'; c <= 'z'; ++c) {
|
||||
for (auto c : s_alphabetic_characters) {
|
||||
if (try_skip({ &c, 1 })) {
|
||||
match_length_minimum += 1;
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)(c % 32) } });
|
||||
@ -1408,18 +1409,18 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
|
||||
}
|
||||
}
|
||||
|
||||
if (m_should_use_browser_extended_grammar) {
|
||||
back(2);
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'\\' } });
|
||||
match_length_minimum += 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (unicode) {
|
||||
set_error(Error::InvalidPattern);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_should_use_browser_extended_grammar) {
|
||||
back(1 + !done());
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'\\' } });
|
||||
match_length_minimum += 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allow '\c' in non-unicode mode, just matches 'c'.
|
||||
match_length_minimum += 1;
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'c' } });
|
||||
@ -1768,10 +1769,17 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
|
||||
|
||||
// CharacterEscape > ControlLetter
|
||||
if (try_skip("c")) {
|
||||
for (auto c = 'A'; c <= 'z'; ++c) {
|
||||
if (try_skip({ &c, 1 }))
|
||||
for (auto c : s_alphabetic_characters) {
|
||||
if (try_skip({ &c, 1 })) {
|
||||
return { CharClassRangeElement { .code_point = (u32)(c % 32), .is_character_class = false } };
|
||||
}
|
||||
}
|
||||
|
||||
if (unicode) {
|
||||
set_error(Error::InvalidPattern);
|
||||
return {};
|
||||
}
|
||||
|
||||
if (m_should_use_browser_extended_grammar) {
|
||||
for (auto c = '0'; c <= '9'; ++c) {
|
||||
if (try_skip({ &c, 1 }))
|
||||
@ -1780,7 +1788,7 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
|
||||
if (try_skip("_"))
|
||||
return { CharClassRangeElement { .code_point = (u32)('_' % 32), .is_character_class = false } };
|
||||
|
||||
back(2);
|
||||
back(1 + !done());
|
||||
return { CharClassRangeElement { .code_point = '\\', .is_character_class = false } };
|
||||
}
|
||||
}
|
||||
@ -1856,6 +1864,9 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
|
||||
// Any unrecognised escape is allowed in non-unicode mode.
|
||||
return { CharClassRangeElement { .code_point = (u32)skip(), .is_character_class = false } };
|
||||
}
|
||||
|
||||
set_error(Error::InvalidPattern);
|
||||
return {};
|
||||
}
|
||||
|
||||
if (match(TokenType::RightBracket) || match(TokenType::HyphenMinus))
|
||||
|
Loading…
Reference in New Issue
Block a user