LibWeb: Checking for "DOCTYPE" should be case insensitive in tokenizer

This commit is contained in:
Andreas Kling 2020-05-25 19:22:23 +02:00
parent 1df2a3d8ce
commit 556a6eea61
Notes: sideshowbarker 2024-07-19 06:09:06 +09:00
2 changed files with 12 additions and 13 deletions

View File

@ -227,13 +227,11 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
BEGIN_STATE(MarkupDeclarationOpen)
{
DONT_CONSUME_NEXT_INPUT_CHARACTER;
if (next_few_characters_are("--")) {
consume("--");
if (consume_next_if_match("--")) {
create_new_token(HTMLToken::Type::Comment);
SWITCH_TO(CommentStart);
}
if (next_few_characters_are("DOCTYPE")) {
consume("DOCTYPE");
if (consume_next_if_match("DOCTYPE", CaseSensitivity::CaseInsensitive)) {
SWITCH_TO(DOCTYPE);
}
}
@ -1029,22 +1027,24 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
}
}
// Skips the cursor forward over `string`.
//
// The caller is expected to have checked beforehand that the upcoming input
// really is `string`; this is enforced with an ASSERT on
// next_few_characters_are(). The cursor moves by string.length().
void HTMLTokenizer::consume(const StringView& string)
{
    ASSERT(next_few_characters_are(string));

    const auto consumed_length = string.length();
    m_cursor += consumed_length;
}
bool HTMLTokenizer::next_few_characters_are(const StringView& string) const
bool HTMLTokenizer::consume_next_if_match(const StringView& string, CaseSensitivity case_sensitivity)
{
for (size_t i = 0; i < string.length(); ++i) {
auto codepoint = peek_codepoint(i);
if (!codepoint.has_value())
return false;
// FIXME: This should be more Unicode-aware.
if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
if (codepoint.value() < 0x80) {
if (tolower(codepoint.value()) != tolower(string[i]))
return false;
continue;
}
}
if (codepoint.value() != (u32)string[i])
return false;
}
m_cursor += string.length();
return true;
}

View File

@ -132,8 +132,7 @@ public:
private:
Optional<u32> next_codepoint();
Optional<u32> peek_codepoint(size_t offset) const;
bool next_few_characters_are(const StringView&) const;
void consume(const StringView&);
bool consume_next_if_match(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive);
void create_new_token(HTMLToken::Type);
bool current_end_tag_token_is_appropriate() const;