LibWeb: Checking for "DOCTYPE" should be case insensitive in tokenizer

This commit is contained in:
Andreas Kling 2020-05-25 19:22:23 +02:00
parent 1df2a3d8ce
commit 556a6eea61
Notes: sideshowbarker 2024-07-19 06:09:06 +09:00
2 changed files with 12 additions and 13 deletions

View File

@ -227,13 +227,11 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
BEGIN_STATE(MarkupDeclarationOpen) BEGIN_STATE(MarkupDeclarationOpen)
{ {
DONT_CONSUME_NEXT_INPUT_CHARACTER; DONT_CONSUME_NEXT_INPUT_CHARACTER;
if (next_few_characters_are("--")) { if (consume_next_if_match("--")) {
consume("--");
create_new_token(HTMLToken::Type::Comment); create_new_token(HTMLToken::Type::Comment);
SWITCH_TO(CommentStart); SWITCH_TO(CommentStart);
} }
if (next_few_characters_are("DOCTYPE")) { if (consume_next_if_match("DOCTYPE", CaseSensitivity::CaseInsensitive)) {
consume("DOCTYPE");
SWITCH_TO(DOCTYPE); SWITCH_TO(DOCTYPE);
} }
} }
@ -1029,22 +1027,24 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
} }
} }
void HTMLTokenizer::consume(const StringView& string) bool HTMLTokenizer::consume_next_if_match(const StringView& string, CaseSensitivity case_sensitivity)
{
ASSERT(next_few_characters_are(string));
m_cursor += string.length();
}
bool HTMLTokenizer::next_few_characters_are(const StringView& string) const
{ {
for (size_t i = 0; i < string.length(); ++i) { for (size_t i = 0; i < string.length(); ++i) {
auto codepoint = peek_codepoint(i); auto codepoint = peek_codepoint(i);
if (!codepoint.has_value()) if (!codepoint.has_value())
return false; return false;
// FIXME: This should be more Unicode-aware. // FIXME: This should be more Unicode-aware.
if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
if (codepoint.value() < 0x80) {
if (tolower(codepoint.value()) != tolower(string[i]))
return false;
continue;
}
}
if (codepoint.value() != (u32)string[i]) if (codepoint.value() != (u32)string[i])
return false; return false;
} }
m_cursor += string.length();
return true; return true;
} }

View File

@ -132,8 +132,7 @@ public:
private: private:
Optional<u32> next_codepoint(); Optional<u32> next_codepoint();
Optional<u32> peek_codepoint(size_t offset) const; Optional<u32> peek_codepoint(size_t offset) const;
bool next_few_characters_are(const StringView&) const; bool consume_next_if_match(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive);
void consume(const StringView&);
void create_new_token(HTMLToken::Type); void create_new_token(HTMLToken::Type);
bool current_end_tag_token_is_appropriate() const; bool current_end_tag_token_is_appropriate() const;