LibWeb: Checking for "DOCTYPE" should be case insensitive in tokenizer

Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/556a6eea615
2024-11-11 01:06:01 +03:00 · 2020-05-25 19:22:23 +02:00 · 2020-05-25 19:22:23 +02:00 · 556a6eea61 · 2024-07-19 06:09:06 +09:00
commit 556a6eea61
parent 1df2a3d8ce
2 changed files with 12 additions and 13 deletions
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -227,13 +227,11 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
            BEGIN_STATE(MarkupDeclarationOpen)
            {
                DONT_CONSUME_NEXT_INPUT_CHARACTER;
-                if (next_few_characters_are("--")) {
+                if (consume_next_if_match("--")) {
-                    consume("--");
                    create_new_token(HTMLToken::Type::Comment);
                    SWITCH_TO(CommentStart);
                }
-                if (next_few_characters_are("DOCTYPE")) {
+                if (consume_next_if_match("DOCTYPE", CaseSensitivity::CaseInsensitive)) {
-                    consume("DOCTYPE");
                    SWITCH_TO(DOCTYPE);
                }
            }
@@ -1029,22 +1027,24 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
    }
 }
-void HTMLTokenizer::consume(const StringView& string)
+bool HTMLTokenizer::consume_next_if_match(const StringView& string, CaseSensitivity case_sensitivity)
 {
    ASSERT(next_few_characters_are(string));
    m_cursor += string.length();
 }
 bool HTMLTokenizer::next_few_characters_are(const StringView& string) const
 {
    for (size_t i = 0; i < string.length(); ++i) {
        auto codepoint = peek_codepoint(i);
        if (!codepoint.has_value())
            return false;
        // FIXME: This should be more Unicode-aware.
        if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
            if (codepoint.value() < 0x80) {
                if (tolower(codepoint.value()) != tolower(string[i]))
                    return false;
                continue;
            }
        }
        if (codepoint.value() != (u32)string[i])
            return false;
    }
    m_cursor += string.length();
    return true;
 }
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.h
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.h
@@ -132,8 +132,7 @@ public:
 private:
    Optional<u32> next_codepoint();
    Optional<u32> peek_codepoint(size_t offset) const;
-    bool next_few_characters_are(const StringView&) const;
+    bool consume_next_if_match(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive);
-    void consume(const StringView&);
    void create_new_token(HTMLToken::Type);
    bool current_end_tag_token_is_appropriate() const;