LibWeb: Checking for "DOCTYPE" should be case insensitive in tokenizer

Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/556a6eea615
2024-09-21 02:08:12 +03:00 · 2020-05-25 19:22:23 +02:00 · 2020-05-25 19:22:23 +02:00 · 556a6eea61 · 2024-07-19 06:09:06 +09:00
commit 556a6eea61
parent 1df2a3d8ce
2 changed files with 12 additions and 13 deletions
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -227,13 +227,11 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
            BEGIN_STATE(MarkupDeclarationOpen)
            {
                DONT_CONSUME_NEXT_INPUT_CHARACTER;
-                if (next_few_characters_are("--")) {
-                    consume("--");
+                if (consume_next_if_match("--")) {
                    create_new_token(HTMLToken::Type::Comment);
                    SWITCH_TO(CommentStart);
                }
-                if (next_few_characters_are("DOCTYPE")) {
-                    consume("DOCTYPE");
+                if (consume_next_if_match("DOCTYPE", CaseSensitivity::CaseInsensitive)) {
                    SWITCH_TO(DOCTYPE);
                }
            }
@@ -1029,22 +1027,24 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
    }
 }

-void HTMLTokenizer::consume(const StringView& string)
-{
-    ASSERT(next_few_characters_are(string));
-    m_cursor += string.length();
-}
-
-bool HTMLTokenizer::next_few_characters_are(const StringView& string) const
+bool HTMLTokenizer::consume_next_if_match(const StringView& string, CaseSensitivity case_sensitivity)
 {
    for (size_t i = 0; i < string.length(); ++i) {
        auto codepoint = peek_codepoint(i);
        if (!codepoint.has_value())
            return false;
        // FIXME: This should be more Unicode-aware.
+        if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
+            if (codepoint.value() < 0x80) {
+                if (tolower(codepoint.value()) != tolower(string[i]))
+                    return false;
+                continue;
+            }
+        }
        if (codepoint.value() != (u32)string[i])
            return false;
    }
+    m_cursor += string.length();
    return true;
 }

--- a/Libraries/LibWeb/Parser/HTMLTokenizer.h
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.h
@@ -132,8 +132,7 @@ public:
 private:
    Optional<u32> next_codepoint();
    Optional<u32> peek_codepoint(size_t offset) const;
-    bool next_few_characters_are(const StringView&) const;
-    void consume(const StringView&);
+    bool consume_next_if_match(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive);
    void create_new_token(HTMLToken::Type);
    bool current_end_tag_token_is_appropriate() const;