LibWeb: Flesh out the remaining DOCTYPE related tokenizer states

We can now parse public and system identifiers! Not super useful, but
at least we can do it :^)
This commit is contained in:
Andreas Kling 2020-05-25 19:50:44 +02:00
parent 556a6eea61
commit 406fd95f32
Notes: sideshowbarker 2024-07-19 06:09:03 +09:00

View File

@@ -322,6 +322,317 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
END_STATE
// AfterDOCTYPEName: decides what follows the DOCTYPE name. A '>' emits the
// current token and switches to Data; a case-insensitive "PUBLIC" or "SYSTEM"
// keyword switches to the matching After...Keyword state. EOF and any other
// input are not implemented yet and end up in TODO().
BEGIN_STATE(AfterDOCTYPEName)
{
ON_WHITESPACE
{
// No state switch on whitespace.
continue;
}
ON('>')
{
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
// The first letter of the keyword is compared against both ASCII cases
// explicitly rather than through toupper(): toupper() has undefined
// behavior for arguments that are not representable as unsigned char,
// and the current input character is presumably a full code point
// (TODO confirm its type), so non-Latin-1 input must not reach it.
if ((current_input_character.value() == 'P' || current_input_character.value() == 'p') && consume_next_if_match("UBLIC", CaseSensitivity::CaseInsensitive)) {
SWITCH_TO(AfterDOCTYPEPublicKeyword);
}
if ((current_input_character.value() == 'S' || current_input_character.value() == 's') && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) {
SWITCH_TO(AfterDOCTYPESystemKeyword);
}
// Neither keyword matched: not implemented yet.
TODO();
}
}
END_STATE
// AfterDOCTYPEPublicKeyword: entered from AfterDOCTYPEName once the "PUBLIC"
// keyword has been matched. Only the whitespace case is implemented so far;
// every other input ends up in a TODO() placeholder.
BEGIN_STATE(AfterDOCTYPEPublicKeyword)
{
ON_WHITESPACE
{
SWITCH_TO(BeforeDOCTYPEPublicIdentifier);
}
ON('"')
{
// Not implemented: opening double quote directly after the keyword.
TODO();
}
ON('\'')
{
// Not implemented: opening single quote directly after the keyword.
TODO();
}
ON('>')
{
// Not implemented: DOCTYPE closed before any identifier was given.
TODO();
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
TODO();
}
}
END_STATE
// AfterDOCTYPESystemKeyword: entered from AfterDOCTYPEName once the "SYSTEM"
// keyword has been matched. Only the whitespace case is implemented so far;
// every other input ends up in a TODO() placeholder.
BEGIN_STATE(AfterDOCTYPESystemKeyword)
{
ON_WHITESPACE
{
SWITCH_TO(BeforeDOCTYPESystemIdentifier);
}
ON('"')
{
// Not implemented: opening double quote directly after the keyword.
TODO();
}
ON('\'')
{
// Not implemented: opening single quote directly after the keyword.
TODO();
}
ON('>')
{
// Not implemented: DOCTYPE closed before any identifier was given.
TODO();
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
TODO();
}
}
END_STATE
// BeforeDOCTYPEPublicIdentifier: waits for the opening quote of the public
// identifier. On either quote character the token's public identifier is
// cleared and the matching quoted-identifier state is entered. '>', EOF and
// any other input are not implemented yet.
BEGIN_STATE(BeforeDOCTYPEPublicIdentifier)
{
ON_WHITESPACE
{
// No state switch on whitespace.
continue;
}
ON('"')
{
// Start from an empty identifier before collecting its characters.
m_current_token.m_doctype.public_identifier.clear();
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
}
ON('\'')
{
// Start from an empty identifier before collecting its characters.
m_current_token.m_doctype.public_identifier.clear();
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
}
ON('>')
{
TODO();
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
TODO();
}
}
END_STATE
// BeforeDOCTYPESystemIdentifier: waits for the opening quote of the system
// identifier. On either quote character the token's system identifier is
// cleared and the matching quoted-identifier state is entered. '>', EOF and
// any other input are not implemented yet.
BEGIN_STATE(BeforeDOCTYPESystemIdentifier)
{
ON_WHITESPACE
{
// No state switch on whitespace.
continue;
}
ON('"')
{
// Start from an empty identifier before collecting its characters.
m_current_token.m_doctype.system_identifier.clear();
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
// Start from an empty identifier before collecting its characters.
m_current_token.m_doctype.system_identifier.clear();
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON('>')
{
TODO();
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
TODO();
}
}
END_STATE
// DOCTYPEPublicIdentifierDoubleQuoted: collects the characters of a
// double-quoted public identifier into the current DOCTYPE token until the
// closing '"' is seen. The 0 (NUL), '>' and EOF cases are not implemented yet.
BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuoted)
{
ON('"')
{
// Closing quote: the identifier is complete.
SWITCH_TO(AfterDOCTYPEPublicIdentifier);
}
ON(0)
{
TODO();
}
ON('>')
{
TODO();
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
// Every other character is appended to the identifier; no state switch.
// NOTE(review): if append() takes a single byte, code points above 0xFF
// would be truncated here — confirm which overload is selected.
m_current_token.m_doctype.public_identifier.append(current_input_character.value());
continue;
}
}
END_STATE
// DOCTYPEPublicIdentifierSingleQuoted: collects the characters of a
// single-quoted public identifier into the current DOCTYPE token until the
// closing '\'' is seen. The 0 (NUL), '>' and EOF cases are not implemented yet.
BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuoted)
{
ON('\'')
{
// Closing quote: the identifier is complete.
SWITCH_TO(AfterDOCTYPEPublicIdentifier);
}
ON(0)
{
TODO();
}
ON('>')
{
TODO();
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
// Every other character is appended to the identifier; no state switch.
// NOTE(review): if append() takes a single byte, code points above 0xFF
// would be truncated here — confirm which overload is selected.
m_current_token.m_doctype.public_identifier.append(current_input_character.value());
continue;
}
}
END_STATE
// DOCTYPESystemIdentifierDoubleQuoted: collects the characters of a
// double-quoted system identifier into the current DOCTYPE token until the
// closing '"' is seen. The 0 (NUL), '>' and EOF cases are not implemented yet.
BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuoted)
{
ON('"')
{
// Closing quote: the identifier is complete.
SWITCH_TO(AfterDOCTYPESystemIdentifier);
}
ON(0)
{
TODO();
}
ON('>')
{
TODO();
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
// Every other character is appended to the identifier; no state switch.
// NOTE(review): if append() takes a single byte, code points above 0xFF
// would be truncated here — confirm which overload is selected.
m_current_token.m_doctype.system_identifier.append(current_input_character.value());
continue;
}
}
END_STATE
// DOCTYPESystemIdentifierSingleQuoted: collects the characters of a
// single-quoted system identifier into the current DOCTYPE token until the
// closing '\'' is seen. The 0 (NUL), '>' and EOF cases are not implemented yet.
BEGIN_STATE(DOCTYPESystemIdentifierSingleQuoted)
{
ON('\'')
{
// Closing quote: the identifier is complete.
SWITCH_TO(AfterDOCTYPESystemIdentifier);
}
ON(0)
{
TODO();
}
ON('>')
{
TODO();
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
// Every other character is appended to the identifier; no state switch.
// NOTE(review): if append() takes a single byte, code points above 0xFF
// would be truncated here — confirm which overload is selected.
m_current_token.m_doctype.system_identifier.append(current_input_character.value());
continue;
}
}
END_STATE
// AfterDOCTYPEPublicIdentifier: entered after the closing quote of the public
// identifier. Whitespace leads to BetweenDOCTYPEPublicAndSystemIdentifiers and
// '>' emits the current token and switches to Data. A quote immediately after
// the public identifier, EOF and any other input are not implemented yet.
BEGIN_STATE(AfterDOCTYPEPublicIdentifier)
{
ON_WHITESPACE
{
SWITCH_TO(BetweenDOCTYPEPublicAndSystemIdentifiers);
}
ON('>')
{
// DOCTYPE ends here, with a public identifier only.
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON('"')
{
// Not implemented: system identifier starting without separating whitespace.
TODO();
}
ON('\'')
{
// Not implemented: system identifier starting without separating whitespace.
TODO();
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
TODO();
}
}
END_STATE
// BetweenDOCTYPEPublicAndSystemIdentifiers: entered on whitespace after the
// public identifier. '>' emits the current token and switches to Data; either
// quote character clears the token's system identifier and enters the matching
// quoted system-identifier state. EOF and any other input are not implemented
// yet.
BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiers)
{
ON_WHITESPACE
{
// No state switch on whitespace.
continue;
}
ON('>')
{
// DOCTYPE ends here, without a system identifier.
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON('"')
{
// Start from an empty identifier before collecting its characters.
m_current_token.m_doctype.system_identifier.clear();
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
// Start from an empty identifier before collecting its characters.
m_current_token.m_doctype.system_identifier.clear();
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON_EOF
{
TODO();
}
ANYTHING_ELSE
{
TODO();
}
}
END_STATE
BEGIN_STATE(AfterDOCTYPESystemIdentifier)
{
ON_WHITESPACE
{