LibWeb: Set the correct end position of HTML attribute names

We were previously setting the end position of attribute names in self-
closing HTML tags to the end of the attribute value. To illustrate the
previous behavior, consider this tag and its attribute's start and end
positions (shown inclusively below):

    <meta charset="UTF-8" />
          ^ name start
                  ^ value start
                        ^ value end
                        ^ name end

Rather than setting the end position of the attribute name when we parse
the closing slash, ensure the end position is already set while we are
in the AttributeName state. We now have:

    <meta charset="UTF-8" />
          ^ name start
                ^ name end
                  ^ value start
                        ^ value end

The tokenizer unit test has been extended to test these positions.
This commit is contained in:
Timothy Flynn 2023-08-24 17:43:05 -04:00 committed by Andreas Kling
parent 5b2bc90b50
commit 5a2bf7fdd1
Notes: sideshowbarker 2024-07-17 02:28:18 +09:00
3 changed files with 46 additions and 19 deletions

View File

@ -59,9 +59,15 @@ using Token = Web::HTML::HTMLToken;
EXPECT_EQ(current_token->type(), Token::Type::DOCTYPE); \
NEXT_TOKEN();
#define EXPECT_TAG_TOKEN_ATTRIBUTE(name, value) \
VERIFY(last_token); \
EXPECT_EQ(last_token->attribute(#name), value);
#define EXPECT_TAG_TOKEN_ATTRIBUTE(name, attribute_value, name_start_column, name_end_column, value_start_column, value_end_column) \
VERIFY(last_token); \
auto name##_attr = last_token->raw_attribute(#name); \
VERIFY(name##_attr.has_value()); \
EXPECT_EQ(name##_attr->value, attribute_value); \
EXPECT_EQ(name##_attr->name_start_position.column, name_start_column); \
EXPECT_EQ(name##_attr->name_end_position.column, name_end_column); \
EXPECT_EQ(name##_attr->value_start_position.column, value_start_column); \
EXPECT_EQ(name##_attr->value_end_position.column, value_end_column);
#define EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(count) \
VERIFY(last_token); \
@ -128,7 +134,7 @@ TEST_CASE(unquoted_attributes)
BEGIN_ENUMERATION(tokens);
EXPECT_START_TAG_TOKEN(p, 1u, 10u);
EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1);
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar");
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar", 3u, 6u, 7u, 10u);
EXPECT_END_OF_FILE_TOKEN();
END_ENUMERATION();
}
@ -139,7 +145,7 @@ TEST_CASE(single_quoted_attributes)
BEGIN_ENUMERATION(tokens);
EXPECT_START_TAG_TOKEN(p, 1u, 12u);
EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1);
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar");
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar", 3u, 6u, 7u, 12u);
EXPECT_END_OF_FILE_TOKEN();
END_ENUMERATION();
}
@ -150,20 +156,32 @@ TEST_CASE(double_quoted_attributes)
BEGIN_ENUMERATION(tokens);
EXPECT_START_TAG_TOKEN(p, 1u, 12u);
EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1);
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar");
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar", 3u, 6u, 7u, 12u);
EXPECT_END_OF_FILE_TOKEN();
END_ENUMERATION();
}
TEST_CASE(valueless_attribute)
{
auto tokens = run_tokenizer("<p foo>"sv);
BEGIN_ENUMERATION(tokens);
EXPECT_START_TAG_TOKEN(p, 1u, 6u);
EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1);
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "", 3u, 6u, 0u, 0u);
EXPECT_END_OF_FILE_TOKEN();
END_ENUMERATION();
}
TEST_CASE(multiple_attributes)
{
auto tokens = run_tokenizer("<p foo=\"bar\" baz=foobar foo2=\"bar2\">"sv);
auto tokens = run_tokenizer("<p foo=\"bar\" baz=foobar biz foo2=\"bar2\">"sv);
BEGIN_ENUMERATION(tokens);
EXPECT_START_TAG_TOKEN(p, 1u, 35u);
EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(3);
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar");
EXPECT_TAG_TOKEN_ATTRIBUTE(baz, "foobar");
EXPECT_TAG_TOKEN_ATTRIBUTE(foo2, "bar2");
EXPECT_START_TAG_TOKEN(p, 1u, 39u);
EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(4);
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar", 3u, 6u, 7u, 12u);
EXPECT_TAG_TOKEN_ATTRIBUTE(baz, "foobar", 13u, 16u, 17u, 23u);
EXPECT_TAG_TOKEN_ATTRIBUTE(biz, "", 24u, 27u, 0u, 0u);
EXPECT_TAG_TOKEN_ATTRIBUTE(foo2, "bar2", 28u, 32u, 33u, 39u);
EXPECT_END_OF_FILE_TOKEN();
END_ENUMERATION();
}
@ -174,9 +192,9 @@ TEST_CASE(character_reference_in_attribute)
BEGIN_ENUMERATION(tokens);
EXPECT_START_TAG_TOKEN(p, 1u, 43u);
EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(3);
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "a&b");
EXPECT_TAG_TOKEN_ATTRIBUTE(bar, "a&b");
EXPECT_TAG_TOKEN_ATTRIBUTE(baz, "a&b");
EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "a&b", 3u, 6u, 7u, 14u);
EXPECT_TAG_TOKEN_ATTRIBUTE(bar, "a&b", 15u, 18u, 19u, 28u);
EXPECT_TAG_TOKEN_ATTRIBUTE(baz, "a&b", 29u, 32u, 33u, 43u);
EXPECT_END_OF_FILE_TOKEN();
END_ENUMERATION();
}

View File

@ -246,6 +246,13 @@ public:
}
StringView attribute(DeprecatedFlyString const& attribute_name) const
{
if (auto result = raw_attribute(attribute_name); result.has_value())
return result->value;
return {};
}
Optional<Attribute const&> raw_attribute(DeprecatedFlyString const& attribute_name) const
{
VERIFY(is_start_tag() || is_end_tag());
@ -254,7 +261,7 @@ public:
return {};
for (auto& attribute : *ptr) {
if (attribute_name == attribute.local_name)
return attribute.value;
return attribute;
}
return {};
}

View File

@ -1051,8 +1051,6 @@ _StartOfFunction:
}
ON('/')
{
if (m_current_token.has_attributes())
m_current_token.last_attribute().name_end_position = nth_last_position(1);
RECONSUME_IN(AfterAttributeName);
}
ON('>')
@ -1108,21 +1106,25 @@ _StartOfFunction:
{
ON_WHITESPACE
{
m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName);
}
ON('/')
{
m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName);
}
ON('>')
{
m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName);
}
ON_EOF
{
m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName);
}
@ -1196,7 +1198,7 @@ _StartOfFunction:
{
m_current_token.add_attribute({});
if (!m_source_positions.is_empty())
m_current_token.last_attribute().name_start_position = m_source_positions.last();
m_current_token.last_attribute().name_start_position = nth_last_position(1);
RECONSUME_IN(AttributeName);
}
}