LibWeb: Stop parsing after document.write at the insertion point

If a call to `document.write` inserts an incomplete HTML tag, e.g.:

    document.write("<p");

we would previously continue parsing the document until we reached a
closing angle bracket. However, the spec states we should stop once we
reach the new insertion point.
This commit is contained in:
Timothy Flynn 2024-02-18 12:45:53 -05:00 committed by Andreas Kling
parent 64dcd3f1f4
commit af57bd5cca
Notes: sideshowbarker 2024-07-16 23:05:02 +09:00
7 changed files with 62 additions and 10 deletions

View File

@ -0,0 +1,34 @@
Viewport <#document> at (0,0) content-size 800x600 children: not-inline
BlockContainer <html> at (0,0) content-size 800x600 [BFC] children: not-inline
BlockContainer <body> at (8,16) content-size 784x83 children: not-inline
BlockContainer <p> at (8,16) content-size 784x17 children: inline
frag 0 from TextNode start: 0, length: 4, rect: [8,16 30.078125x17] baseline: 13.296875
"Well"
TextNode <#text>
BlockContainer <(anonymous)> at (8,49) content-size 784x0 children: inline
TextNode <#text>
BlockContainer <p> at (8,49) content-size 784x17 children: inline
frag 0 from TextNode start: 0, length: 5, rect: [8,49 36.84375x17] baseline: 13.296875
"hello"
TextNode <#text>
BlockContainer <(anonymous)> at (8,82) content-size 784x0 children: inline
TextNode <#text>
BlockContainer <p> at (8,82) content-size 784x17 children: inline
frag 0 from TextNode start: 0, length: 8, rect: [8,82 59.21875x17] baseline: 13.296875
"friends!"
TextNode <#text>
BlockContainer <(anonymous)> at (8,115) content-size 784x0 children: inline
TextNode <#text>
ViewportPaintable (Viewport<#document>) [0,0 800x600]
PaintableWithLines (BlockContainer<HTML>) [0,0 800x600]
PaintableWithLines (BlockContainer<BODY>) [8,16 784x83] overflow: [8,16 784x99]
PaintableWithLines (BlockContainer<P>) [8,16 784x17]
TextPaintable (TextNode<#text>)
PaintableWithLines (BlockContainer(anonymous)) [8,49 784x0]
PaintableWithLines (BlockContainer<P>) [8,49 784x17]
TextPaintable (TextNode<#text>)
PaintableWithLines (BlockContainer(anonymous)) [8,82 784x0]
PaintableWithLines (BlockContainer<P>) [8,82 784x17]
TextPaintable (TextNode<#text>)
PaintableWithLines (BlockContainer(anonymous)) [8,115 784x0]

View File

@ -0,0 +1,8 @@
<p>Well</p>
<script type="text/javascript">
document.write("<p");
document.write(">hello</p>");
</script>
<p>friends!</p>

View File

@ -530,9 +530,12 @@ WebIDL::ExceptionOr<void> Document::run_the_document_write_steps(StringView inpu
// 5. Insert input into the input stream just before the insertion point.
m_parser->tokenizer().insert_input_at_insertion_point(input);
// 6. If there is no pending parsing-blocking script, have the HTML parser process input, one code point at a time, processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the insertion point or when the processing of the tokenizer is aborted by the tree construction stage (this can happen if a script end tag token is emitted by the tokenizer).
// 6. If there is no pending parsing-blocking script, have the HTML parser process input, one code point at a time,
// processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the insertion point
// or when the processing of the tokenizer is aborted by the tree construction stage (this can happen if a script
// end tag token is emitted by the tokenizer).
if (!pending_parsing_blocking_script())
m_parser->run();
m_parser->run(HTML::HTMLTokenizer::StopAtInsertionPoint::Yes);
return {};
}

View File

@ -169,14 +169,14 @@ void HTMLParser::visit_edges(Cell::Visitor& visitor)
m_list_of_active_formatting_elements.visit_edges(visitor);
}
void HTMLParser::run()
void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
for (;;) {
// FIXME: Find a better way to say that we come from Document::close() and want to process EOF.
if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached())
return;
auto optional_token = m_tokenizer.next_token();
auto optional_token = m_tokenizer.next_token(stop_at_insertion_point);
if (!optional_token.has_value())
break;
auto& token = optional_token.value();
@ -216,11 +216,11 @@ void HTMLParser::run()
flush_character_insertions();
}
void HTMLParser::run(const AK::URL& url)
void HTMLParser::run(const AK::URL& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
m_document->set_url(url);
m_document->set_source(MUST(String::from_byte_string(m_tokenizer.source())));
run();
run(stop_at_insertion_point);
the_end(*m_document, this);
m_document->detach_parser({});
}

View File

@ -53,8 +53,8 @@ public:
static JS::NonnullGCPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input);
static JS::NonnullGCPtr<HTMLParser> create(DOM::Document&, StringView input, ByteString const& encoding);
void run();
void run(const AK::URL&);
void run(HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);
void run(const AK::URL&, HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);
static void the_end(JS::NonnullGCPtr<DOM::Document>, JS::GCPtr<HTMLParser> = nullptr);

View File

@ -248,7 +248,7 @@ HTMLToken::Position HTMLTokenizer::nth_last_position(size_t n)
return m_source_positions.at(m_source_positions.size() - 1 - n);
}
Optional<HTMLToken> HTMLTokenizer::next_token()
Optional<HTMLToken> HTMLTokenizer::next_token(StopAtInsertionPoint stop_at_insertion_point)
{
if (!m_source_positions.is_empty()) {
auto last_position = m_source_positions.last();
@ -263,6 +263,9 @@ _StartOfFunction:
return {};
for (;;) {
if (stop_at_insertion_point == StopAtInsertionPoint::Yes && is_insertion_point_reached())
return {};
auto current_input_character = next_code_point();
switch (m_state) {
// 13.2.5.1 Data state, https://html.spec.whatwg.org/multipage/parsing.html#data-state

View File

@ -111,7 +111,11 @@ public:
#undef __ENUMERATE_TOKENIZER_STATE
};
Optional<HTMLToken> next_token();
// Tells next_token() whether it should itself stop at the insertion point.
// Document::run_the_document_write_steps() passes Yes so that input inserted by
// document.write() (e.g. an incomplete tag such as "<p") is only tokenized up to
// the insertion point instead of consuming the input that follows it.
enum class StopAtInsertionPoint {
// next_token() does not check the insertion point itself (the default argument).
No,
// next_token() returns an empty Optional as soon as is_insertion_point_reached() is true.
Yes,
};
Optional<HTMLToken> next_token(StopAtInsertionPoint = StopAtInsertionPoint::No);
void set_parser(Badge<HTMLParser>, HTMLParser& parser) { m_parser = &parser; }