2022-03-28 14:55:17 +03:00
/*
* Copyright ( c ) 2022 , Ali Mohammad Pur < mpfard @ serenityos . org >
*
* SPDX - License - Identifier : BSD - 2 - Clause
*/
2022-08-28 14:42:07 +03:00
# include <LibWeb/DOM/Event.h>
2022-03-28 14:55:17 +03:00
# include <LibWeb/HTML/HTMLTemplateElement.h>
# include <LibWeb/HTML/Window.h>
2022-10-04 23:30:29 +03:00
# include <LibWeb/HighResolutionTime/TimeOrigin.h>
2022-03-28 14:55:17 +03:00
# include <LibWeb/XML/XMLDocumentBuilder.h>
// Forward declaration of the unified XHTML DTD text. The definition lives in the
// second `inline namespace` block near the end of this file; resolve_xml_resource()
// returns this string for the public identifiers it allows.
inline namespace {
2022-04-01 20:58:27 +03:00
extern char const * s_xhtml_unified_dtd ;
2022-03-28 14:55:17 +03:00
}
2023-01-09 03:23:00 +03:00
// Namespace URI that element_start() compares against the `xmlns` attribute of an
// `html` element; a different value puts the builder into its error state.
static DeprecatedFlyString s_html_namespace = " http://www.w3.org/1999/xhtml " ;
2022-03-28 14:55:17 +03:00
namespace Web {
2022-12-04 21:02:33 +03:00
// Resolves an external XML entity reference.
//
// The system identifier is ignored. Only a fixed allow-list of XHTML/MathML public
// identifiers is accepted, and every accepted identifier resolves to the same
// bundled DTD text (s_xhtml_unified_dtd). A missing or unknown public identifier
// yields an error.
ErrorOr<DeprecatedString> resolve_xml_resource(XML::SystemID const&, Optional<XML::PublicID> const& public_id)
{
    if (public_id.has_value()) {
        auto const& literal = public_id->public_literal;
        bool const is_allowed = literal.is_one_of(
            " -//W3C//DTD XHTML 1.0 Transitional//EN ",
            " -//W3C//DTD XHTML 1.1//EN ",
            " -//W3C//DTD XHTML 1.0 Strict//EN ",
            " -//W3C//DTD XHTML 1.0 Frameset//EN ",
            " -//W3C//DTD XHTML Basic 1.0//EN ",
            " -//W3C//DTD XHTML 1.1 plus MathML 2.0//EN ",
            " -//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN ",
            " -//W3C//DTD MathML 2.0//EN ",
            " -//WAPFORUM//DTD XHTML Mobile 1.0//EN ");
        if (is_allowed)
            return s_xhtml_unified_dtd;
    }

    // Either no public identifier was supplied, or it is not on the allow-list.
    return Error::from_string_literal(" Refusing to load disallowed external entity ");
}
// Binds the builder to `document` and records whether XML scripting support is enabled.
// The insertion point (m_current_node) starts out as the document itself, so the first
// element reported to element_start() is appended directly to the document.
// NOTE(review): m_current_node is initialized from m_document, which presumably relies on
// m_document being declared before m_current_node in the class — confirm against the header.
XMLDocumentBuilder : : XMLDocumentBuilder ( DOM : : Document & document , XMLScriptingSupport scripting_support )
: m_document ( document )
2023-02-27 02:09:02 +03:00
, m_current_node ( m_document )
2022-03-28 14:55:17 +03:00
, m_scripting_support ( scripting_support )
{
}
2022-12-04 21:02:33 +03:00
// Forwards the raw source text to the document being built; `source` is moved into
// the document's set_source() call.
void XMLDocumentBuilder : : set_source ( DeprecatedString source )
2022-11-03 16:43:34 +03:00
{
2023-02-27 02:09:02 +03:00
m_document - > set_source ( move ( source ) ) ;
2022-11-03 16:43:34 +03:00
}
2022-12-04 21:02:33 +03:00
void XMLDocumentBuilder : : element_start ( const XML : : Name & name , HashMap < XML : : Name , DeprecatedString > const & attributes )
2022-03-28 14:55:17 +03:00
{
if ( m_has_error )
return ;
// FIXME: This should not live here at all.
if ( auto it = attributes . find ( " xmlns " ) ; it ! = attributes . end ( ) ) {
if ( name = = HTML : : TagNames : : html & & it - > value ! = s_html_namespace ) {
m_has_error = true ;
return ;
}
}
2023-02-19 18:22:24 +03:00
auto node = DOM : : create_element ( m_document , name , { } ) . release_value_but_fixme_should_propagate_errors ( ) ;
2022-03-28 14:55:17 +03:00
// When an XML parser with XML scripting support enabled creates a script element,
2022-11-20 16:22:14 +03:00
// it must have its parser document set and its "force async" flag must be unset.
2022-03-28 14:55:17 +03:00
// FIXME: If the parser was created as part of the XML fragment parsing algorithm, then the element must be marked as "already started" also.
if ( m_scripting_support = = XMLScriptingSupport : : Enabled & & HTML : : TagNames : : script = = name ) {
auto & script_element = static_cast < HTML : : HTMLScriptElement & > ( * node ) ;
script_element . set_parser_document ( Badge < XMLDocumentBuilder > { } , m_document ) ;
2022-11-20 16:22:14 +03:00
script_element . set_force_async ( Badge < XMLDocumentBuilder > { } , false ) ;
2022-03-28 14:55:17 +03:00
}
if ( HTML : : TagNames : : template_ = = m_current_node - > node_name ( ) ) {
// When an XML parser would append a node to a template element, it must instead append it to the template element's template contents (a DocumentFragment node).
2022-10-30 20:50:04 +03:00
MUST ( static_cast < HTML : : HTMLTemplateElement & > ( * m_current_node ) . content ( ) - > append_child ( node ) ) ;
2022-03-28 14:55:17 +03:00
} else {
2022-10-30 20:50:04 +03:00
MUST ( m_current_node - > append_child ( node ) ) ;
2022-03-28 14:55:17 +03:00
}
for ( auto & attribute : attributes )
2022-10-30 20:50:04 +03:00
MUST ( node - > set_attribute ( attribute . key , attribute . value ) ) ;
2022-03-28 14:55:17 +03:00
m_current_node = node . ptr ( ) ;
}
void XMLDocumentBuilder : : element_end ( const XML : : Name & name )
{
if ( m_has_error )
return ;
VERIFY ( m_current_node - > node_name ( ) = = name ) ;
// When an XML parser with XML scripting support enabled creates a script element, [...]
// When the element's end tag is subsequently parsed,
if ( m_scripting_support = = XMLScriptingSupport : : Enabled & & HTML : : TagNames : : script = = name ) {
// the user agent must perform a microtask checkpoint,
HTML : : perform_a_microtask_checkpoint ( ) ;
// and then prepare the script element.
auto & script_element = static_cast < HTML : : HTMLScriptElement & > ( * m_current_node ) ;
script_element . prepare_script ( Badge < XMLDocumentBuilder > { } ) ;
// If this causes there to be a pending parsing-blocking script, then the user agent must run the following steps:
2023-02-27 02:09:02 +03:00
if ( m_document - > pending_parsing_blocking_script ( ) ) {
2022-03-28 14:55:17 +03:00
// Block this instance of the XML parser, such that the event loop will not run tasks that invoke it.
// NOTE: Noop.
// Spin the event loop until the parser's Document has no style sheet that is blocking scripts and the pending parsing-blocking script's "ready to be parser-executed" flag is set.
2023-02-27 02:09:02 +03:00
if ( m_document - > has_a_style_sheet_that_is_blocking_scripts ( ) | | ! script_element . is_ready_to_be_parser_executed ( ) ) {
2022-03-28 14:55:17 +03:00
HTML : : main_thread_event_loop ( ) . spin_until ( [ & ] {
2023-02-27 02:09:02 +03:00
return ! m_document - > has_a_style_sheet_that_is_blocking_scripts ( ) & & script_element . is_ready_to_be_parser_executed ( ) ;
2022-03-28 14:55:17 +03:00
} ) ;
}
// Unblock this instance of the XML parser, such that tasks that invoke it can again be run.
// NOTE: Noop.
// Execute the pending parsing-blocking script.
script_element . execute_script ( ) ;
// There is no longer a pending parsing-blocking script.
}
}
m_current_node = m_current_node - > parent_node ( ) ;
}
2023-01-08 13:48:33 +03:00
void XMLDocumentBuilder : : text ( StringView data )
2022-03-28 14:55:17 +03:00
{
if ( m_has_error )
return ;
auto last = m_current_node - > last_child ( ) ;
if ( last & & last - > is_text ( ) ) {
auto & text_node = static_cast < DOM : : Text & > ( * last ) ;
text_builder . append ( text_node . data ( ) ) ;
text_builder . append ( data ) ;
2022-12-06 04:12:49 +03:00
text_node . set_data ( text_builder . to_deprecated_string ( ) ) ;
2022-03-28 14:55:17 +03:00
text_builder . clear ( ) ;
} else {
2023-01-08 13:48:33 +03:00
auto string = DeprecatedString : : empty ( ) ;
if ( ! data . is_null ( ) )
string = data . to_deprecated_string ( ) ;
2023-02-27 02:09:02 +03:00
auto node = m_document - > create_text_node ( string ) ;
2022-10-30 20:50:04 +03:00
MUST ( m_current_node - > append_child ( node ) ) ;
2022-03-28 14:55:17 +03:00
}
}
2023-01-08 13:48:33 +03:00
void XMLDocumentBuilder : : comment ( StringView data )
2022-03-28 14:55:17 +03:00
{
if ( m_has_error )
return ;
2023-01-08 13:48:33 +03:00
auto string = DeprecatedString : : empty ( ) ;
if ( ! data . is_null ( ) )
string = data . to_deprecated_string ( ) ;
2023-02-27 02:09:02 +03:00
MUST ( m_document - > append_child ( m_document - > create_comment ( string ) ) ) ;
2022-03-28 14:55:17 +03:00
}
void XMLDocumentBuilder : : document_end ( )
{
// When an XML parser reaches the end of its input, it must stop parsing.
// If the active speculative HTML parser is not null, then stop the speculative HTML parser and return.
// NOTE: Noop.
// Set the insertion point to undefined.
m_current_node = nullptr ;
// Update the current document readiness to "interactive".
2023-02-27 02:09:02 +03:00
m_document - > update_readiness ( HTML : : DocumentReadyState : : Interactive ) ;
2022-03-28 14:55:17 +03:00
// Pop all the nodes off the stack of open elements.
// NOTE: Noop.
// While the list of scripts that will execute when the document has finished parsing is not empty:
2023-02-27 02:09:02 +03:00
while ( ! m_document - > scripts_to_execute_when_parsing_has_finished ( ) . is_empty ( ) ) {
2022-03-28 14:55:17 +03:00
// Spin the event loop until the first script in the list of scripts that will execute when the document has finished parsing has its "ready to be parser-executed" flag set
// and the parser's Document has no style sheet that is blocking scripts.
HTML : : main_thread_event_loop ( ) . spin_until ( [ & ] {
2023-02-27 02:09:02 +03:00
return m_document - > scripts_to_execute_when_parsing_has_finished ( ) . first ( ) - > is_ready_to_be_parser_executed ( )
& & ! m_document - > has_a_style_sheet_that_is_blocking_scripts ( ) ;
2022-03-28 14:55:17 +03:00
} ) ;
// Execute the first script in the list of scripts that will execute when the document has finished parsing.
2023-02-27 02:09:02 +03:00
m_document - > scripts_to_execute_when_parsing_has_finished ( ) . first ( ) - > execute_script ( ) ;
2022-03-28 14:55:17 +03:00
// Remove the first script element from the list of scripts that will execute when the document has finished parsing (i.e. shift out the first entry in the list).
2023-02-27 02:09:02 +03:00
( void ) m_document - > scripts_to_execute_when_parsing_has_finished ( ) . take_first ( ) ;
2022-03-28 14:55:17 +03:00
}
// Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following substeps:
2023-02-27 02:09:02 +03:00
old_queue_global_task_with_document ( HTML : : Task : : Source : : DOMManipulation , m_document , [ document = m_document ] {
2022-09-21 01:43:38 +03:00
// Set the Document's load timing info's DOM content loaded event start time to the current high resolution time given the Document's relevant global object.
2022-10-04 23:25:00 +03:00
document - > load_timing_info ( ) . dom_content_loaded_event_start_time = HighResolutionTime : : unsafe_shared_current_time ( ) ;
2022-03-28 14:55:17 +03:00
// Fire an event named DOMContentLoaded at the Document object, with its bubbles attribute initialized to true.
2023-02-15 00:43:17 +03:00
auto content_loaded_event = DOM : : Event : : create ( document - > realm ( ) , HTML : : EventNames : : DOMContentLoaded ) . release_value_but_fixme_should_propagate_errors ( ) ;
2022-03-28 14:55:17 +03:00
content_loaded_event - > set_bubbles ( true ) ;
2023-02-15 00:43:17 +03:00
document - > dispatch_event ( content_loaded_event ) ;
2022-03-28 14:55:17 +03:00
2022-09-21 01:43:38 +03:00
// Set the Document's load timing info's DOM content loaded event end time to the current high resolution time given the Document's relevant global object.
2022-10-04 23:25:00 +03:00
document - > load_timing_info ( ) . dom_content_loaded_event_end_time = HighResolutionTime : : unsafe_shared_current_time ( ) ;
2022-03-28 14:55:17 +03:00
// FIXME: Enable the client message queue of the ServiceWorkerContainer object whose associated service worker client is the Document object's relevant settings object.
// FIXME: Invoke WebDriver BiDi DOM content loaded with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "pending", and url is the Document object's URL.
} ) ;
// Spin the event loop until the set of scripts that will execute as soon as possible and the list of scripts that will execute in order as soon as possible are empty.
HTML : : main_thread_event_loop ( ) . spin_until ( [ & ] {
2023-02-27 02:09:02 +03:00
return m_document - > scripts_to_execute_as_soon_as_possible ( ) . is_empty ( ) ;
2022-03-28 14:55:17 +03:00
} ) ;
// Spin the event loop until there is nothing that delays the load event in the Document.
HTML : : main_thread_event_loop ( ) . spin_until ( [ & ] {
2023-02-27 02:09:02 +03:00
return m_document - > number_of_things_delaying_the_load_event ( ) = = 0 ;
2022-03-28 14:55:17 +03:00
} ) ;
// Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following steps:
2023-02-27 02:09:02 +03:00
old_queue_global_task_with_document ( HTML : : Task : : Source : : DOMManipulation , m_document , [ document = m_document ] {
2022-03-28 14:55:17 +03:00
// Update the current document readiness to "complete".
document - > update_readiness ( HTML : : DocumentReadyState : : Complete ) ;
// If the Document object's browsing context is null, then abort these steps.
if ( ! document - > browsing_context ( ) )
return ;
// Let window be the Document's relevant global object.
LibWeb: Remove unecessary dependence on Window from assorted classes
These classes only needed Window to get at its realm. Pass a realm
directly to construct Crypto, Encoding, HRT, IntersectionObserver,
NavigationTiming, Page, RequestIdleCallback, Selection, Streams, URL,
and XML classes.
2022-09-26 03:11:21 +03:00
JS : : NonnullGCPtr < HTML : : Window > window = verify_cast < HTML : : Window > ( relevant_global_object ( * document ) ) ;
2022-03-28 14:55:17 +03:00
2022-09-21 01:43:38 +03:00
// Set the Document's load timing info's load event start time to the current high resolution time given window.
2022-10-04 23:25:00 +03:00
document - > load_timing_info ( ) . load_event_start_time = HighResolutionTime : : unsafe_shared_current_time ( ) ;
2022-03-28 14:55:17 +03:00
// Fire an event named load at window, with legacy target override flag set.
// FIXME: The legacy target override flag is currently set by a virtual override of dispatch_event()
// We should reorganize this so that the flag appears explicitly here instead.
2023-02-15 00:43:17 +03:00
window - > dispatch_event ( DOM : : Event : : create ( document - > realm ( ) , HTML : : EventNames : : load ) . release_value_but_fixme_should_propagate_errors ( ) ) ;
2022-03-28 14:55:17 +03:00
// FIXME: Invoke WebDriver BiDi load complete with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "complete", and url is the Document object's URL.
// FIXME: Set the Document object's navigation id to null.
2022-09-21 01:43:38 +03:00
// Set the Document's load timing info's load event end time to the current high resolution time given window.
2022-10-04 23:25:00 +03:00
document - > load_timing_info ( ) . dom_content_loaded_event_end_time = HighResolutionTime : : unsafe_shared_current_time ( ) ;
2022-03-28 14:55:17 +03:00
// Assert: Document's page showing is false.
VERIFY ( ! document - > page_showing ( ) ) ;
// Set the Document's page showing flag to true.
document - > set_page_showing ( true ) ;
// Fire a page transition event named pageshow at window with false.
window - > fire_a_page_transition_event ( HTML : : EventNames : : pageshow , false ) ;
// Completely finish loading the Document.
document - > completely_finish_loading ( ) ;
// FIXME: Queue the navigation timing entry for the Document.
} ) ;
// FIXME: If the Document's print when loaded flag is set, then run the printing steps.
// The Document is now ready for post-load tasks.
2023-02-27 02:09:02 +03:00
m_document - > set_ready_for_post_load_tasks ( true ) ;
2022-03-28 14:55:17 +03:00
}
}
inline namespace {
2022-04-01 20:58:27 +03:00
char const * s_xhtml_unified_dtd = R " xmlxmlxml(
2022-03-28 14:55:17 +03:00
< ! ENTITY Tab " 	 " > < ! ENTITY NewLine " 
 " > < ! ENTITY excl " ! " > < ! ENTITY quot " " " > < ! ENTITY QUOT " " " > < ! ENTITY num " # " > < ! ENTITY dollar " $ " > < ! ENTITY percnt " % " > < ! ENTITY amp " &#x26; " > < ! ENTITY AMP " &#x26; " > < ! ENTITY apos " ' " > < ! ENTITY lpar " ( " > < ! ENTITY rpar " ) " > < ! ENTITY ast " * " > < ! ENTITY midast " * " > < ! ENTITY plus " + " > < ! ENTITY comma " , " > < ! ENTITY period " . " > < ! ENTITY sol " / " > < ! ENTITY colon " : " > < ! ENTITY semi " ; " > < ! ENTITY lt " &#x3C; " > < ! ENTITY LT " &#x3C; " > < ! ENTITY nvlt " &#x3C;⃒ " > < ! ENTITY equals " = " > < ! ENTITY bne " =⃥ " > < ! ENTITY gt " > " > < ! ENTITY GT " > " > < ! ENTITY nvgt " >⃒ " > < ! ENTITY quest " ? " > < ! ENTITY commat " @ " > < ! ENTITY lsqb " [ " > < ! ENTITY lbrack " [ " > < ! ENTITY bsol " \ " > < ! ENTITY rsqb " ] " > < ! ENTITY rbrack " ] " > < ! ENTITY Hat " ^ " > < ! ENTITY lowbar " _ " > < ! ENTITY UnderBar " _ " > < ! ENTITY grave " ` " > < ! ENTITY DiacriticalGrave " ` " > < ! ENTITY fjlig " fj " > < ! ENTITY lcub " { " > < ! ENTITY lbrace " { " > < ! ENTITY verbar " | " > < ! ENTITY vert " | " > < ! ENTITY VerticalLine " | " > < ! ENTITY rcub " } " > < ! ENTITY rbrace " } " > < ! ENTITY nbsp "   " > < ! ENTITY NonBreakingSpace "   " > < ! ENTITY iexcl " ¡ " > < ! ENTITY cent " ¢ " > < ! ENTITY pound " £ " > < ! ENTITY curren " ¤ " > < ! ENTITY yen " ¥ " > < ! ENTITY brvbar " ¦ " > < ! ENTITY sect " § " > < ! ENTITY Dot " ¨ " > < ! ENTITY die " ¨ " > < ! ENTITY DoubleDot " ¨ " > < ! ENTITY uml " ¨ " > < ! ENTITY copy " © " > < ! ENTITY COPY " © " > < ! ENTITY ordf " ª " > < ! ENTITY laquo " « " > < ! ENTITY not " ¬ " > < ! ENTITY shy " ­ " > < ! ENTITY reg " ® " > < ! ENTITY circledR " ® " > < ! ENTITY REG " ® " > < ! ENTITY macr " ¯ " > < ! ENTITY strns " ¯ " > < ! ENTITY deg " ° " > < ! ENTITY plusmn " ± " > < ! ENTITY pm " ± " > < ! ENTITY PlusMinus " ± " > < ! ENTITY sup2 " ² " > < ! ENTITY sup3 " ³ " > < ! ENTITY acute " ´ " > < ! 
ENTITY DiacriticalAcute " ´ " > < ! ENTITY micro " µ " > < ! ENTITY para " ¶ " > < ! ENTITY middot " · " > < ! ENTITY centerdot " · " > < ! ENTITY CenterDot " · " > < ! ENTITY cedil " ¸ " > < ! ENTITY Cedilla " ¸ " > < ! ENTITY sup1 " ¹ " > < ! ENTITY ordm " º " > < ! ENTITY raquo " » " > < ! ENTITY frac14 " ¼ " > < ! ENTITY frac12 " ½ " > < ! ENTITY half " ½ " > < ! ENTITY frac34 " ¾ " > < ! ENTITY iquest " ¿ " > < ! ENTITY Agrave " À " > < ! ENTITY Aacute " Á " > < ! ENTITY Acirc " Â " > < ! ENTITY Atilde " Ã " > < ! ENTITY Auml " Ä " > < ! ENTITY Aring " Å " > < ! ENTITY angst " Å " > < ! ENTITY AElig " Æ " > < ! ENTITY Ccedil " Ç " > < ! ENTITY Egrave " È " > < ! ENTITY Eacute " É " > < ! ENTITY Ecirc " Ê " > < ! ENTITY Euml " Ë " > < ! ENTITY Igrave " Ì " > < ! ENTITY Iacute " Í " > < ! ENTITY Icirc " Î " > < ! ENTITY Iuml " Ï " > < ! ENTITY ETH " Ð " > < ! ENTITY Ntilde " Ñ " > < ! ENTITY Ograve " Ò " > < ! ENTITY Oacute " Ó " > < ! ENTITY Ocirc " Ô " > < ! ENTITY Otilde " Õ " > < ! ENTITY Ouml " Ö " > < ! ENTITY times " × " > < ! ENTITY Oslash " Ø " > < ! ENTITY Ugrave " Ù " > < ! ENTITY Uacute " Ú " > < ! ENTITY Ucirc " Û " > < ! ENTITY Uuml " Ü " > < ! ENTITY Yacute " Ý " > < ! ENTITY THORN " Þ " > < ! ENTITY szlig " ß " > < ! ENTITY agrave " à " > < ! ENTITY aacute " á " > < ! ENTITY acirc " â " > < ! ENTITY atilde " ã " > < ! ENTITY auml " ä " > < ! ENTITY aring " å " > < ! ENTITY aelig " æ " > < ! ENTITY ccedil " ç " > < ! ENTITY egrave " è " > < ! ENTITY eacute " é " > < ! ENTITY ecirc " ê " > < ! ENTITY euml " ë " > < ! ENTITY igrave " ì " > < ! ENTITY iacute " í " > < ! ENTITY icirc " î " > < ! ENTITY iuml " ï " > < ! ENTITY eth " ð " > < ! ENTITY ntilde " ñ " > < ! ENTITY ograve " ò " > < ! ENTITY oacute " ó " > < ! ENTITY ocirc " ô " > < ! ENTITY otilde " õ " > < ! ENTITY ouml " ö " > < ! ENTITY divide " ÷ " > < ! ENTITY div " ÷ " > < ! ENTITY oslash " ø " > < ! ENTITY ugrave " ù " > < ! ENTITY uacute " ú " > < ! ENTITY ucirc " û " > < ! 
ENTITY uuml " ü " > < ! ENTITY yacute " ý " > < ! ENTITY thorn " þ " > < ! ENTITY yuml " ÿ " > < ! ENTITY Amacr " Ā " > < ! ENTITY amacr " ā " > < ! ENTITY Abreve " Ă " > < ! ENTITY abreve " ă " > < ! ENTITY Aogon " Ą " > < ! E
) xmlxmlxml " ;
}