fix(html/lexer): Fix lexing of html entity (#4423)

This commit is contained in:
Alexander Akait 2022-04-25 08:34:59 +03:00 committed by GitHub
parent 09565a3533
commit 9b26dbd457
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 66 additions and 3 deletions

View File

@ -0,0 +1,33 @@
<!doctype html>
<html lang="en">
<head>
<title>Document</title>
</head>
<body>
<div id = "John&quot;&amp;Harry">Test</div>
<div id = 'John&quot;&amp;HarryOther'>Test</div>
<div id = 'John&quot;&lt;HarryOtherOther'>Test</div>
<div id = 'John&nbsp;HarryOtherOtherOther'>Test</div>
<div id = 'John<test>'>Test</div>
<div id = 'JohnTest'>Test</div>
<div> Registered Trademark Symbol: </div>
<div>HTML Entities demo: </div>
<p>®</p>
<p>&copy;</p>
<p>&#169;</p>
<p> This Registered Trademark is a Script used in HTML document. </p>
<div>I want to display &lt;br&gt; tag</div>
<div>I want to display &lt;i&gt; tag</div>
<div>The cent sign: &cent;</div>
<div>The cent sign: &#162;</div>
</body>
</html>

View File

@ -0,0 +1,31 @@
<!DOCTYPE html><html lang=en><head>
<title>Document</title>
</head>
<body>
<div id='John"&Harry'>Test</div>
<div id='John"&HarryOther'>Test</div>
<div id='John"<HarryOtherOther'>Test</div>
<div id=John HarryOtherOtherOther>Test</div>
<div id="John<test>">Test</div>
<div id=JohnTest>Test</div>
<div> Registered Trademark Symbol: </div>
<div>HTML Entities demo: </div>
<p>®</p>
<p>©</p>
<p>©</p>
<p> This Registered Trademark is a Script used in HTML document. </p>
<div>I want to display &lt;br&gt; tag</div>
<div>I want to display &lt;i&gt; tag</div>
<div>The cent sign: ¢</div>
<div>The cent sign: ¢</div>
</body></html>

View File

@ -5073,13 +5073,12 @@ where
 if let Some(found_entity) = found_entity {
 cur_pos = Some(self.input.cur_pos());
 entity = Some(found_entity);
 }
 // We stop when:
 // - not ascii alphabetic
-// - we consume more characters them the longest entity
+// - we consume more characters than the longest entity
 if !c.is_ascii_alphabetic() || temporary_buffer.len() > 33 {
 if let Some(cur_pos) = cur_pos {
 self.input.reset_to(cur_pos);
@ -5138,7 +5137,7 @@ where
 // Flush code points consumed as a character reference. Switch to the
 // return state.
 else {
-if is_last_semicolon {
+if !is_last_semicolon {
 self.emit_error(ErrorKind::MissingSemicolonAfterCharacterReference);
 }