Fix: open <tr> closes hanging <td>

Fixes #5
This commit is contained in:
Dan Neumann 2023-02-11 13:24:55 -06:00
parent cfa2acb7e4
commit b51a29248d
2 changed files with 57 additions and 0 deletions

View File

@ -530,6 +530,10 @@ element cfg =
(if tag == "head" then
notNode cfg [ tag, "body" ]
else if tag == "td" then
-- https://github.com/danneu/html-parser/issues/5
notNode cfg [ tag, "tr" ]
else
notNode cfg [ tag ]
)

View File

@ -162,6 +162,7 @@ basicCommentTests =
, ( "basic4", "<a><!--x--></a>", Ok [ Element "a" [] [ Comment "x" ] ] )
, ( "basic5", "<!--a--><a><!--b--></a><!--c-->", Ok [ Comment "a", Element "a" [] [ Comment "b" ], Comment "c" ] )
, ( "basic6", "<!---->-->", Ok [ Comment "", Text "-->" ] )
, ( "no-nesting", "<!--\nline1\n<!--line2-->\nline3-->", Ok [ Comment "\nline1\n<!--line2", Text "\nline3-->" ] )
]
@ -336,6 +337,49 @@ basicNestingTests =
]
{-| Cases, grouped under "special autoclosing cases", in which an opening tag
is expected to end an element that was left without its own closing tag.

Each case is a `( name, html input, expected parse result )` triple passed to
`testAll` (defined elsewhere in this module; presumably it runs the parser on
the input and compares against the expected result).

-}
specialAutoclosingTests : Test
specialAutoclosingTests =
    let
        -- An opening <body> ends a <head> that is still open.
        bodyClosesHead =
            ( "body closes head"
            , "<head>a<body>b"
            , Ok
                [ Element "head" [] [ Text "a" ]
                , Element "body" [] [ Text "b" ]
                ]
            )

        -- Two table rows whose last <td> is never closed.
        -- The fixture is written across several lines for readability;
        -- removeWhitespace collapses it before it reaches the parser.
        hangingCellsMarkup =
            removeWhitespace """
<table>
<tr>
<td>A1</td>
<td>B1
<tr>
<td>A2</td>
<td>B2
</table>
"""

        -- Expected node for a <td> holding a single text child.
        cell label =
            Element "td" [] [ Text label ]

        -- Expected node for a <tr> with one text-only cell per label.
        row labels =
            Element "tr" [] (List.map cell labels)

        -- https://github.com/danneu/html-parser/issues/5
        trClosesTd =
            ( "tr closes td"
            , hangingCellsMarkup
            , Ok
                [ Element "table"
                    []
                    [ row [ "A1", "B1" ]
                    , row [ "A2", "B2" ]
                    ]
                ]
            )
    in
    [ bodyClosesHead, trClosesTd ]
        |> testAll
        |> describe "special autoclosing cases"
voidElementTests : Test
voidElementTests =
describe "void elements" <|
@ -809,3 +853,12 @@ jsoupParserTests =
]
)
]
-- UTIL
{-| Strip layout whitespace from a string so an HTML fixture can be written
across several lines and still reach the parser as one compact run of markup.

Removes spaces, tabs, line feeds and carriage returns. Carriage returns and
tabs are included so the result does not depend on how the source file was
checked out (CRLF line endings) or indented.

Whitespace is removed everywhere, including inside text content, so this is
only suitable for fixtures whose text nodes contain no meaningful whitespace.

-}
removeWhitespace : String -> String
removeWhitespace =
    let
        -- Characters treated as layout-only and dropped from the fixture.
        layoutChars =
            [ ' ', '\n', '\u{000D}', '\t' ]
    in
    String.filter (\c -> not (List.member c layoutChars))