Initial commit

2024-11-22 02:44:04 +03:00 · 2022-05-16 20:28:13 -05:00 · 2022-05-16 20:28:13 -05:00 · f953cc6ee4
commit f953cc6ee4
8 changed files with 6923 additions and 0 deletions
--- a/.prettierrc.json
+++ b/.prettierrc.json
@ -0,0 +1,3 @@
+{
+    "tabWidth": 4
+}
--- a/README.md
+++ b/README.md
@ -0,0 +1,52 @@
+# elm-html-parser
+
+A lenient html5 parser implemented with [Elm](https://elm-lang.org). 
+
+A lenient alternative to [hecrj/html-parser](https://package.elm-lang.org/packages/hecrj/html-parser/latest/).
+
+## Goals
+
+- **Leniency** 
+    - Avoids validating while parsing
+    - Prefers to imitate browser parsing behavior rather than html5 spec.
+    - Prefers to use the html5 spec only to handle ambiguous cases rather than to prohibit invalid html5
+    - Prefers to fall back to text nodes than short-circuit with parse errors
+- **Handle user-written html**
+    - Users don't write character entities like `&amp;` and `&lt;`. This parser should strive to handle cases like `<p><:</p>` -> `Element "p" [] [ Text "<:" ]`. 
+
+## Features / Quirks
+
+- Characters don't need to be escaped into entities. 
+
+  e.g. `<div><:</div>` will parse correctly and doesn't need to be rewritten into `<div>&lt;:</div>`.
+- Tags that should not nest are autoclosed. 
+
+  e.g. `<p>a<p>b` -> `<p>a</p><p>b</p>`.
+- Closing tags that have no matching open tags are ignored. 
+
+  e.g. `</a><div></div></div></b>` -> `<div></div>`
+- Ignores comments in whitespace positions:
+ 
+  e.g. `<div <!--comment-->/>` -> `<div/>`
+- Parses comments in text node positions:
+
+  e.g. `<div><!--comment--></div>` -> 
+  `Element "div" [] [ Comment "comment" ]`
+
+## Differences from existing packages
+
+Currently, there is only one html parser published to Elm packages: [hecrj/html-parser](https://package.elm-lang.org/packages/hecrj/html-parser/latest/).
+
+@hecrj has said that following the html5 spec is a goal of their parser, so their parser is stricter by design and rejects invalid html5.
+
+## Development
+
+`git clone` and `npm install`.
+
+- `npm test` to run tests
+- `npm run docs` to preview docs locally
+
+## Special thanks
+
+- @hecrj and their contributors.
+- @ymtszw for their work on the Javascript `<script>` parser.
--- a/elm.json
+++ b/elm.json
@ -0,0 +1,20 @@
+{
+    "type": "package",
+    "name": "danneu/html-parser",
+    "summary": "A lenient html5 parser",
+    "license": "MIT",
+    "version": "1.0.0",
+    "exposed-modules": [
+        "Html.Parser"
+    ],
+    "elm-version": "0.19.1 <= v < 0.20.0",
+    "dependencies": {
+        "elm/core": "1.0.5 <= v < 2.0.0",
+        "elm/html": "1.0.0 <= v < 2.0.0",
+        "elm/parser": "1.1.0 <= v < 2.0.0",
+        "rtfeldman/elm-hex": "1.0.0 <= v < 2.0.0"
+    },
+    "test-dependencies": {
+        "elm-explorations/test": "1.2.2 <= v < 2.0.0"
+    }
+}
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@ -0,0 +1,11 @@
+{
+    "private": true,
+    "devDependencies": {
+        "elm-doc-preview": "^5.0.5",
+        "elm-test": "^0.19.1-revision7"
+    },
+    "scripts": {
+        "test": "elm-test",
+        "docs": "elm-doc-preview"
+    }
+}
--- a/src/Html/CharRefs.elm
+++ b/src/Html/CharRefs.elm
--- a/src/Html/Parser.elm
+++ b/src/Html/Parser.elm
@ -0,0 +1,957 @@
+module Html.Parser exposing
+    ( Node(..), Document
+    , run, runDocument
+    , nodeToHtml, nodesToHtml, nodeToString, nodesToString, nodeToPrettyString, nodesToPrettyString, documentToString, documentToPrettyString
+    )
+
+{-| Leniently parse html5 documents and fragments and then render them
+into strings or Elm's virtual dom nodes.
+
+
+# Definition
+
+@docs Node, Document
+
+
+# Parsing
+
+@docs run, runDocument
+
+
+# Render
+
+@docs nodeToHtml, nodesToHtml, nodeToString, nodesToString, nodeToPrettyString, nodesToPrettyString, documentToString, documentToPrettyString
+
+-}
+
+import Hex
+import Html
+import Html.Attributes
+import Html.CharRefs
+import Parser exposing (..)
+
+
+{-| An html node is a tree of text, comments, and element nodes.
+
+An element (e.g. `<div foo="bar">hello</div>`) can have attributes and child nodes.
+
+  - `Text` holds text content.
+  - `Comment` holds the text found between `<!--` and `-->`.
+  - `Element` holds the tag name, the attributes as `( name, value )` pairs, and the child nodes.
+
+-}
+type Node
+    = Text String
+    | Comment String
+    | Element String (List ( String, String )) (List Node)
+
+
+{-| Leniently parse an html fragment, producing its top-level nodes in source order.
+
+A fragment may contain any number of sibling nodes at the top level.
+
+    run "<div>hi</div><div>bye</div>"
+        == Ok
+            [ Element "div" [] [ Text "hi" ]
+            , Element "div" [] [ Text "bye" ]
+            ]
+
+-}
+run : String -> Result (List DeadEnd) (List Node)
+run =
+    Parser.run parseAll
+
+
+{-| An html document has a `<!doctype>` and then a root html node.
+
+  - `legacyCompat` is `True` when the doctype carried the `SYSTEM "about:legacy-compat"` legacy string.
+  - `root` is the single root node of the document.
+
+-}
+type alias Document =
+    { legacyCompat : Bool
+    , root : Node
+    }
+
+
+{-| Match the given string the way `Parser.token` does, except that every
+character is compared without regard to letter case.
+-}
+caseInsensitiveToken : String -> Parser ()
+caseInsensitiveToken string =
+    let
+        -- Chomp one character that equals `expected` once both are lowercased,
+        -- or fail with a message naming the expected character.
+        matchChar : Char -> Parser ()
+        matchChar expected =
+            oneOf
+                [ chompIf (\actual -> Char.toLower actual == Char.toLower expected)
+                , problem ("expected case-insensitive char '" ++ String.fromChar expected ++ "'")
+                ]
+
+        -- Run the matcher for the next character after everything matched so far.
+        step : Char -> Parser () -> Parser ()
+        step expected soFar =
+            soFar |> Parser.andThen (\_ -> matchChar expected)
+    in
+    String.foldl step (succeed ()) string
+
+
+{-| Parse the optional doctype legacy string: whitespace, `SYSTEM`, whitespace, and
+`about:legacy-compat` wrapped in matching single or double quotes.
+
+Always produces `True` when it succeeds.
+
+-}
+doctypeLegacy : Parser Bool
+doctypeLegacy =
+    -- https://html.spec.whatwg.org/multipage/syntax.html#doctype-legacy-string
+    let
+        openingQuote : Parser String
+        openingQuote =
+            oneOf [ token "\"", token "'" ]
+                |> getChompedString
+
+        -- The closing quote has to be the same character as the opening one.
+        quotedLegacyString : String -> Parser Bool
+        quotedLegacyString quote =
+            succeed True
+                |. token "about:legacy-compat"
+                |. token quote
+    in
+    succeed identity
+        |. chompOneOrMore isSpace
+        |. caseInsensitiveToken "SYSTEM"
+        |. chompOneOrMore isSpace
+        |= openingQuote
+        |> andThen quotedLegacyString
+
+
+{-| Parse a `<!DOCTYPE html>` declaration; the `DOCTYPE` and `html` keywords are
+matched case-insensitively.
+
+Produces `True` when the optional legacy string parsed by `doctypeLegacy` is
+present and `False` otherwise.
+
+-}
+doctype : Parser Bool
+doctype =
+    -- https://html.spec.whatwg.org/multipage/syntax.html#the-doctype
+    succeed identity
+        |. token "<!"
+        |. caseInsensitiveToken "DOCTYPE"
+        |. chompOneOrMore isSpace
+        |. caseInsensitiveToken "html"
+        -- The legacy string is backtrackable so a partial match falls through to `False`
+        |= oneOf
+            [ backtrackable doctypeLegacy
+            , succeed False
+            ]
+        |. chompWhile isSpace
+        |. token ">"
+
+
+{-| Parse a `<!doctype html>` declaration followed by any html nodes.
+
+The result always has exactly one root node. When the input does not consist of a
+single `<html>` element, the parsed nodes are wrapped in a new `<html>` element.
+
+**Caveat**: If there are multiple top-level nodes and one of them is `<html>`, then this
+function will wrap them all in another `<html>` node.
+
+-}
+runDocument : String -> Result (List DeadEnd) Document
+runDocument =
+    Parser.run document
+
+
+{-| Parse a doctype, skip whitespace/comments, then parse the remaining nodes
+and reduce them to a single root node.
+-}
+document : Parser Document
+document =
+    let
+        -- Reuse a lone top-level <html> element as the root; otherwise wrap
+        -- whatever was parsed (possibly nothing) in a new <html> element.
+        toRoot : List Node -> Node
+        toRoot nodes =
+            case nodes of
+                [ (Element "html" _ _) as root ] ->
+                    root
+
+                _ ->
+                    Element "html" [] nodes
+    in
+    succeed Document
+        |= doctype
+        |. ws
+        |= map toRoot (zeroOrMore node)
+
+
+{-| Parse nodes until no further node can be parsed, merging adjacent text nodes.
+-}
+parseAll : Parser (List Node)
+parseAll =
+    let
+        -- `reversed` holds the nodes parsed so far, most recent first.
+        step : List Node -> Parser (Step (List Node) (List Node))
+        step reversed =
+            oneOf
+                [ map (\parsed -> Loop (mergeText parsed reversed)) node
+                , succeed (Done (List.reverse reversed))
+                ]
+    in
+    Parser.loop [] step
+
+
+{-| Push a freshly parsed node onto a reversed accumulator (most recent node first).
+
+A text node that directly follows another text node is appended to it instead,
+so adjacent text never ends up as two separate `Text` nodes.
+
+-}
+mergeText : Node -> List Node -> List Node
+mergeText n nodes =
+    case nodes of
+        (Text earlier) :: older ->
+            case n of
+                Text later ->
+                    Text (earlier ++ later) :: older
+
+                _ ->
+                    n :: nodes
+
+        _ ->
+            n :: nodes
+
+
+{-| Chomps zero or more space characters or html comments.
+
+Keeps trying both alternatives until a pass consumes no input (see `ifProgress`).
+
+-}
+ws : Parser ()
+ws =
+    loop 0 <|
+        ifProgress <|
+            oneOf
+                [ multiComment "<!--" "-->" Nestable
+                , chompWhile isSpace
+                ]
+
+
+{-| True for the characters this parser treats as whitespace: space, line feed,
+carriage return, tab, form feed, and the non-breaking space (U+00A0).
+-}
+isSpace : Char -> Bool
+isSpace c =
+    case c of
+        ' ' ->
+            True
+
+        '\n' ->
+            True
+
+        '\u{000D}' ->
+            True
+
+        '\t' ->
+            True
+
+        '\u{000C}' ->
+            True
+
+        '\u{00A0}' ->
+            True
+
+        _ ->
+            False
+
+
+
+-- ATTRIBUTES
+
+
+{-| Parse an unquoted attribute value such as the `bar` in `<div foo=bar>`.
+
+The value is one or more runs of ordinary characters and/or character references,
+concatenated into a single string.
+
+-}
+attributeValueUnquoted : Parser String
+attributeValueUnquoted =
+    let
+        -- '&' is excluded here so that character references get decoded separately.
+        isUnquotedValueChar c =
+            not (isSpace c || List.member c [ '"', '\'', '=', '<', '>', '`', '&' ])
+
+        rawChunk : Parser String
+        rawChunk =
+            getChompedString (chompOneOrMore isUnquotedValueChar)
+    in
+    oneOrMore "attribute value" (oneOf [ rawChunk, characterReference ])
+        |> map String.concat
+
+
+{-| Parse an attribute value wrapped in the given quote character.
+
+The content between the quotes may be empty; character references inside it are decoded.
+
+-}
+attributeValueQuoted : Char -> Parser String
+attributeValueQuoted quote =
+    let
+        delimiter : Parser ()
+        delimiter =
+            chompIf (\c -> c == quote)
+
+        -- A run of characters that neither ends the value nor starts a character reference.
+        rawChunk : Parser String
+        rawChunk =
+            chompOneOrMore (\c -> not (c == quote || c == '&'))
+                |> getChompedString
+
+        contents : Parser String
+        contents =
+            zeroOrMore (oneOf [ rawChunk, characterReference ])
+                |> map String.concat
+    in
+    succeed identity
+        |. delimiter
+        |= contents
+        |. delimiter
+
+
+{-| Parse an attribute name.
+
+A single leading `/` is consumed and dropped, and a single leading `=` is kept as
+part of the name. The rest of the name is one or more characters that are not
+whitespace, quotes, `>`, `/`, or `=`, and is lowercased.
+
+-}
+attributeKey : Parser String
+attributeKey =
+    let
+        isKeyChar c =
+            not (isSpace c) && c /= '"' && c /= '\'' && c /= '>' && c /= '/' && c /= '='
+    in
+    succeed (++)
+        -- Attribute can start with '/' but it's ignored
+        |. oneOf
+            [ -- backtrackable because open tag can end with "/>"
+              backtrackable (chompIf ((==) '/'))
+            , succeed ()
+            ]
+        -- Attribute name can start with '=': https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+        -- e.g. <a =empty />
+        |= oneOf
+            [ chompIf ((==) '=') |> map (\_ -> "=")
+            , succeed ""
+            ]
+        |= (chompOneOrMore isKeyChar
+                |> getChompedString
+                |> map String.toLower
+           )
+
+
+{-| Parse one attribute as a `( name, value )` pair, then skip trailing whitespace/comments.
+
+The value may be unquoted, double-quoted, or single-quoted. An attribute without
+`=` (e.g. `<div foo>`) gets the empty string as its value.
+
+-}
+attribute : Parser ( String, String )
+attribute =
+    succeed Tuple.pair
+        |= attributeKey
+        |. ws
+        |= oneOf
+            [ succeed identity
+                |. symbol "="
+                |. ws
+                |= oneOf
+                    [ attributeValueUnquoted -- <div foo=bar>
+                    , attributeValueQuoted '"' -- <div foo="bar">
+                    , attributeValueQuoted '\'' -- <div foo='bar'>
+                    ]
+            , succeed "" -- <div foo>
+            ]
+        -- Reminder: Consume trailing whitespace so that following parsers don't need to consume whitespace
+        -- and then need to backtrack
+        |. ws
+
+
+{-| Parse a tag name: one or more alphanumeric characters or `-`, lowercased.
+-}
+tagName : Parser String
+tagName =
+    getChompedString (chompOneOrMore (\c -> c == '-' || Char.isAlphaNum c))
+        |> map String.toLower
+
+
+{-| Parse a closing tag and require its (lowercased) name to equal `expectedTag`.
+
+Fails with a message naming both tags when a different closing tag is found.
+
+-}
+closeTag : String -> Parser ()
+closeTag expectedTag =
+    let
+        verify : String -> Parser ()
+        verify tag =
+            if tag /= expectedTag then
+                problem ("found closing tag </" ++ tag ++ "> but expected </" ++ expectedTag ++ ">")
+
+            else
+                succeed ()
+    in
+    succeed identity
+        |. token "</"
+        |= tagName
+        |. ws
+        |. token ">"
+        |> andThen verify
+
+
+{-| How an opening tag ended: with `>` (`NoClose`) or with `/>` (`SelfClose`).
+-}
+type OpenTagEnd
+    = NoClose
+    | SelfClose
+
+
+{-| Parse a closing tag with any name, e.g. `</div>` or `</a >`, discarding it.
+-}
+anyCloseTag : Parser ()
+anyCloseTag =
+    token "</"
+        |. tagName
+        |. ws
+        |. token ">"
+
+
+{-| Parse a single node: text, a comment, or an element.
+
+Closing tags directly in front of the node are skipped. When none of the regular
+alternatives match, the next character is taken as a one-character text node.
+
+-}
+node : Parser Node
+node =
+    succeed identity
+        -- HACK: Ignore unmatched close tags like the browser does
+        |. zeroOrMore (backtrackable anyCloseTag)
+        |= oneOf
+            [ text
+            , comment
+            , backtrackable element
+            , justOneChar |> map Text
+            ]
+
+
+{-| Parse an html comment; the node holds everything between `<!--` and the next `-->`.
+-}
+comment : Parser Node
+comment =
+    let
+        body : Parser String
+        body =
+            getChompedString (chompUntil "-->")
+    in
+    succeed Comment
+        |. symbol "<!--"
+        |= body
+        |. symbol "-->"
+
+
+{-| Parse a text node: either one character reference, or a run of one or more
+characters containing neither `<` nor `&`.
+-}
+text : Parser Node
+text =
+    let
+        plainRun : Parser String
+        plainRun =
+            chompOneOrMore (\c -> not (c == '<' || c == '&'))
+                |> getChompedString
+    in
+    map Text
+        (oneOf
+            [ backtrackable characterReference
+            , plainRun
+            ]
+        )
+
+
+{-| Parse any node unless it's one of the given tags.
+
+An element is only parsed after a lookahead confirms that an opening tag follows and
+that its name is not in `tags`. Text and comments are parsed regardless of `tags`.
+
+-}
+notNode : List String -> Parser Node
+notNode tags =
+    oneOf
+        -- Peek at the opening tag without consuming it, then parse the whole element
+        [ lookAhead
+            (openTag
+                |> andThen
+                    (\( tag, _, _ ) ->
+                        if List.member tag tags then
+                            problem ""
+
+                        else
+                            succeed ()
+                    )
+            )
+            |> andThen (\_ -> element)
+        , text
+        , comment
+        ]
+
+
+{-| Parse an opening tag into its name, its attributes, and how it ended (`>` or `/>`).
+-}
+openTag : Parser ( String, List ( String, String ), OpenTagEnd )
+openTag =
+    let
+        tagEnd : Parser OpenTagEnd
+        tagEnd =
+            oneOf
+                [ map (always NoClose) (symbol ">")
+                , map (always SelfClose) (symbol "/>")
+                ]
+    in
+    succeed (\name attrs end -> ( name, attrs, end ))
+        |. symbol "<"
+        |. ws
+        |= tagName
+        |. ws
+        |= zeroOrMore attribute
+        |. ws
+        |= tagEnd
+
+
+{-| Parse an element: an opening tag followed by whatever its kind of tag allows.
+
+  - Self-closed tags (`<a/>`) have no children.
+  - `<script>` content is read by `consumeJavascriptUntilClosingTag`.
+  - Void tags (see `voidTags`) have no children and expect no closing tag.
+  - Autoclosing tags (see `autoclosingTags`) collect children until a tag of the same
+    name opens (for `<head>`, also until `<body>` opens); their closing tag is optional.
+  - Any other tag collects children until its closing tag, or until no further node parses.
+
+-}
+element : Parser Node
+element =
+    openTag
+        |> andThen
+            (\( tag, attrs, end ) ->
+                case end of
+                    SelfClose ->
+                        succeed (Element tag attrs [])
+
+                    NoClose ->
+                        if tag == "script" then
+                            succeed (Element tag attrs)
+                                |= consumeJavascriptUntilClosingTag
+
+                        else if isVoidTag tag then
+                            -- Void element expects no closing tag
+                            succeed (Element tag attrs [])
+
+                        else if isAutoclosingTag tag then
+                            -- Autoclosing tag is automatically closed by an opening tag of the same name
+                            succeed (Element tag attrs)
+                                |= oneOf
+                                    [ succeed identity
+                                        |= zeroOrMore
+                                            (if tag == "head" then
+                                                notNode [ tag, "body" ]
+
+                                             else
+                                                notNode [ tag ]
+                                            )
+                                        -- The element's own closing tag is consumed when present
+                                        |. oneOf
+                                            [ backtrackable (closeTag tag)
+                                            , succeed ()
+                                            ]
+                                    ]
+
+                        else
+                            -- Normal elements parse all nodes as children until their closing tag
+                            succeed (Element tag attrs)
+                                |= (loop [] <|
+                                        \acc ->
+                                            -- `acc` holds the children so far, most recent first
+                                            oneOf
+                                                [ backtrackable (closeTag tag) |> map (\_ -> Done (List.reverse acc))
+                                                , succeed (\n -> Loop (mergeText n acc))
+                                                    |= backtrackable node
+                                                , succeed () |> map (\_ -> Done (List.reverse acc))
+                                                ]
+                                   )
+            )
+
+
+
+-- CHARACTER REFERENCE
+
+
+{-| Parse a run of one or more hexadecimal digits (either letter case) into an integer.
+-}
+base16 : Parser Int
+base16 =
+    let
+        -- The digits are lowercased before being handed to `Hex.fromString`.
+        fromHex : String -> Parser Int
+        fromHex digits =
+            case Hex.fromString (String.toLower digits) of
+                Err message ->
+                    problem message
+
+                Ok value ->
+                    succeed value
+    in
+    getChompedString (chompOneOrMore Char.isHexDigit)
+        |> andThen fromHex
+
+
+{-| Parse a run of one or more decimal digits (0-9) into an integer.
+-}
+base10 : Parser Int
+base10 =
+    let
+        toNumber : String -> Parser Int
+        toNumber digits =
+            case String.toInt digits of
+                Just value ->
+                    succeed value
+
+                Nothing ->
+                    problem "bad number"
+    in
+    getChompedString (chompOneOrMore Char.isDigit)
+        |> andThen toNumber
+
+
+{-| Parse the part of a numeric character reference between `&` and `;`,
+i.e. `#` followed by decimal digits, or `#x`/`#X` followed by hexadecimal digits.
+
+Produces the referenced character as a string. Code point 0 and surrogate code
+points (0xD800-0xDFFF) produce the replacement character U+FFFD instead.
+
+-}
+numericCharacterReference : Parser String
+numericCharacterReference =
+    let
+        codepoint : Parser Int
+        codepoint =
+            oneOf
+                [ succeed identity
+                    |. chompIf (\c -> c == 'x' || c == 'X')
+                    |= base16
+                , succeed identity
+                    |= base10
+                ]
+
+        -- https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
+        toChar : Int -> Char
+        toChar code =
+            if code == 0 then
+                '\u{FFFD}'
+
+            else if 0xD800 <= code && code <= 0xDFFF then
+                -- Is surrogate
+                '\u{FFFD}'
+
+            else
+                Char.fromCode code
+    in
+    succeed identity
+        |. chompIf ((==) '#')
+        |= map (toChar >> String.fromChar) codepoint
+
+
+{-| Parse the name of a named character reference (the part between `&` and `;`)
+and decode it with `Html.CharRefs.decode`.
+
+Names may contain digits as well as letters (e.g. `frac12`, `sup2`). A name that
+cannot be decoded is put back as literal text: `"&" ++ name ++ ";"`.
+
+-}
+namedCharacterReference : Parser String
+namedCharacterReference =
+    chompOneOrMore Char.isAlphaNum
+        |> getChompedString
+        |> map
+            (\ref ->
+                Html.CharRefs.decode ref
+                    |> Maybe.withDefault ("&" ++ ref ++ ";")
+            )
+
+
+{-| Parse a `&` and, when it starts a numeric or named reference terminated by `;`,
+the decoded reference. A `&` that starts no such reference is produced as a literal `"&"`.
+-}
+characterReference : Parser String
+characterReference =
+    let
+        -- The reference body is backtrackable so that a missing ';' falls
+        -- through to the next alternative.
+        terminated : Parser String -> Parser String
+        terminated reference =
+            backtrackable reference
+                |. chompIf ((==) ';')
+    in
+    succeed identity
+        |. chompIf ((==) '&')
+        |= oneOf
+            [ terminated numericCharacterReference
+            , terminated namedCharacterReference
+            , succeed "&"
+            ]
+
+
+
+-- SPECIAL ELEMENTS
+
+
+{-| True when the (lowercase) tag name is listed in `voidTags`.
+-}
+isVoidTag : String -> Bool
+isVoidTag tag =
+    List.any (\voidTag -> voidTag == tag) voidTags
+
+
+{-| Tag names of void elements: the parser gives them no children and expects no
+closing tag, and the renderers emit no closing tag for them when they have no children.
+-}
+voidTags : List String
+voidTags =
+    [ "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source", "track", "wbr" ]
+
+
+{-| True when the (lowercase) tag name is listed in `autoclosingTags`.
+-}
+isAutoclosingTag : String -> Bool
+isAutoclosingTag tag =
+    List.any (\autoclosing -> autoclosing == tag) autoclosingTags
+
+
+{-| Tag names of elements whose closing tag is optional: such an element is also
+ended by the next opening tag of the same name (see `element`).
+-}
+autoclosingTags : List String
+autoclosingTags =
+    [ "body", "colgroup", "dd", "dt", "head", "html", "li", "option", "p", "tbody", "td", "tfoot", "th", "thead", "tr" ]
+
+
+
+-- HELPERS
+
+
+{-| Chomp one or more characters that satisfy the predicate; fails when the
+very next character does not satisfy it.
+-}
+chompOneOrMore : (Char -> Bool) -> Parser ()
+chompOneOrMore predicate =
+    chompIf predicate
+        |. chompWhile predicate
+
+
+{-| Loop step that keeps looping only while the parser actually consumes input.
+
+Parsers such as `spaces` and `chompWhile` succeed without consuming anything,
+so looping on them unconditionally would never terminate. This step compares the
+offset before and after running the parser and stops once they are equal.
+
+-}
+ifProgress : Parser a -> Int -> Parser (Step Int ())
+ifProgress parser offset =
+    let
+        decide : Int -> Step Int ()
+        decide newOffset =
+            if newOffset /= offset then
+                Loop newOffset
+
+            else
+                Done ()
+    in
+    succeed decide
+        |. parser
+        |= getOffset
+
+
+{-| Run the parser repeatedly until it fails, collecting the results in order.
+
+Succeeds with an empty list when the parser fails on the first attempt.
+
+-}
+zeroOrMore : Parser a -> Parser (List a)
+zeroOrMore parser =
+    let
+        -- `reversed` holds the results so far, most recent first.
+        step reversed =
+            oneOf
+                [ map (\item -> Loop (item :: reversed)) parser
+                , succeed (Done (List.reverse reversed))
+                ]
+    in
+    Parser.loop [] step
+
+
+{-| Like `zeroOrMore`, but fails when the parser does not succeed at least once.
+
+`name` is only used in the failure message.
+
+-}
+oneOrMore : String -> Parser a -> Parser (List a)
+oneOrMore name parser =
+    let
+        -- What to do once the parser stops matching: fail if nothing was
+        -- collected yet, otherwise finish with the results in order.
+        finish reversed =
+            case reversed of
+                [] ->
+                    problem ("expecting at least one " ++ name)
+
+                _ ->
+                    succeed (Done (List.reverse reversed))
+
+        step reversed =
+            oneOf
+                [ map (\item -> Loop (item :: reversed)) parser
+                , finish reversed
+                ]
+    in
+    Parser.loop [] step
+
+
+{-| Create a parser that backtracks on success.
+
+The resulting parser succeeds without consuming input when `parser` would succeed
+at the current position, and fails when `parser` would fail.
+
+-}
+lookAhead : Parser a -> Parser ()
+lookAhead parser =
+    oneOf
+        [ oneOf
+            -- If `parser` succeeds, commit and then fail on purpose: the committed failure
+            -- skips the alternative below, and the outer `backtrackable` turns it into a
+            -- rewind to the starting position.
+            [ parser
+                |> backtrackable
+                |> andThen (\_ -> commit ())
+                |> andThen (\_ -> problem "")
+
+            -- If `parser` failed, hand it back so that running it again reports the failure.
+            , succeed
+                (parser
+                    |> backtrackable
+                    |> map (\_ -> ())
+                )
+            ]
+            |> backtrackable
+
+        -- Reached only after the deliberate failure above, i.e. when `parser` succeeded.
+        , succeed (succeed ())
+        ]
+        |> andThen identity
+
+
+
+-- JAVASCRIPT / <script>
+
+
+{-| Chomp inside a <script> tag until the next </script>.
+
+This can't be implemented as `chompUntil "</script>"` because
+the Javascript inside the script tag may contain the string "</script>".
+
+For example: "<script>alert('</script>')</script>"
+
+Produces the script content as a list of nodes: html comments become `Comment`
+nodes and everything else becomes `Text`, with consecutive text chunks merged.
+
+-}
+consumeJavascriptUntilClosingTag : Parser (List Node)
+consumeJavascriptUntilClosingTag =
+    Parser.loop [] <|
+        \acc ->
+            let
+                -- `acc` holds the nodes so far, most recent first
+                accumulate newNode =
+                    case ( acc, newNode ) of
+                        ( [], first ) ->
+                            Loop [ first ]
+
+                        ( (Text accChunk) :: tail, Text newChunk ) ->
+                            -- Merge top-most text node unless HTML comment nodes are interleaved
+                            Loop (Text (accChunk ++ newChunk) :: tail)
+
+                        ( nonTextNode :: tail, _ ) ->
+                            Loop (newNode :: nonTextNode :: tail)
+            in
+            Parser.oneOf
+                [ -- HTML comments are, albeit considered a bad practice recently,
+                  -- allowed inside <script> to hide scripts from really ancient web browser
+                  comment
+                    |> map accumulate
+
+                -- Javascript comments and string literals are taken as whole chunks so that a
+                -- "</script>" inside them does not end the script
+                , lineComment "//"
+                    |> Parser.getChompedString
+                    |> Parser.map (Text >> accumulate)
+                , Parser.multiComment "/*" "*/" Parser.NotNestable
+                    |> Parser.getChompedString
+                    |> Parser.map (Text >> accumulate)
+                , javaScriptStringLike '"'
+                    |> Parser.map (Text >> accumulate)
+                , javaScriptStringLike '\''
+                    |> Parser.map (Text >> accumulate)
+                , javaScriptStringLike '`'
+                    |> Parser.map (Text >> accumulate)
+                , closeTag "script"
+                    |> Parser.map (\() -> Done (List.reverse acc))
+
+                -- Fallback: take any single character as text
+                , Parser.chompIf (always True)
+                    |> Parser.getChompedString
+                    |> Parser.map (Text >> accumulate)
+                ]
+
+
+{-| Parse a Javascript string-like literal delimited by the given character
+(`"`, `'`, or a backtick) and produce it with its delimiters put back.
+-}
+javaScriptStringLike : Char -> Parser String
+javaScriptStringLike terminatorChar =
+    let
+        quote : String
+        quote =
+            String.fromChar terminatorChar
+
+        -- The loop yields only the inside of the literal; restore the original shape.
+        wrap : String -> String
+        wrap inner =
+            quote ++ inner ++ quote
+    in
+    succeed wrap
+        |. token quote
+        |= loop "" (stringHelp terminatorChar quote)
+
+
+{-| Loop step for `javaScriptStringLike`; `acc` is the string content collected so far.
+
+  - A backslash plus the character after it is kept as-is (both characters are appended).
+  - The terminator ends the loop and is not added to the result.
+  - Otherwise a run of characters that are neither backslash nor terminator is appended.
+
+-}
+stringHelp : Char -> String -> String -> Parser (Parser.Step String String)
+stringHelp terminatorChar terminatorStr acc =
+    Parser.oneOf
+        [ Parser.succeed (\char -> Parser.Loop (acc ++ "\\" ++ char))
+            |. Parser.token "\\"
+            |= justOneChar
+        , Parser.token terminatorStr
+            |> Parser.map (\_ -> Parser.Done acc)
+
+        -- Orig code caused infinite loop with single terminator char <script>'</script>
+        -- , Parser.chompWhile (\char -> char /= '\\' && char /= terminatorChar)
+        , chompOneOrMore (\char -> char /= '\\' && char /= terminatorChar)
+            |> Parser.getChompedString
+            |> Parser.map (\chunk -> Parser.Loop (acc ++ chunk))
+        ]
+
+
+{-| Chomp exactly one character, whatever it is, and produce it as a string.
+Fails at the end of input.
+-}
+justOneChar : Parser String
+justOneChar =
+    getChompedString (chompIf (\_ -> True))
+
+
+
+-- RENDER
+
+
+{-| Render an opening tag such as `<a href="x" disabled>`.
+
+The tag name and the attributes are separated by single spaces. An attribute with
+an empty value is rendered as just its name. Double quotes inside a value are
+written as `&quot;` so that they cannot end the quoted value early.
+
+-}
+openTagToString : String -> List ( String, String ) -> String
+openTagToString tag attrs =
+    let
+        attrToString : ( String, String ) -> String
+        attrToString ( k, v ) =
+            if String.isEmpty v then
+                k
+
+            else
+                k ++ "=\"" ++ String.replace "\"" "&quot;" v ++ "\""
+    in
+    "<" ++ String.join " " (tag :: List.map attrToString attrs) ++ ">"
+
+
+{-| Render a single html node as a compact string with no added whitespace.
+
+    nodeToString (Element "a" [] [ Text "hi" ])
+        == "<a>hi</a>"
+
+Text is emitted as-is, and a void element without children gets no closing tag.
+
+-}
+nodeToString : Node -> String
+nodeToString node_ =
+    case node_ of
+        Text s ->
+            s
+
+        Comment s ->
+            "<!--" ++ s ++ "-->"
+
+        Element tag attrs kids ->
+            let
+                opening =
+                    openTagToString tag attrs
+            in
+            if List.isEmpty kids && isVoidTag tag then
+                opening
+
+            else
+                opening
+                    ++ String.concat (List.map nodeToString kids)
+                    ++ "</"
+                    ++ tag
+                    ++ ">"
+
+
+{-| Render several html nodes as one compact string, in order, with nothing between them.
+
+    nodesToString
+        [ Element "a" [] [ Text "hi" ]
+        , Element "div" [] [ Element "span" [] [] ]
+        ]
+        == "<a>hi</a><div><span></span></div>"
+
+-}
+nodesToString : List Node -> String
+nodesToString nodes =
+    String.concat (List.map nodeToString nodes)
+
+
+{-| Convert a single node into an Elm html node that Elm can render.
+
+Comments have no virtual dom counterpart here and become empty text nodes.
+
+-}
+nodeToHtml : Node -> Html.Html msg
+nodeToHtml node_ =
+    case node_ of
+        Text s ->
+            Html.text s
+
+        Comment _ ->
+            Html.text ""
+
+        Element tag attrs kids ->
+            let
+                toAttribute ( key, value ) =
+                    Html.Attributes.attribute key value
+            in
+            Html.node tag
+                (List.map toAttribute attrs)
+                (List.map nodeToHtml kids)
+
+
+{-| Convert multiple html nodes into Elm html that Elm can render.
+
+    view : Html Msg
+    view =
+        Html.div
+            []
+            ("<p>hello world</p>"
+                |> Html.Parser.run
+                |> Result.map Html.Parser.nodesToHtml
+                |> Result.withDefault [ Html.text "parse error" ]
+            )
+
+-}
+nodesToHtml : List Node -> List (Html.Html msg)
+nodesToHtml nodes =
+    List.map nodeToHtml nodes
+
+
+{-| Generate a pretty, indented string for a single html node.
+
+The result does not start with a newline.
+
+-}
+nodeToPrettyString : Node -> String
+nodeToPrettyString node_ =
+    dropLeadingNewline (prettyNode_ 0 node_)
+
+
+{-| Remove a single leading newline, if there is one.
+
+`prettyNode_` starts every rendered node with a newline so that siblings and
+children end up on their own lines; the public functions strip the first one.
+
+-}
+dropLeadingNewline : String -> String
+dropLeadingNewline string =
+    if String.startsWith "\n" string then
+        String.dropLeft 1 string
+
+    else
+        string
+
+
+{-| Render a node at the given indentation level (4 spaces per level).
+
+Every non-empty result starts with a newline followed by the indentation.
+Whitespace-only text and empty comments render as the empty string.
+
+-}
+prettyNode_ : Int -> Node -> String
+prettyNode_ indent node_ =
+    let
+        pad =
+            String.repeat (indent * 4) " "
+    in
+    case node_ of
+        Text s ->
+            if String.isEmpty (String.trim s) then
+                ""
+
+            else
+                let
+                    -- Drop blank lines and indent every remaining line
+                    ss =
+                        s
+                            |> String.split "\n"
+                            |> List.filter (String.trim >> String.isEmpty >> not)
+                            |> String.join ("\n" ++ pad)
+                in
+                "\n" ++ pad ++ ss
+
+        Comment s ->
+            if String.isEmpty s then
+                ""
+
+            else
+                "\n" ++ pad ++ "<!--" ++ s ++ "-->"
+
+        Element tag attrs kids ->
+            let
+                opening =
+                    "\n" ++ pad ++ openTagToString tag attrs
+
+                closing =
+                    "</" ++ tag ++ ">"
+            in
+            if List.isEmpty kids then
+                -- Childless elements stay on one line; void elements get no closing tag
+                if isVoidTag tag then
+                    opening
+
+                else
+                    opening ++ closing
+
+            else
+                opening
+                    ++ String.concat (List.map (prettyNode_ (indent + 1)) kids)
+                    ++ "\n"
+                    ++ pad
+                    ++ closing
+
+
+{-| Turn a node tree into a pretty-printed, indented html string.
+
+Each top-level node starts on its own line; the result does not start with a newline.
+
+    ("<a><b><c>hello</c></b></a>"
+        |> Html.Parser.run
+        |> Result.map nodesToPrettyString
+    )
+        == Ok "<a>\n    <b>\n        <c>\n            hello\n        </c>\n    </b>\n</a>"
+
+-}
+nodesToPrettyString : List Node -> String
+nodesToPrettyString nodes =
+    nodes
+        |> List.map (prettyNode_ 0)
+        |> String.concat
+        |> dropLeadingNewline
+
+
+{-| Render the doctype declaration, including the legacy-compat string when the flag is set.
+-}
+doctypeToString : Bool -> String
+doctypeToString legacyCompat =
+    case legacyCompat of
+        True ->
+            "<!DOCTYPE html SYSTEM \"about:legacy-compat\">"
+
+        False ->
+            "<!DOCTYPE html>"
+
+
+{-| Render a document as its doctype declaration, a newline, and then the
+root node as a compact string.
+-}
+documentToString : Document -> String
+documentToString doc =
+    String.join "\n"
+        [ doctypeToString doc.legacyCompat
+        , nodeToString doc.root
+        ]
+
+
+{-| Render a document as its doctype declaration, a newline, and then the
+pretty-printed root node.
+-}
+documentToPrettyString : Document -> String
+documentToPrettyString doc =
+    String.join "\n"
+        [ doctypeToString doc.legacyCompat
+        , nodeToPrettyString doc.root
+        ]
--- a/tests/ParserTests.elm
+++ b/tests/ParserTests.elm
@ -0,0 +1,693 @@
+module ParserTests exposing (..)
+
+import Expect exposing (Expectation)
+import Fuzz exposing (Fuzzer, int, list, string)
+import Html.Parser exposing (Document, Node(..))
+import Parser exposing (DeadEnd)
+import Test exposing (..)
+
+
+{-| Build one test per `( name, html, expected )` case, parsing `html` with
+`Html.Parser.runDocument`.
+
+When `expected` is an `Err`, the test only requires that parsing fails; the dead ends
+carried by the expectation are never compared. Every other combination is checked
+with `Expect.equal`.
+
+-}
+testDoc : List ( String, String, Result (List DeadEnd) Document ) -> List Test
+testDoc cases =
+    List.map
+        (\( name, html, expected ) ->
+            test (name ++ ": " ++ html) <|
+                \_ ->
+                    case ( expected, Html.Parser.runDocument html ) of
+                        ( Err _, Err _ ) ->
+                            Expect.pass
+
+                        ( _, actual ) ->
+                            -- Expected value goes first so that the failure report
+                            -- shows actual and expected the right way round.
+                            actual |> Expect.equal expected
+        )
+        cases
+
+
+{-| Build one test per `( name, html, expected )` case: `html` is parsed with
+`Html.Parser.run` and the resulting nodes are rendered back with
+`Html.Parser.nodesToString`.
+
+When `expected` is an `Err`, the test only requires that parsing fails; the dead ends
+carried by the expectation are never compared. Every other combination is checked
+with `Expect.equal`.
+
+-}
+testStringRoundtrip : List ( String, String, Result (List DeadEnd) String ) -> List Test
+testStringRoundtrip cases =
+    List.map
+        (\( name, html, expected ) ->
+            test (name ++ " \"" ++ html ++ "\"") <|
+                \_ ->
+                    let
+                        actual =
+                            Html.Parser.run html
+                                |> Result.map Html.Parser.nodesToString
+                    in
+                    case ( expected, actual ) of
+                        ( Err _, Err _ ) ->
+                            Expect.pass
+
+                        _ ->
+                            -- Expected value goes first so that the failure report
+                            -- shows actual and expected the right way round.
+                            actual |> Expect.equal expected
+        )
+        cases
+
+
+{-| Build one test per `( name, html, expected )` case, parsing `html` with
+`Html.Parser.run`.
+
+When `expected` is an `Err`, the test only requires that parsing fails; the dead ends
+carried by the expectation are never compared. Every other combination is checked
+with `Expect.equal`.
+
+-}
+testAll : List ( String, String, Result (List DeadEnd) (List Node) ) -> List Test
+testAll cases =
+    List.map
+        (\( name, html, expected ) ->
+            test (name ++ " \"" ++ html ++ "\"") <|
+                \_ ->
+                    case ( expected, Html.Parser.run html ) of
+                        ( Err _, Err _ ) ->
+                            Expect.pass
+
+                        ( _, actual ) ->
+                            -- Expected value goes first so that the failure report
+                            -- shows actual and expected the right way round.
+                            actual |> Expect.equal expected
+        )
+        cases
+
+
+{-| Parse-then-render cases: each input is parsed and rendered back to a string
+(see `testStringRoundtrip`) and compared with the expected output.
+-}
+renderStringTests : Test
+renderStringTests =
+    describe "stringify tests" <|
+        testStringRoundtrip
+            [ ( "basic", "<a></a>", Ok "<a></a>" )
+            , ( "basic", "<a>foo</a>", Ok "<a>foo</a>" )
+            , ( "basic", "<a> foo </a>", Ok "<a> foo </a>" )
+            , ( "basic", "<a><b><c>foo</c></b></a>", Ok "<a><b><c>foo</c></b></a>" )
+            , ( "basic", "<A><B><C>foo</C></B></A>", Ok "<a><b><c>foo</c></b></a>" )
+            , ( "basic", "<a><!--a-->b<!--c--></a>", Ok "<a><!--a-->b<!--c--></a>" )
+            ]
+
+
+{-| Stray `<` and `>` characters that do not form a tag are expected to come back as
+plain text nodes.
+-}
+ambiguousTextTests : Test
+ambiguousTextTests =
+    describe "ambiguous text node parsing" <|
+        testAll
+            [ ( "basic1", "<div>:></div>", Ok [ Element "div" [] [ Text ":>" ] ] )
+            , ( "basic2", "<div><:</div>", Ok [ Element "div" [] [ Text "<:" ] ] )
+            , ( "basic3", "<:", Ok [ Text "<:" ] )
+            , ( "basic4", ":>", Ok [ Text ":>" ] )
+            ]
+
+
+{-| A void element (`<hr>`) followed by text: the text is expected to be a sibling of
+the element, and a stray `</hr>` is expected to be dropped.
+-}
+voidTests : Test
+voidTests =
+    describe "void nodes" <|
+        testAll
+            [ ( "without closing tag", "<hr>a", Ok [ Element "hr" [] [], Text "a" ] )
+            , ( "with closing tag", "<hr>a</hr>", Ok [ Element "hr" [] [], Text "a" ] )
+            ]
+
+
+{-| Whole-document cases run through `testDoc`: doctype variants (including the
+`about:legacy-compat` form) and documents whose `<html>` root is implied.
+-}
+documentTests : Test
+documentTests =
+    describe "document parsing" <|
+        testDoc
+            [ ( "", "<!doctype html>", Ok (Document False (Element "html" [] [])) )
+            , ( "", "<!DOCTYPE HTML>", Ok (Document False (Element "html" [] [])) )
+            , ( "", "<!doctype htmlSYSTEM \"about:legacy-compat\">", Err [] )
+            , ( "", "<!doctype html SYSTEM \"about:legacy-compat\">", Ok (Document True (Element "html" [] [])) )
+            , ( "", "<!doctype html sYsTem 'about:legacy-compat'>", Ok (Document True (Element "html" [] [])) )
+            , ( ""
+              , "<!doctype html><head>a<body>b"
+              , Ok
+                    (Document False
+                        (Element "html"
+                            []
+                            [ Element "head" [] [ Text "a" ]
+                            , Element "body" [] [ Text "b" ]
+                            ]
+                        )
+                    )
+              )
+            , ( ""
+              , "<!doctype html><meta charset=\"utf-8\">"
+              , Ok
+                    (Document False
+                        (Element "html"
+                            []
+                            [ Element "meta" [ ( "charset", "utf-8" ) ] []
+                            ]
+                        )
+                    )
+              )
+            ]
+
+
+{-| Comment cases: empty and non-empty comments, comments nested in elements, and a
+trailing `-->` that is expected to be left over as text.
+-}
+basicCommentTests : Test
+basicCommentTests =
+    describe "basic comment parsing" <|
+        testAll
+            [ ( "basic1", "<!---->", Ok [ Comment "" ] )
+            , ( "basic2", "<!-- -->", Ok [ Comment " " ] )
+            , ( "basic3", "<!--x-->", Ok [ Comment "x" ] )
+            , ( "basic4", "<a><!--x--></a>", Ok [ Element "a" [] [ Comment "x" ] ] )
+            , ( "basic5", "<!--a--><a><!--b--></a><!--c-->", Ok [ Comment "a", Element "a" [] [ Comment "b" ], Comment "c" ] )
+            , ( "basic6", "<!---->-->", Ok [ Comment "", Text "-->" ] )
+            ]
+
+
+{-| Single-element cases: tag-name case folding, whitespace inside the closing tag,
+and text surrounding an element.
+-}
+basicElementTests : Test
+basicElementTests =
+    describe "basic element parsing" <|
+        testAll
+            [ ( "my-basic1", "<a>:></a>", Ok [ Element "a" [] [ Text ":>" ] ] )
+
+            -- , ( "my-basic2", "<a><:</a>", Ok [ Element "a" [] [ Text "<:" ] ] )
+            -- Tests from hecrj/elm-html-parser
+            , ( "basic1", "<a></a>", Ok [ Element "a" [] [] ] )
+            , ( "basic2", "<a></a >", Ok [ Element "a" [] [] ] )
+            , ( "basic3", "<A></A >", Ok [ Element "a" [] [] ] )
+            , ( "basic4", " <a></a> ", Ok [ Text " ", Element "a" [] [], Text " " ] )
+            , ( "basic5", "a<a></a>b", Ok [ Text "a", Element "a" [] [], Text "b" ] )
+            , ( "basic6", "<A></A>", Ok [ Element "a" [] [] ] )
+            ]
+
+
+{-| Attribute cases: unquoted, single-quoted, double-quoted and value-less
+attributes, alone and combined on one element.
+-}
+basicAttributeTests : Test
+basicAttributeTests =
+    describe "basic attribute parsing" <|
+        testAll
+            [ ( "unquoted1", "<div a=b/></div>", Ok [ Element "div" [ ( "a", "b/" ) ] [] ] )
+            , ( "unquoted2", "<div a=b />", Ok [ Element "div" [ ( "a", "b" ) ] [] ] )
+            , ( "single-quoted", "<div a='b'/>", Ok [ Element "div" [ ( "a", "b" ) ] [] ] )
+            , ( "double-quoted", "<div a=\"b\"/>", Ok [ Element "div" [ ( "a", "b" ) ] [] ] )
+            , ( "key-only1", "<div a></div>", Ok [ Element "div" [ ( "a", "" ) ] [] ] )
+            , ( "key-only2", "<div a/>", Ok [ Element "div" [ ( "a", "" ) ] [] ] )
+            , ( "everything"
+              , "<div a=b c='d' e=\"f\" g/>"
+              , Ok
+                    [ Element "div"
+                        [ ( "a", "b" )
+                        , ( "c", "d" )
+                        , ( "e", "f" )
+                        , ( "g", "" )
+                        ]
+                        []
+                    ]
+              )
+            ]
+
+
+{-| Cases where an open element is expected to be closed implicitly by a following
+start tag (`<p>`, `<li>`, `<head>`/`<body>`).
+-}
+autoclosingTests : Test
+autoclosingTests =
+    describe "autoclosing elements" <|
+        testAll
+            [ ( "p-basic1", "<p>a<p>b", Ok [ Element "p" [] [ Text "a" ], Element "p" [] [ Text "b" ] ] )
+            , ( "li-basic", "<li><li>", Ok [ Element "li" [] [], Element "li" [] [] ] )
+            , ( "li-basic", "<ul><li><li></ul>", Ok [ Element "ul" [] [ Element "li" [] [], Element "li" [] [] ] ] )
+            , ( "li-basic", "<li>a<li>b</li>", Ok [ Element "li" [] [ Text "a" ], Element "li" [] [ Text "b" ] ] )
+            , ( "li-comment"
+              , "<li>a<!--c--><li>b<!--d--></li>"
+              , Ok
+                    [ Element "li" [] [ Text "a", Comment "c" ]
+                    , Element "li" [] [ Text "b", Comment "d" ]
+                    ]
+              )
+
+            -- TODO
+            -- , ( "li-comment-backtrack"
+            --   , "<li>a<!-c<li>b<!-d</li>"
+            --   , Ok
+            --         [ Element "li" [] [ Text "a<!-c" ]
+            --         , Element "li" [] [ Text "b<!-d" ]
+            --         ]
+            --   )
+            , ( "li-basic", "<li>a</li><li>b", Ok [ Element "li" [] [ Text "a" ], Element "li" [] [ Text "b" ] ] )
+            , ( "li-basic1", "<li>a</li><li>b</li>", Ok [ Element "li" [] [ Text "a" ], Element "li" [] [ Text "b" ] ] )
+            , ( "li-basic2"
+              , "<li>a<li>b</li>c</li>"
+              , Ok
+                    [ Element "li" [] [ Text "a" ]
+                    , Element "li" [] [ Text "b" ]
+                    , Text "c"
+                    ]
+              )
+            , ( "li-basic3"
+              , "<li>a<ul><li>b</li></ul>c</li>"
+              , Ok
+                    [ Element "li"
+                        []
+                        [ Text "a"
+                        , Element "ul"
+                            []
+                            [ Element "li" [] [ Text "b" ]
+                            ]
+                        , Text "c"
+                        ]
+                    ]
+              )
+
+            -- Not valid html, but the parser should still parse it.
+            , ( "head1"
+              , "<head>a<head>b"
+              , Ok
+                    [ Element "head" [] [ Text "a" ]
+                    , Element "head" [] [ Text "b" ]
+                    ]
+              )
+
+            -- Unlike the previous test, here's an example of where the parser must invoke the html5
+            -- spec only to disambiguate where <body> should be a child vs. sibling
+            -- of the unended <head> element.
+            , ( "head2"
+              , "<head><title>hello</title><body>"
+              , Ok
+                    [ Element "head"
+                        []
+                        [ Element "title" [] [ Text "hello" ] ]
+                    , Element "body" [] []
+                    ]
+              )
+            ]
+
+
+{-| Nested-element cases, including a nested `<ul>` written once with every `</li>`
+present and once with none; both are expected to produce the same tree.
+-}
+basicNestingTests : Test
+basicNestingTests =
+    describe "nested elements" <|
+        testAll
+            [ ( "abc"
+              , "<a><b><c></c></b></a>"
+              , Ok
+                    [ Element "a"
+                        []
+                        [ Element "b"
+                            []
+                            [ Element "c" [] []
+                            ]
+                        ]
+                    ]
+              )
+            , ( "nested <ul> where all <li> are closed </li>"
+              , """<ul><li>a</li><li>b<ul><li>x</li><li>y</li></ul></li><li>c</li></ul>"""
+              , Ok
+                    [ Element "ul"
+                        []
+                        [ Element "li" [] [ Text "a" ]
+                        , Element "li"
+                            []
+                            [ Text "b"
+                            , Element "ul"
+                                []
+                                [ Element "li" [] [ Text "x" ]
+                                , Element "li" [] [ Text "y" ]
+                                ]
+                            ]
+                        , Element "li" [] [ Text "c" ]
+                        ]
+                    ]
+              )
+            , ( "nested <ul> where zero <li> are closed with </li>"
+              , """<ul><li>a<li>b<ul><li>x<li>y</ul><li>c</ul>"""
+              , Ok
+                    [ Element "ul"
+                        []
+                        [ Element "li" [] [ Text "a" ]
+                        , Element "li"
+                            []
+                            [ Text "b"
+                            , Element "ul"
+                                []
+                                [ Element "li" [] [ Text "x" ]
+                                , Element "li" [] [ Text "y" ]
+                                ]
+                            ]
+                        , Element "li" [] [ Text "c" ]
+                        ]
+                    ]
+              )
+            ]
+
+
+{-| `<hr>` written with a closing tag, bare, and self-closed; all three are expected
+to parse to the same childless element.
+-}
+voidElementTests : Test
+voidElementTests =
+    testAll
+        [ ( "invalid", "<hr></hr>", Ok [ Element "hr" [] [] ] )
+        , ( "valid1", "<hr>", Ok [ Element "hr" [] [] ] )
+        , ( "valid2", "<hr/>", Ok [ Element "hr" [] [] ] )
+        ]
+        |> describe "void elements"
+
+
+{-| Text-node cases: plain text, a bare `&`, and named, decimal and hexadecimal
+character references (including one inside an attribute value).
+-}
+textNodeTests : Test
+textNodeTests =
+    testAll
+        [ ( "empty", "", Ok [] )
+        , ( "space", " ", Ok [ Text " " ] )
+        , ( "basic1", "1", Ok [ Text "1" ] )
+        , ( "basic2", "a", Ok [ Text "a" ] )
+        , ( "basic3", "1a", Ok [ Text "1a" ] )
+        , ( "basic4", "^", Ok [ Text "^" ] )
+        , ( "decode1", "&", Ok [ Text "&" ] )
+        , ( "decode2", "&amp;", Ok [ Text "&" ] )
+        , ( "decode3", "&lt;", Ok [ Text "<" ] )
+        , ( "decode4", "&gt;", Ok [ Text ">" ] )
+        , ( "decode5", "&apos;", Ok [ Text "'" ] )
+        , ( "decode6", "&#38;", Ok [ Text "&" ] )
+        , ( "decode7", "&#x26;", Ok [ Text "&" ] )
+        , ( "decode8", "&#x3E;", Ok [ Text ">" ] )
+        , ( "decode9", "&#383;", Ok [ Text "ſ" ] )
+        , ( "decodeA", "&nbsp;", Ok [ Text "\u{00A0}" ] )
+        , ( "decodeB", "&nbsp;&nbsp;", Ok [ Text "\u{00A0}\u{00A0}" ] )
+        , ( "decodeC", "a&nbsp;b", Ok [ Text "a\u{00A0}b" ] )
+        , ( "decodeD", "a&nbsp;&nbsp;b", Ok [ Text "a\u{00A0}\u{00A0}b" ] )
+        , ( "decodeE", """<img alt="&lt;">""", Ok [ Element "img" [ ( "alt", "<" ) ] [] ] )
+        , ( "decodeF", "&#0038;", Ok [ Text "&" ] )
+        ]
+        |> describe "text node parsing"
+
+
+{-| `<script>` cases: the body is expected to be kept as raw text up to the first
+`</script>`, whatever JavaScript quoting or commenting surrounds it.
+-}
+scriptTests : Test
+scriptTests =
+    testAll
+        [ ( "basic1", "<script></script>", Ok [ Element "script" [] [] ] )
+        , ( "basic2", "<script>foo</script>", Ok [ Element "script" [] [ Text "foo" ] ] )
+
+        -- Copy browser behavior
+        , ( "basic3", "<script></script></script>", Ok [ Element "script" [] [] ] )
+        , ( "basic4", "<script><script></script>", Ok [ Element "script" [] [ Text "<script>" ] ] )
+        , ( "attrs1", "<script src=index.js></script>", Ok [ Element "script" [ ( "src", "index.js" ) ] [] ] )
+        , ( "js1", "<script>'</script>'</script>", Ok [ Element "script" [] [ Text "'</script>'" ] ] )
+        , ( "js2", "<script>\"</script>\"</script>", Ok [ Element "script" [] [ Text "\"</script>\"" ] ] )
+        , ( "js3", "<script>`</script>`</script>", Ok [ Element "script" [] [ Text "`</script>`" ] ] )
+        , ( "js4", "<script>x < 42 || x > 42</script>", Ok [ Element "script" [] [ Text "x < 42 || x > 42" ] ] )
+        , ( "comment1", "<script>\n//</script>\n</script>", Ok [ Element "script" [] [ Text "\n//</script>\n" ] ] )
+        , ( "comment2", "<script>\n/*\n</script>\n*/\n</script>", Ok [ Element "script" [] [ Text "\n/*\n</script>\n*/\n" ] ] )
+        ]
+        |> describe "<script> node"
+
+
+
+-- TESTS FROM hecrj/elm-html-parser
+
+
+{-| Test body asserting that parsing `s` with `Html.Parser.run` succeeds and yields
+exactly `astList`.
+-}
+testParseAll : String -> List Node -> (() -> Expectation)
+testParseAll s astList =
+    \_ ->
+        Html.Parser.run s
+            |> Expect.equal (Ok astList)
+
+
+{-| Test body asserting that parsing `input` with `Html.Parser.run` succeeds and
+yields exactly one node equal to `expected`. A parse error fails the test with the
+dead ends printed.
+-}
+testParse : String -> Node -> (() -> Expectation)
+testParse input expected =
+    \_ ->
+        case Html.Parser.run input of
+            Err deadEnds ->
+                -- `Parser.deadEndsToString` is an unimplemented placeholder in
+                -- elm/parser, so print the dead ends with `Debug.toString` instead.
+                Expect.fail ("Parse error: " ++ Debug.toString deadEnds)
+
+            Ok actual ->
+                -- Expected value goes first so that the failure report shows
+                -- actual and expected the right way round.
+                actual |> Expect.equal [ expected ]
+
+
+{-| Element cases taken from hecrj/elm-html-parser: nesting, void ("start-only")
+tags, self-closing tags and a custom-element tag name.
+-}
+hecrjNodeTests : Test
+hecrjNodeTests =
+    [ test "basic1" (testParse "<a></a>" (Element "a" [] []))
+    , test "basic2" (testParse "<a></a >" (Element "a" [] []))
+    , test "basic3" (testParse "<A></A >" (Element "a" [] []))
+    , test "basic4" (testParseAll " <a></a> " [ Text " ", Element "a" [] [], Text " " ])
+    , test "basic5" (testParseAll "a<a></a>b" [ Text "a", Element "a" [] [], Text "b" ])
+    , test "basic6" (testParse "<A></A>" (Element "a" [] []))
+    , test "basic7" (testParse "<a>a</a>" (Element "a" [] [ Text "a" ]))
+    , test "basic8" (testParse "<a> a </a>" (Element "a" [] [ Text " a " ]))
+    , test "basic10" (testParse "<br>" (Element "br" [] []))
+    , test "basic11" (testParse "<a><a></a></a>" (Element "a" [] [ Element "a" [] [] ]))
+    , test "basic12" (testParse "<a> <a> </a> </a>" (Element "a" [] [ Text " ", Element "a" [] [ Text " " ], Text " " ]))
+    , test "basic13" (testParse "<a> <br> </a>" (Element "a" [] [ Text " ", Element "br" [] [], Text " " ]))
+    , test "basic14" (testParse "<a><a></a><a></a></a>" (Element "a" [] [ Element "a" [] [], Element "a" [] [] ]))
+    , test "basic15" (testParse "<a><a><a></a></a></a>" (Element "a" [] [ Element "a" [] [ Element "a" [] [] ] ]))
+    , test "basic16" (testParse "<a><a></a><b></b></a>" (Element "a" [] [ Element "a" [] [], Element "b" [] [] ]))
+    , test "basic17" (testParse "<h1></h1>" (Element "h1" [] []))
+    , test "start-only-tag1" (testParse "<br>" (Element "br" [] []))
+    , test "start-only-tag2" (testParse "<BR>" (Element "br" [] []))
+    , test "start-only-tag3" (testParse "<br >" (Element "br" [] []))
+    , test "start-only-tag4" (testParse "<BR >" (Element "br" [] []))
+    , test "start-only-tag5" (testParse "<a> <br> </a>" (Element "a" [] [ Text " ", Element "br" [] [], Text " " ]))
+    , test "start-only-tag6" (testParse "<a><br><br></a>" (Element "a" [] [ Element "br" [] [], Element "br" [] [] ]))
+    , test "start-only-tag7" (testParse "<a><br><img><hr><meta></a>" (Element "a" [] [ Element "br" [] [], Element "img" [] [], Element "hr" [] [], Element "meta" [] [] ]))
+    , test "start-only-tag8" (testParse "<a>foo<br>bar</a>" (Element "a" [] [ Text "foo", Element "br" [] [], Text "bar" ]))
+    , test "self-closing-tag1" (testParse "<br/>" (Element "br" [] []))
+    , test "self-closing-tag2" (testParse "<br />" (Element "br" [] []))
+    , test "self-closing-tag3" (testParse "<link href=\"something\" rel=\"something else\"/>" (Element "link" [ ( "href", "something" ), ( "rel", "something else" ) ] []))
+    , test "web-component-tag" (testParse "<a-web-component></a-web-component>" (Element "a-web-component" [] []))
+    ]
+        |> describe "Node"
+
+
+{-| Attribute cases taken from hecrj/elm-html-parser: quoting styles, attribute-name
+case folding, character references inside values, value-less attributes, and
+attributes separated by a line break.
+-}
+hecrjAttributeTests : Test
+hecrjAttributeTests =
+    describe "Attribute"
+        [ test "basic1" (testParse """<a href="example.com"></a>""" (Element "a" [ ( "href", "example.com" ) ] []))
+        , test "basic2" (testParse """<a href='example.com'></a>""" (Element "a" [ ( "href", "example.com" ) ] []))
+        , test "basic3" (testParse """<a href=example.com></a>""" (Element "a" [ ( "href", "example.com" ) ] []))
+        , test "basic4" (testParse """<a HREF=example.com></a>""" (Element "a" [ ( "href", "example.com" ) ] []))
+        , test "basic5" (testParse """<a href=bare></a>""" (Element "a" [ ( "href", "bare" ) ] []))
+        , test "basic6" (testParse """<a href="example.com?a=b&amp;c=d"></a>""" (Element "a" [ ( "href", "example.com?a=b&c=d" ) ] []))
+        , test "basic7" (testParse """<a href="example.com?a=b&c=d"></a>""" (Element "a" [ ( "href", "example.com?a=b&c=d" ) ] []))
+        , test "basic8" (testParse """<input max=100 min = 10.5>""" (Element "input" [ ( "max", "100" ), ( "min", "10.5" ) ] []))
+        , test "basic9" (testParse """<input disabled>""" (Element "input" [ ( "disabled", "" ) ] []))
+        , test "basic10" (testParse """<input DISABLED>""" (Element "input" [ ( "disabled", "" ) ] []))
+        , test "basic11" (testParse """<meta http-equiv=Content-Type>""" (Element "meta" [ ( "http-equiv", "Content-Type" ) ] []))
+        , test "basic12" (testParse """<input data-foo2="a">""" (Element "input" [ ( "data-foo2", "a" ) ] []))
+        , test "basic13" (testParse """<html xmlns:v="urn:schemas-microsoft-com:vml"></html>""" (Element "html" [ ( "xmlns:v", "urn:schemas-microsoft-com:vml" ) ] []))
+
+        -- The input below spans two source lines: the newline and the leading
+        -- spaces before `href` are part of the html being parsed.
+        , test "basic14" (testParse """<link rel=stylesheet
+        href="">""" (Element "link" [ ( "rel", "stylesheet" ), ( "href", "" ) ] []))
+
+        -- Invalid attribute names shouldn't be parsed: https://github.com/elm/html/issues/46
+        , test "invalid character" (testParse """<p\u{00A0} ></p>""" (Element "p" [] []))
+        ]
+
+
+{-| `<script>` / `<style>` cases taken from hecrj/elm-html-parser, including html
+comments inside the element body that are expected to come back as `Comment` nodes.
+-}
+hecrjScriptTests : Test
+hecrjScriptTests =
+    [ test "script1" (testParse """<script></script>""" (Element "script" [] []))
+    , test "script2" (testParse """<SCRIPT></SCRIPT>""" (Element "script" [] []))
+    , test "script3" (testParse """<script src="script.js">foo</script>""" (Element "script" [ ( "src", "script.js" ) ] [ Text "foo" ]))
+    , test "script4" (testParse """<script>var a = 0 < 1; b = 1 > 0;</script>""" (Element "script" [] [ Text "var a = 0 < 1; b = 1 > 0;" ]))
+    , test "script5" (testParse """<script><!----></script>""" (Element "script" [] [ Comment "" ]))
+    , test "script6" (testParse """<script>a<!--</script><script>-->b</script>""" (Element "script" [] [ Text "a", Comment "</script><script>", Text "b" ]))
+    , test "style" (testParse """<style>a<!--</style><style>-->b</style>""" (Element "style" [] [ Text "a", Comment "</style><style>", Text "b" ]))
+    ]
+        |> describe "Script"
+
+
+{-| Comment cases taken from hecrj/elm-html-parser: markup inside a comment, a nested
+`<!--`, and a comment body containing tab, carriage return and newline.
+-}
+hecrjCommentTests : Test
+hecrjCommentTests =
+    describe "Comment"
+        [ test "basic1" (testParse """<!---->""" (Comment ""))
+        , test "basic2" (testParse """<!--<div></div>-->""" (Comment "<div></div>"))
+        , test "basic3" (testParse """<div><!--</div>--></div>""" (Element "div" [] [ Comment "</div>" ]))
+        , test "basic4" (testParse """<!--<!---->""" (Comment "<!--"))
+
+        -- The input below spans two source lines: the newline and the eight
+        -- leading spaces before `-->` are part of the comment body.
+        , test "basic5" (testParse """<!--foo\t\u{000D}
+        -->""" (Comment "foo\t\u{000D}\n        "))
+        ]
+
+
+{-| A self-closed `<path />` inside `<svg>` is expected to become a childless child
+element, with the `viewBox` attribute name lowercased.
+-}
+svgTests : Test
+svgTests =
+    test "self-closing svg path"
+        (testParse
+            """<svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"><path fill-rule="evenodd" d="1 2 3" clip-rule="evenodd" /></svg>"""
+            (Element "svg"
+                [ ( "viewbox", "0 0 20 20" )
+                , ( "fill", "currentColor" )
+                , ( "aria-hidden", "true" )
+                ]
+                [ Element "path"
+                    [ ( "fill-rule", "evenodd" )
+                    , ( "d", "1 2 3" )
+                    , ( "clip-rule", "evenodd" )
+                    ]
+                    []
+                ]
+            )
+        )
+
+
+
+-- https://github.com/taoqf/node-html-parser/blob/main/test/tests/html.js
+
+
+{-| Cases adapted from the taoqf/node-html-parser test suite (linked in the comment
+above this definition).
+-}
+nodeHtmlParserTests : Test
+nodeHtmlParserTests =
+    describe "taoqf/node-html-parser tests" <|
+        testAll
+            [ ( "test1"
+              , "<p id=\"id\"><a class='cls'>Hello</a><ul><li><li></ul><span></span></p>"
+              , Ok
+                    [ Element "p"
+                        [ ( "id", "id" ) ]
+                        [ Element "a"
+                            [ ( "class", "cls" ) ]
+                            [ Text "Hello"
+                            ]
+                        , Element "ul"
+                            []
+                            [ Element "li" [] []
+                            , Element "li" [] []
+                            ]
+                        , Element "span" [] []
+                        ]
+                    ]
+              )
+            , ( "test2"
+              , "<DIV><a><img/></A><p></P></div>"
+              , Ok
+                    [ Element "div"
+                        []
+                        [ Element "a"
+                            []
+                            [ Element "img" [] []
+                            ]
+                        , Element "p" [] []
+                        ]
+                    ]
+              )
+            , ( "test3"
+              , "<div><a><img/></a><p></p></div>"
+              , Ok
+                    [ Element "div"
+                        []
+                        [ Element "a"
+                            []
+                            [ Element "img" [] []
+                            ]
+                        , Element "p" [] []
+                        ]
+                    ]
+              )
+            , ( "test4"
+              , "<div><a><!-- my comment --></a></div>"
+              , Ok
+                    [ Element "div"
+                        []
+                        [ Element "a"
+                            []
+                            [ Comment " my comment "
+                            ]
+                        ]
+                    ]
+              )
+            , ( "test5"
+              , "<div><!--<a></a>--></div>"
+              , Ok
+                    [ Element "div"
+                        []
+                        [ Comment "<a></a>"
+                        ]
+                    ]
+              )
+            , ( "test6"
+              , "<picture><source srcset=\"/images/example-1.jpg 1200w, /images/example-2.jpg 1600w\" sizes=\"100vw\"><img src=\"/images/example.jpg\" alt=\"Example\"/></picture>"
+              , Ok
+                    [ Element "picture"
+                        []
+                        [ Element "source"
+                            [ ( "srcset", "/images/example-1.jpg 1200w, /images/example-2.jpg 1600w" )
+                            , ( "sizes", "100vw" )
+                            ]
+                            []
+                        , Element "img" [ ( "src", "/images/example.jpg" ), ( "alt", "Example" ) ] []
+                        ]
+                    ]
+              )
+            , ( "test7"
+              , "<script>1</script><style>2&amp;</style>"
+              , Ok
+                    [ Element "script" [] [ Text "1" ]
+                    , Element "style" [] [ Text "2&" ]
+                    ]
+              )
+            ]
+
+
+
+-- JSOUP TESTS
+-- https://github.com/jhy/jsoup/blob/master/src/test/java/org/jsoup/parser/AttributeParseTest.java
+
+
+{-| Attribute-parsing cases adapted from jsoup's `AttributeParseTest` (linked in the
+comment above this definition): loosely formatted attribute strings, line breaks
+between tokens, names starting with `=`, character references, and stray slashes.
+-}
+jsoupAttributeTests : Test
+jsoupAttributeTests =
+    describe "(from jsoup) attributes" <|
+        testAll
+            [ ( "parses rough attribute string"
+              , "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />"
+              , Ok
+                    [ Element "a"
+                        [ ( "id", "123" )
+                        , ( "class", "baz = 'bar'" )
+                        , ( "style", "border: 2px" )
+                        , ( "qux", "" )
+                        , ( "zim", "" )
+                        , ( "foo", "12" )
+                        , ( "mux", "18" )
+                        ]
+                        []
+                    ]
+              )
+            , ( "handles newlines and returns"
+              , --   "<a\r\nfoo='bar\r\nqux'\r\nbar\r\n=\r\ntwo>One</a>"
+                "<a\u{000D}\nfoo='bar\u{000D}\nqux'\u{000D}\nbar\u{000D}\n=\u{000D}\ntwo>One</a>"
+              , Ok
+                    [ Element "a"
+                        [ ( "foo", "bar\u{000D}\nqux" )
+                        , ( "bar", "two" )
+                        ]
+                        [ Text "One" ]
+                    ]
+              )
+            , ( "parses empty string", "<a />", Ok [ Element "a" [] [] ] )
+
+            -- https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+            , ( "can start with '='"
+              , "<a =empty />"
+              , Ok [ Element "a" [ ( "=empty", "" ) ] [] ]
+              )
+            , ( "strict attribute unescapes"
+              , -- "<a id=1 href='?foo=bar&mid&lt=true'>One</a> <a id=2 href='?foo=bar&lt;qux&lg=1'>Two</a>"
+                "<a id=1 href='?foo=bar&mid&lt=true'>One</a> <a id=2 href='?foo=bar&lt;qux&lg=1'>Two</a>"
+              , Ok
+                    [ Element "a"
+                        [ ( "id", "1" )
+                        , ( "href", "?foo=bar&mid&lt=true" )
+                        ]
+                        [ Text "One" ]
+                    , Text " "
+                    , Element "a"
+                        [ ( "id", "2" )
+                        , ( "href", "?foo=bar<qux&lg=1" )
+                        ]
+                        [ Text "Two" ]
+                    ]
+              )
+            , ( "more attribute unescapes"
+              , "<a href='&wr_id=123&mid-size=true&ok=&wr'>Check</a>"
+              , Ok
+                    [ Element "a"
+                        [ ( "href", "&wr_id=123&mid-size=true&ok=&wr" )
+                        ]
+                        [ Text "Check" ]
+                    ]
+              )
+            , ( "drops slash from attribute"
+              , "<img /onerror='doMyJob' /a /=b/>"
+              , Ok
+                    [ Element "img"
+                        [ ( "onerror", "doMyJob" )
+                        , ( "a", "" )
+                        , ( "=b", "" )
+                        ]
+                        []
+                    ]
+              )
+            ]
+
+
+
+-- TODO: https://github.com/jhy/jsoup/blob/master/src/test/java/org/jsoup/parser/HtmlParserTest.java