mirror of
https://github.com/serokell/xrefcheck.git
synced 2024-08-16 17:10:26 +03:00
[#95] Support HTML tag parsing compatible with HTML spec
Problem: We had a hardcoded HTML tag parser that doesn't work with all valid HTML tags. Solution: Replace it with the `tagsoup` library, which takes care of all the parsing.
This commit is contained in:
parent
e4af3c7310
commit
a6b4513587
@ -101,6 +101,7 @@ library:
|
||||
- regex-tdfa
|
||||
- req
|
||||
- roman-numerals
|
||||
- tagsoup
|
||||
- text
|
||||
- text-metrics
|
||||
- th-lift-instances
|
||||
|
@ -26,6 +26,7 @@ import Data.Default (def)
|
||||
import Data.Text qualified as T
|
||||
import Data.Text.Lazy qualified as LT
|
||||
import Fmt (Buildable (..), blockListF, nameF, (+|), (|+))
|
||||
import Text.HTML.TagSoup
|
||||
|
||||
import Xrefcheck.Core
|
||||
import Xrefcheck.Scan
|
||||
@ -146,7 +147,16 @@ nodeExtractInfo input@(Node _ _ nSubs) = do
|
||||
return $ FileInfoDiff DList.empty $ DList.singleton $ Anchor {aType, aName, aPos}
|
||||
|
||||
HTML_INLINE text -> do
|
||||
let mName = T.stripSuffix "\">" =<< T.stripPrefix "<a name=\"" text
|
||||
let
|
||||
mName = do
|
||||
tag <- safeHead $ parseTags text
|
||||
attributes <- case tag of
|
||||
TagOpen a attrs
|
||||
| T.toLower a == "a" -> Just attrs
|
||||
_ -> Nothing
|
||||
(_, name) <- find (\(field, _) -> T.toLower field == "name") attributes
|
||||
pure name
|
||||
|
||||
case mName of
|
||||
Just aName -> do
|
||||
let aType = HandAnchor
|
||||
|
13
tests/golden/check-html/check-html.md
Normal file
13
tests/golden/check-html/check-html.md
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
- SPDX-FileCopyrightText: 2022 Serokell <https://serokell.io>
|
||||
-
|
||||
- SPDX-License-Identifier: MPL-2.0
|
||||
-->
|
||||
|
||||
## <a name='one'> <a name=two> <a NAME="three"> <a name="four"></a> <a NAME=five > Title1
|
||||
|
||||
[One](#one)
|
||||
[Two](#two)
|
||||
[Three](#three)
|
||||
[Four](#four)
|
||||
[Five](#five)
|
16
tests/golden/check-html/check.html.bats
Normal file
16
tests/golden/check-html/check.html.bats
Normal file
@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bats
|
||||
|
||||
# SPDX-FileCopyrightText: 2022 Serokell <https://serokell.io>
|
||||
#
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
load '../helpers/bats-support/load'
|
||||
load '../helpers/bats-assert/load'
|
||||
load '../helpers'
|
||||
|
||||
|
||||
@test "All HTML anchors should be valid" {
|
||||
run xrefcheck
|
||||
|
||||
assert_output --partial "All repository links are valid."
|
||||
}
|
Loading…
Reference in New Issue
Block a user