mirror of
https://github.com/serokell/xrefcheck.git
synced 2024-08-16 09:00:45 +03:00
[#125] Display URL parsing errors
Problem: We use a 2-step process to parse a URL: we use `parseURI` and then `mkURIBs`. Both of these functions can fail. At the moment, we're ignoring their errors and simply throwing a `ExternalResourceInvalidUri`, and then displaying a generic error message to the user. Solution: Catch errors from `parseUri` and `mkURIBs` and use them to tell user why the URL was invalid.
This commit is contained in:
parent
391a5ea040
commit
a3f2d28216
@ -35,6 +35,8 @@ Unreleased
|
||||
+ Allow certain reserved characters to be present in the query strings of the URLs.
|
||||
* [#130](https://github.com/serokell/xrefcheck/pull/130)
|
||||
+ Fixed bug with ignoring checks for relative anchors.
|
||||
* [#132](https://github.com/serokell/xrefcheck/pull/132)
|
||||
+ Display URL parsing errors.
|
||||
|
||||
0.2.1
|
||||
==========
|
||||
|
@ -85,6 +85,7 @@ library:
|
||||
- directory-tree
|
||||
- directory
|
||||
- dlist
|
||||
- exceptions
|
||||
- filepath
|
||||
- raw-strings-qq
|
||||
- fmt
|
||||
@ -151,6 +152,7 @@ tests:
|
||||
- time
|
||||
- universum
|
||||
- modern-uri
|
||||
- uri-bytestring
|
||||
- yaml
|
||||
build-tools:
|
||||
- hspec-discover
|
||||
|
@ -17,6 +17,7 @@ import Fmt (Buildable (..), unlinesF, (+|), (|+))
|
||||
import Network.FTP.Client
|
||||
(FTPException (..), FTPMessage (..), FTPResponse (..), ResponseStatus (..))
|
||||
import Text.URI (RText, unRText)
|
||||
import URI.ByteString (URIParseError (..), SchemaError (..))
|
||||
|
||||
instance ToString (RText t) where
|
||||
toString = toString . unRText
|
||||
@ -45,3 +46,20 @@ instance Buildable FTPException where
|
||||
build (BogusResponseFormatException e) = build e
|
||||
|
||||
deriving stock instance Eq FTPException
|
||||
|
||||
instance Buildable URIParseError where
|
||||
build = \case
|
||||
MalformedScheme e -> build e
|
||||
MalformedUserInfo -> "Malformed user info"
|
||||
MalformedQuery -> "Malformed query"
|
||||
MalformedFragment -> "Malformed fragment"
|
||||
MalformedHost -> "Malformed host"
|
||||
MalformedPort -> "Malformed port"
|
||||
MalformedPath -> "Malformed path"
|
||||
OtherError e -> build e
|
||||
|
||||
instance Buildable SchemaError where
|
||||
build = \case
|
||||
NonAlphaLeading -> "Scheme must start with an alphabet character"
|
||||
InvalidChars -> "Subsequent characters in the schema were invalid"
|
||||
MissingColon -> "Schemas must be followed by a colon"
|
||||
|
@ -44,7 +44,7 @@ import Data.Text.Metrics (damerauLevenshteinNorm)
|
||||
import Data.Time (UTCTime, defaultTimeLocale, formatTime, readPTime, rfc822DateFormat)
|
||||
import Data.Time.Clock.POSIX (getPOSIXTime)
|
||||
import Data.Traversable (for)
|
||||
import Fmt (Buildable (..), blockListF', listF, maybeF, nameF, (+|), (|+))
|
||||
import Fmt (Buildable (..), blockListF', listF, maybeF, nameF, (+|), (|+), unlinesF, indentF)
|
||||
import GHC.Exts qualified as Exts
|
||||
import GHC.Read (Read (readPrec))
|
||||
import Network.FTP.Client
|
||||
@ -62,9 +62,10 @@ import System.FilePath (takeDirectory, (</>), normalise)
|
||||
import System.FilePath.Glob qualified as Glob
|
||||
import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift)
|
||||
import Text.Regex.TDFA.Text (Regex, regexec)
|
||||
import Text.URI (Authority (..), URI (..), mkURIBs)
|
||||
import Text.URI (Authority (..), URI (..), mkURIBs, ParseExceptionBs)
|
||||
import Time (RatioNat, Second, Time (..), ms, sec, threadDelay, timeout, (+:+), (-:-))
|
||||
import URI.ByteString qualified as URIBS
|
||||
import Control.Monad.Catch (handleJust)
|
||||
|
||||
import Data.Bits (toIntegralSized)
|
||||
import Xrefcheck.Config
|
||||
@ -122,7 +123,8 @@ data VerifyError
|
||||
= LocalFileDoesNotExist FilePath
|
||||
| AnchorDoesNotExist Text [Anchor]
|
||||
| AmbiguousAnchorRef FilePath Text (NonEmpty Anchor)
|
||||
| ExternalResourceInvalidUri
|
||||
| ExternalResourceInvalidUri URIBS.URIParseError
|
||||
| ExternalResourceUriConversionError ParseExceptionBs
|
||||
| ExternalResourceInvalidUrl (Maybe Text)
|
||||
| ExternalResourceUnknownProtocol
|
||||
| ExternalHttpResourceUnavailable Status
|
||||
@ -150,8 +152,14 @@ instance Buildable VerifyError where
|
||||
" Use of such anchors is discouraged because referenced object\n\
|
||||
\ can change silently whereas the document containing it evolves.\n"
|
||||
|
||||
ExternalResourceInvalidUri ->
|
||||
"⛂ Invalid URI\n"
|
||||
ExternalResourceInvalidUri err ->
|
||||
"⛂ Invalid URI (" +| err |+ ")\n"
|
||||
|
||||
ExternalResourceUriConversionError err ->
|
||||
unlinesF
|
||||
[ "⛂ Invalid URI"
|
||||
, indentF 4 . build $ displayException err
|
||||
]
|
||||
|
||||
ExternalResourceInvalidUrl Nothing ->
|
||||
"⛂ Invalid URL\n"
|
||||
@ -468,7 +476,7 @@ parseUri link = do
|
||||
-- RFC 3986, and the other -- 'laxURIParseOptions' -- allows brackets
|
||||
-- in the queries, which draws us closer to the WHATWG URL standard.
|
||||
uri' <- URIBS.parseURI URIBS.laxURIParserOptions (encodeUtf8 link)
|
||||
& either (const $ throwError ExternalResourceInvalidUri) pure
|
||||
& either (throwError . ExternalResourceInvalidUri) pure
|
||||
|
||||
-- We stick to our infrastructure by continuing to operate on the datatypes
|
||||
-- from `modern-uri`, which are used in the 'req' library. First we
|
||||
@ -476,7 +484,12 @@ parseUri link = do
|
||||
-- with all the necessary special characters *percent-encoded*, and then
|
||||
-- call 'mkURIBs'.
|
||||
mkURIBs (URIBS.serializeURIRef' uri')
|
||||
& maybe (throwError ExternalResourceInvalidUri) pure
|
||||
-- Ideally, this exception should never be thrown, as the URI
|
||||
-- already *percent-encoded* with 'parseURI' from 'uri-bytestring'
|
||||
-- and 'mkURIBs' is only used to convert to 'URI' type from
|
||||
-- 'modern-uri' package.
|
||||
& handleJust (fromException @ParseExceptionBs)
|
||||
(throwError . ExternalResourceUriConversionError)
|
||||
|
||||
checkExternalResource :: VerifyConfig -> Text -> IO (VerifyResult VerifyError)
|
||||
checkExternalResource VerifyConfig{..} link
|
||||
|
@ -12,6 +12,7 @@ import Universum
|
||||
import Test.Hspec (Spec, describe, it, shouldReturn)
|
||||
import Text.URI (URI)
|
||||
import Text.URI.QQ (uri)
|
||||
import URI.ByteString (URIParseError (..), SchemaError (..))
|
||||
|
||||
import Xrefcheck.Verify (parseUri, VerifyError (..))
|
||||
|
||||
@ -35,10 +36,18 @@ spec = do
|
||||
describe "URI parsing should be unsuccessful" $ do
|
||||
it "With the special characters anywhere else" do
|
||||
parseUri' "https://exa<mple.co>m/?q=a&p=b#fra{g}ment" `shouldReturn`
|
||||
Left ExternalResourceInvalidUri
|
||||
Left (ExternalResourceInvalidUri MalformedPath)
|
||||
|
||||
parseUri' "https://example.com/pa[t]h/to[/]smth?q=a&p=b" `shouldReturn`
|
||||
Left ExternalResourceInvalidUri
|
||||
Left (ExternalResourceInvalidUri MalformedPath)
|
||||
|
||||
it "With malformed scheme" do
|
||||
parseUri' "https//example.com/" `shouldReturn`
|
||||
Left (ExternalResourceInvalidUri $ MalformedScheme MissingColon)
|
||||
|
||||
it "With malformed fragment" do
|
||||
parseUri' "https://example.com/?q=a&p=b#fra{g}ment" `shouldReturn`
|
||||
Left (ExternalResourceInvalidUri MalformedFragment)
|
||||
where
|
||||
parseUri' :: Text -> IO $ Either VerifyError URI
|
||||
parseUri' = runExceptT . parseUri
|
||||
|
Loading…
Reference in New Issue
Block a user