[#125] Display URL parsing errors

Problem: We use a 2-step process to parse a URL: we use `parseURI` and
then `mkURIBs`. Both of these functions can fail. At the moment, we're
ignoring their errors and simply throwing a `ExternalResourceInvalidUri`,
and then displaying a generic error message to the user.

Solution: Catch errors from `parseUri` and `mkURIBs` and use them to
tell user why the URL was invalid.
This commit is contained in:
Sergey Gulin 2022-09-07 07:19:17 +10:00
parent 391a5ea040
commit a3f2d28216
No known key found for this signature in database
GPG Key ID: 67CBDE9BE7E6399B
5 changed files with 53 additions and 9 deletions

View File

@ -35,6 +35,8 @@ Unreleased
+ Allow certain reserved characters to be present in the query strings of the URLs.
* [#130](https://github.com/serokell/xrefcheck/pull/130)
+ Fixed bug with ignoring checks for relative anchors.
* [#132](https://github.com/serokell/xrefcheck/pull/132)
+ Display URL parsing errors.
0.2.1
==========

View File

@ -85,6 +85,7 @@ library:
- directory-tree
- directory
- dlist
- exceptions
- filepath
- raw-strings-qq
- fmt
@ -151,6 +152,7 @@ tests:
- time
- universum
- modern-uri
- uri-bytestring
- yaml
build-tools:
- hspec-discover

View File

@ -17,6 +17,7 @@ import Fmt (Buildable (..), unlinesF, (+|), (|+))
import Network.FTP.Client
(FTPException (..), FTPMessage (..), FTPResponse (..), ResponseStatus (..))
import Text.URI (RText, unRText)
import URI.ByteString (URIParseError (..), SchemaError (..))
instance ToString (RText t) where
toString = toString . unRText
@ -45,3 +46,20 @@ instance Buildable FTPException where
build (BogusResponseFormatException e) = build e
deriving stock instance Eq FTPException
instance Buildable URIParseError where
build = \case
MalformedScheme e -> build e
MalformedUserInfo -> "Malformed user info"
MalformedQuery -> "Malformed query"
MalformedFragment -> "Malformed fragment"
MalformedHost -> "Malformed host"
MalformedPort -> "Malformed port"
MalformedPath -> "Malformed path"
OtherError e -> build e
instance Buildable SchemaError where
build = \case
NonAlphaLeading -> "Scheme must start with an alphabet character"
InvalidChars -> "Subsequent characters in the schema were invalid"
MissingColon -> "Schemas must be followed by a colon"

View File

@ -44,7 +44,7 @@ import Data.Text.Metrics (damerauLevenshteinNorm)
import Data.Time (UTCTime, defaultTimeLocale, formatTime, readPTime, rfc822DateFormat)
import Data.Time.Clock.POSIX (getPOSIXTime)
import Data.Traversable (for)
import Fmt (Buildable (..), blockListF', listF, maybeF, nameF, (+|), (|+))
import Fmt (Buildable (..), blockListF', listF, maybeF, nameF, (+|), (|+), unlinesF, indentF)
import GHC.Exts qualified as Exts
import GHC.Read (Read (readPrec))
import Network.FTP.Client
@ -62,9 +62,10 @@ import System.FilePath (takeDirectory, (</>), normalise)
import System.FilePath.Glob qualified as Glob
import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift)
import Text.Regex.TDFA.Text (Regex, regexec)
import Text.URI (Authority (..), URI (..), mkURIBs)
import Text.URI (Authority (..), URI (..), mkURIBs, ParseExceptionBs)
import Time (RatioNat, Second, Time (..), ms, sec, threadDelay, timeout, (+:+), (-:-))
import URI.ByteString qualified as URIBS
import Control.Monad.Catch (handleJust)
import Data.Bits (toIntegralSized)
import Xrefcheck.Config
@ -122,7 +123,8 @@ data VerifyError
= LocalFileDoesNotExist FilePath
| AnchorDoesNotExist Text [Anchor]
| AmbiguousAnchorRef FilePath Text (NonEmpty Anchor)
| ExternalResourceInvalidUri
| ExternalResourceInvalidUri URIBS.URIParseError
| ExternalResourceUriConversionError ParseExceptionBs
| ExternalResourceInvalidUrl (Maybe Text)
| ExternalResourceUnknownProtocol
| ExternalHttpResourceUnavailable Status
@ -150,8 +152,14 @@ instance Buildable VerifyError where
" Use of such anchors is discouraged because referenced object\n\
\ can change silently whereas the document containing it evolves.\n"
ExternalResourceInvalidUri ->
"⛂ Invalid URI\n"
ExternalResourceInvalidUri err ->
"⛂ Invalid URI (" +| err |+ ")\n"
ExternalResourceUriConversionError err ->
unlinesF
[ "⛂ Invalid URI"
, indentF 4 . build $ displayException err
]
ExternalResourceInvalidUrl Nothing ->
"⛂ Invalid URL\n"
@ -468,7 +476,7 @@ parseUri link = do
-- RFC 3986, and the other -- 'laxURIParseOptions' -- allows brackets
-- in the queries, which draws us closer to the WHATWG URL standard.
uri' <- URIBS.parseURI URIBS.laxURIParserOptions (encodeUtf8 link)
& either (const $ throwError ExternalResourceInvalidUri) pure
& either (throwError . ExternalResourceInvalidUri) pure
-- We stick to our infrastructure by continuing to operate on the datatypes
-- from `modern-uri`, which are used in the 'req' library. First we
@ -476,7 +484,12 @@ parseUri link = do
-- with all the necessary special characters *percent-encoded*, and then
-- call 'mkURIBs'.
mkURIBs (URIBS.serializeURIRef' uri')
& maybe (throwError ExternalResourceInvalidUri) pure
-- Ideally, this exception should never be thrown, as the URI
-- already *percent-encoded* with 'parseURI' from 'uri-bytestring'
-- and 'mkURIBs' is only used to convert to 'URI' type from
-- 'modern-uri' package.
& handleJust (fromException @ParseExceptionBs)
(throwError . ExternalResourceUriConversionError)
checkExternalResource :: VerifyConfig -> Text -> IO (VerifyResult VerifyError)
checkExternalResource VerifyConfig{..} link

View File

@ -12,6 +12,7 @@ import Universum
import Test.Hspec (Spec, describe, it, shouldReturn)
import Text.URI (URI)
import Text.URI.QQ (uri)
import URI.ByteString (URIParseError (..), SchemaError (..))
import Xrefcheck.Verify (parseUri, VerifyError (..))
@ -35,10 +36,18 @@ spec = do
describe "URI parsing should be unsuccessful" $ do
it "With the special characters anywhere else" do
parseUri' "https://exa<mple.co>m/?q=a&p=b#fra{g}ment" `shouldReturn`
Left ExternalResourceInvalidUri
Left (ExternalResourceInvalidUri MalformedPath)
parseUri' "https://example.com/pa[t]h/to[/]smth?q=a&p=b" `shouldReturn`
Left ExternalResourceInvalidUri
Left (ExternalResourceInvalidUri MalformedPath)
it "With malformed scheme" do
parseUri' "https//example.com/" `shouldReturn`
Left (ExternalResourceInvalidUri $ MalformedScheme MissingColon)
it "With malformed fragment" do
parseUri' "https://example.com/?q=a&p=b#fra{g}ment" `shouldReturn`
Left (ExternalResourceInvalidUri MalformedFragment)
where
parseUri' :: Text -> IO $ Either VerifyError URI
parseUri' = runExceptT . parseUri