From a3f2d282164496a2a6b1a371297299058f530456 Mon Sep 17 00:00:00 2001 From: Sergey Gulin Date: Wed, 7 Sep 2022 07:19:17 +1000 Subject: [PATCH] [#125] Display URL parsing errors Problem: We use a 2-step process to parse a URL: we use `parseURI` and then `mkURIBs`. Both of these functions can fail. At the moment, we're ignoring their errors and simply throwing a `ExternalResourceInvalidUri`, and then displaying a generic error message to the user. Solution: Catch errors from `parseUri` and `mkURIBs` and use them to tell user why the URL was invalid. --- CHANGES.md | 2 ++ package.yaml | 2 ++ src/Xrefcheck/Orphans.hs | 18 +++++++++++++++++ src/Xrefcheck/Verify.hs | 27 +++++++++++++++++++------- tests/Test/Xrefcheck/URIParsingSpec.hs | 13 +++++++++++-- 5 files changed, 53 insertions(+), 9 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 60723db..c4069ef 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -35,6 +35,8 @@ Unreleased + Allow certain reserved characters to be present in the query strings of the URLs. * [#130](https://github.com/serokell/xrefcheck/pull/130) + Fixed bug with ignoring checks for relative anchors. +* [#132](https://github.com/serokell/xrefcheck/pull/132) + + Display URL parsing errors. 0.2.1 ========== diff --git a/package.yaml b/package.yaml index 3ddbb41..1e13321 100644 --- a/package.yaml +++ b/package.yaml @@ -85,6 +85,7 @@ library: - directory-tree - directory - dlist + - exceptions - filepath - raw-strings-qq - fmt @@ -151,6 +152,7 @@ tests: - time - universum - modern-uri + - uri-bytestring - yaml build-tools: - hspec-discover diff --git a/src/Xrefcheck/Orphans.hs b/src/Xrefcheck/Orphans.hs index ba1f3cc..f7f4286 100644 --- a/src/Xrefcheck/Orphans.hs +++ b/src/Xrefcheck/Orphans.hs @@ -17,6 +17,7 @@ import Fmt (Buildable (..), unlinesF, (+|), (|+)) import Network.FTP.Client (FTPException (..), FTPMessage (..), FTPResponse (..), ResponseStatus (..)) import Text.URI (RText, unRText) +import URI.ByteString (URIParseError (..), SchemaError (..)) instance ToString (RText t) where toString = toString . unRText @@ -45,3 +46,20 @@ instance Buildable FTPException where build (BogusResponseFormatException e) = build e deriving stock instance Eq FTPException + +instance Buildable URIParseError where + build = \case + MalformedScheme e -> build e + MalformedUserInfo -> "Malformed user info" + MalformedQuery -> "Malformed query" + MalformedFragment -> "Malformed fragment" + MalformedHost -> "Malformed host" + MalformedPort -> "Malformed port" + MalformedPath -> "Malformed path" + OtherError e -> build e + +instance Buildable SchemaError where + build = \case + NonAlphaLeading -> "Scheme must start with an alphabet character" + InvalidChars -> "Subsequent characters in the schema were invalid" + MissingColon -> "Schemas must be followed by a colon" diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index 0e97d1e..e4351ca 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -44,7 +44,7 @@ import Data.Text.Metrics (damerauLevenshteinNorm) import Data.Time (UTCTime, defaultTimeLocale, formatTime, readPTime, rfc822DateFormat) import Data.Time.Clock.POSIX (getPOSIXTime) import Data.Traversable (for) -import Fmt (Buildable (..), blockListF', listF, maybeF, nameF, (+|), (|+)) +import Fmt (Buildable (..), blockListF', listF, maybeF, nameF, (+|), (|+), unlinesF, indentF) import GHC.Exts qualified as Exts import GHC.Read (Read (readPrec)) import Network.FTP.Client @@ -62,9 +62,10 @@ import System.FilePath (takeDirectory, (), normalise) import System.FilePath.Glob qualified as Glob import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift) import Text.Regex.TDFA.Text (Regex, regexec) -import Text.URI (Authority (..), URI (..), mkURIBs) +import Text.URI (Authority (..), URI (..), mkURIBs, ParseExceptionBs) import Time (RatioNat, Second, Time (..), ms, sec, threadDelay, timeout, (+:+), (-:-)) import URI.ByteString qualified as URIBS +import Control.Monad.Catch (handleJust) import Data.Bits (toIntegralSized) import Xrefcheck.Config @@ -122,7 +123,8 @@ data VerifyError = LocalFileDoesNotExist FilePath | AnchorDoesNotExist Text [Anchor] | AmbiguousAnchorRef FilePath Text (NonEmpty Anchor) - | ExternalResourceInvalidUri + | ExternalResourceInvalidUri URIBS.URIParseError + | ExternalResourceUriConversionError ParseExceptionBs | ExternalResourceInvalidUrl (Maybe Text) | ExternalResourceUnknownProtocol | ExternalHttpResourceUnavailable Status @@ -150,8 +152,14 @@ instance Buildable VerifyError where " Use of such anchors is discouraged because referenced object\n\ \ can change silently whereas the document containing it evolves.\n" - ExternalResourceInvalidUri -> - "⛂ Invalid URI\n" + ExternalResourceInvalidUri err -> + "⛂ Invalid URI (" +| err |+ ")\n" + + ExternalResourceUriConversionError err -> + unlinesF + [ "⛂ Invalid URI" + , indentF 4 . build $ displayException err + ] ExternalResourceInvalidUrl Nothing -> "⛂ Invalid URL\n" @@ -468,7 +476,7 @@ parseUri link = do -- RFC 3986, and the other -- 'laxURIParseOptions' -- allows brackets -- in the queries, which draws us closer to the WHATWG URL standard. uri' <- URIBS.parseURI URIBS.laxURIParserOptions (encodeUtf8 link) - & either (const $ throwError ExternalResourceInvalidUri) pure + & either (throwError . ExternalResourceInvalidUri) pure -- We stick to our infrastructure by continuing to operate on the datatypes -- from `modern-uri`, which are used in the 'req' library. First we @@ -476,7 +484,12 @@ parseUri link = do -- with all the necessary special characters *percent-encoded*, and then -- call 'mkURIBs'. mkURIBs (URIBS.serializeURIRef' uri') - & maybe (throwError ExternalResourceInvalidUri) pure + -- Ideally, this exception should never be thrown, as the URI + -- already *percent-encoded* with 'parseURI' from 'uri-bytestring' + -- and 'mkURIBs' is only used to convert to 'URI' type from + -- 'modern-uri' package. + & handleJust (fromException @ParseExceptionBs) + (throwError . ExternalResourceUriConversionError) checkExternalResource :: VerifyConfig -> Text -> IO (VerifyResult VerifyError) checkExternalResource VerifyConfig{..} link diff --git a/tests/Test/Xrefcheck/URIParsingSpec.hs b/tests/Test/Xrefcheck/URIParsingSpec.hs index a175500..7d5f64c 100644 --- a/tests/Test/Xrefcheck/URIParsingSpec.hs +++ b/tests/Test/Xrefcheck/URIParsingSpec.hs @@ -12,6 +12,7 @@ import Universum import Test.Hspec (Spec, describe, it, shouldReturn) import Text.URI (URI) import Text.URI.QQ (uri) +import URI.ByteString (URIParseError (..), SchemaError (..)) import Xrefcheck.Verify (parseUri, VerifyError (..)) @@ -35,10 +36,18 @@ spec = do describe "URI parsing should be unsuccessful" $ do it "With the special characters anywhere else" do parseUri' "https://exam/?q=a&p=b#fra{g}ment" `shouldReturn` - Left ExternalResourceInvalidUri + Left (ExternalResourceInvalidUri MalformedPath) parseUri' "https://example.com/pa[t]h/to[/]smth?q=a&p=b" `shouldReturn` - Left ExternalResourceInvalidUri + Left (ExternalResourceInvalidUri MalformedPath) + + it "With malformed scheme" do + parseUri' "https//example.com/" `shouldReturn` + Left (ExternalResourceInvalidUri $ MalformedScheme MissingColon) + + it "With malformed fragment" do + parseUri' "https://example.com/?q=a&p=b#fra{g}ment" `shouldReturn` + Left (ExternalResourceInvalidUri MalformedFragment) where parseUri' :: Text -> IO $ Either VerifyError URI parseUri' = runExceptT . parseUri