[#31] Handle the "429 too many requests" errors

Problem: The current version of xrefcheck handles the HTTP responses
with the 429 status code just like every other error, when it is
possible to try and eliminate the occurrences of such errors within the
program itself.

Solution: Each time the result of performing a request on a given link
is a 429 error, retrieve the Retry-After information, describing the
delay (in seconds), from the headers of the HTTP response, or,
alternatively, use a configurable default value if the Retry-After
header is absent, and rerun the request after an amount of time
described by the said value had passed. Only after the number of retries
had reached its limiting value, which, as of right now, is not
configurable and is hardcoded, is when the 429 error is converted into
becoming 'unfixable', and any further attempts to remove the error are
terminated.

Additionally, the progress bar has been upgraded and the following
elements are supplied:
1. an extra color -- Blue -- indicating the errors that might get
   eliminated during the verification;
2. a timer with the number of seconds left to wait for the restart of
   the request; if, during the verification, a new 429 error had emerged
   with the new Retry-After value being greater than or equal to the
   elapsed time, the timer is immediately updated with that value and
   begins ticking down each second from scratch.
This commit is contained in:
Constantine Ter-Matevosian 2021-10-22 01:00:18 +03:00
parent 561750a8ac
commit 032395007b
No known key found for this signature in database
GPG Key ID: 824C79FC6C4EF3DB
13 changed files with 511 additions and 74 deletions

View File

@ -17,6 +17,11 @@ Unreleased
+ Add the duplication detection & verification result caching algorithm for external references.
* [#82](https://github.com/serokell/xrefcheck/pull/82)
+ Fix the issue of having the lowest level context duplicated, caused by the root's trailing path separator.
* [#31](https://github.com/serokell/xrefcheck/pull/88)
+ Handle the "429 too many requests" errors & attempt to eliminate them during verification.
0.2.1
==========
* [#68](https://github.com/serokell/xrefcheck/pull/68)
+ Recognise manual HTML-anchors inside headers.

View File

@ -104,6 +104,7 @@ library:
- text
- text-metrics
- th-lift-instances
- time
- universum
- yaml
@ -131,6 +132,8 @@ tests:
generated-other-modules:
- Paths_xrefcheck
dependencies:
- case-insensitive
- containers
- firefly
- hspec
- hspec-expectations
@ -141,7 +144,9 @@ tests:
- fmt
- http-types
- HUnit
- o-clock
- regex-tdfa
- time
- universum
- yaml
build-tools:

View File

@ -62,6 +62,9 @@ data VerifyConfig = VerifyConfig
, vcIgnoreAuthFailures :: Bool
-- ^ If True - links which return 403 or 401 code will be skipped,
-- otherwise will be marked as broken, because we can't check it.
, vcDefaultRetryAfter :: Time Second
-- ^ Default Retry-After delay, applicable when we receive a 429 response
-- and it does not contain a @Retry-After@ header.
}
normaliseVerifyConfigFilePaths :: VerifyConfig -> VerifyConfig

View File

@ -54,6 +54,11 @@ verification:
# Skip links which return 403 or 401 code.
ignoreAuthFailures: true
# When a verification result is a "429 Too Many Requests" response
# and it does not contain a "Retry-After" header,
# wait this amount of time before attempting to verify the link again.
defaultRetryAfter: 30s
# Parameters of scanners for various file types.
scanners:
markdown:

View File

@ -23,6 +23,7 @@ import Fmt (Buildable (..), blockListF, blockListF', nameF, (+|), (|+))
import System.Console.Pretty (Color (..), Style (..), color, style)
import System.FilePath (isPathSeparator, pathSeparator)
import Text.Numeral.Roman (toRoman)
import Time (Second, Time)
import Xrefcheck.Progress
import Xrefcheck.Util
@ -189,7 +190,7 @@ data LocationType
-- ^ Reference to a file at outer site
| OtherLoc
-- ^ Entry not to be processed (e.g. "mailto:e-mail")
deriving stock (Show)
deriving stock (Eq, Show)
instance Buildable LocationType where
build = \case
@ -308,21 +309,21 @@ data VerifyProgress = VerifyProgress
initVerifyProgress :: [Reference] -> VerifyProgress
initVerifyProgress references = VerifyProgress
{ vrLocal = initProgress (length localRefs)
, vrExternal = initProgress (length extRefs)
, vrExternal = initProgress (length (L.nubBy ((==) `on` rLink) extRefs))
}
where
(extRefs, localRefs) = L.partition isExternal $
map (locationType . rLink) references
(extRefs, localRefs) = L.partition (isExternal . locationType . rLink) references
showAnalyseProgress :: VerifyMode -> VerifyProgress -> Text
showAnalyseProgress mode VerifyProgress{..} = mconcat . mconcat $
[ [ "Verifying " ]
, [ showProgress "local" 10 White vrLocal <> " "
| shouldCheckLocal mode ]
, [ showProgress "external" 15 Yellow vrExternal
| shouldCheckExternal mode ]
]
showAnalyseProgress :: VerifyMode -> Time Second -> VerifyProgress -> Text
showAnalyseProgress mode posixTime VerifyProgress{..} =
mconcat . mconcat $
[ [ "Verifying " ]
, [ showProgress "local" 10 White posixTime vrLocal <> " "
| shouldCheckLocal mode ]
, [ showProgress "external" 15 Yellow posixTime vrExternal
| shouldCheckExternal mode ]
]
reprintAnalyseProgress :: Rewrite -> VerifyMode -> VerifyProgress -> IO ()
reprintAnalyseProgress rw mode p = putTextRewrite rw $
showAnalyseProgress mode p
reprintAnalyseProgress :: Rewrite -> VerifyMode -> Time Second -> VerifyProgress -> IO ()
reprintAnalyseProgress rw mode posixTime p = putTextRewrite rw $
showAnalyseProgress mode posixTime p

View File

@ -5,11 +5,20 @@
-- | Printing progress bars.
module Xrefcheck.Progress
( -- * Progress
Progress (..)
( -- * Task timestamp
TaskTimestamp (..)
-- * Progress
, Progress (..)
, initProgress
, incProgress
, incProgressErrors
, incProgressUnfixableErrors
, incProgressFixableErrors
, decProgressFixableErrors
, fixableToUnfixable
, setTaskTimestamp
, removeTaskTimestamp
, checkTaskTimestamp
, showProgress
-- * Printing
@ -22,7 +31,21 @@ import Universum
import Data.Ratio ((%))
import System.Console.Pretty (Color (..), Style (..), color, style)
import Time (ms, threadDelay)
import Time (Second, Time, ms, sec, threadDelay, unTime, (-:-))
-----------------------------------------------------------
-- Task timestamp
-----------------------------------------------------------
-- | Data type defining a point in time when an anonymous task had started
-- and its time to completion.
data TaskTimestamp = TaskTimestamp
{ ttTimeToCompletion :: Time Second
-- ^ The amount of time required for the task to be completed.
, ttStart :: Time Second
-- ^ The timestamp of when the task had started, represented by the number of seconds
-- since the Unix epoch.
} deriving stock (Show)
-----------------------------------------------------------
-- Progress
@ -30,51 +53,146 @@ import Time (ms, threadDelay)
-- | Processing progress of any thing.
data Progress a = Progress
{ pCurrent :: a
-- ^ How much has been completed.
, pTotal :: a
{ pTotal :: a
-- ^ Overall amount of work.
, pErrors :: !a
-- ^ How many of the completed work finished with an error.
, pCurrent :: a
-- ^ How much has been completed.
, pErrorsUnfixable :: !a
-- ^ How much of the completed work finished with an unfixable error.
, pErrorsFixable :: !a
-- ^ How much of the completed work finished with an error that can be
-- eliminated upon further verification.
, pTaskTimestamp :: Maybe TaskTimestamp
-- ^ A timestamp of an anonymous timer task, where its time to completion is
-- the time needed to pass for the action to be retried immediately after.
} deriving stock (Show)
-- | Initialise null progress.
initProgress :: Num a => a -> Progress a
initProgress a = Progress{ pTotal = a, pCurrent = 0, pErrors = 0 }
initProgress a = Progress{ pTotal = a
, pCurrent = 0
, pErrorsUnfixable = 0
, pErrorsFixable = 0
, pTaskTimestamp = Nothing
}
-- | Increase progress amount.
incProgress :: (Num a) => Progress a -> Progress a
incProgress Progress{..} = Progress{ pCurrent = pCurrent + 1, .. }
-- | Increase errors amount.
incProgressErrors :: (Num a) => Progress a -> Progress a
incProgressErrors Progress{..} = Progress{ pErrors = pErrors + 1, .. }
-- | Increase the number of unfixable errors.
incProgressUnfixableErrors :: (Num a) => Progress a -> Progress a
incProgressUnfixableErrors Progress{..} = Progress{ pErrorsUnfixable = pErrorsUnfixable + 1
, ..
}
-- | Increase the number of fixable errors.
incProgressFixableErrors :: (Num a) => Progress a -> Progress a
incProgressFixableErrors Progress{..} = Progress{ pErrorsFixable = pErrorsFixable + 1
, ..
}
-- | Decrease the number of fixable errors. This function indicates the situation where one of
-- such errors had been successfully eliminated.
decProgressFixableErrors :: (Num a) => Progress a -> Progress a
decProgressFixableErrors Progress{..} = Progress{ pErrorsFixable = pErrorsFixable - 1
, ..
}
fixableToUnfixable :: (Num a) => Progress a -> Progress a
fixableToUnfixable Progress{..} = Progress{ pErrorsFixable = pErrorsFixable - 1
, pErrorsUnfixable = pErrorsUnfixable + 1
, ..
}
setTaskTimestamp :: Time Second -> Time Second -> Progress a -> Progress a
setTaskTimestamp ttc startTime Progress{..} = Progress{ pTaskTimestamp =
Just $ TaskTimestamp ttc startTime
, ..
}
removeTaskTimestamp :: Progress a -> Progress a
removeTaskTimestamp Progress{..} = Progress{ pTaskTimestamp = Nothing
, ..
}
checkTaskTimestamp :: Time Second -> Progress a -> Progress a
checkTaskTimestamp posixTime p@Progress{..} =
case pTaskTimestamp of
Nothing -> p
Just TaskTimestamp{..} ->
if ttTimeToCompletion >= posixTime -:- ttStart
then p
else removeTaskTimestamp p
-- | Visualise progress bar.
showProgress :: Text -> Int -> Color -> Progress Int -> Text
showProgress name width col Progress{..} = mconcat
showProgress :: Text -> Int -> Color -> Time Second -> Progress Int -> Text
showProgress name width col posixTime Progress{..} = mconcat
[ color col (name <> ": [")
, toText bar
, timer
, color col "]"
, status
]
where
-- | Each of the following values represents the number of the progress bar cells
-- corresponding to the respective "class" of processed references: the valid ones,
-- the ones containing an unfixable error (a.k.a. the invalid ones), and the ones
-- containing a fixable error.
--
-- The current overall number of proccessed errors.
done = floor $ (pCurrent % pTotal) * fromIntegral @Int @(Ratio Int) width
errs = ceiling $ (pErrors % pTotal) * fromIntegral @Int @(Ratio Int) width
done' = max 0 $ done - errs
remained' = width - errs - done'
-- | The current number of the invalid references.
errsU = ceiling $ (pErrorsUnfixable % pTotal) * fromIntegral @Int @(Ratio Int) width
-- | The current number of (fixable) errors that may be eliminated during further
-- verification.
-- Notice!
-- 1. Both this and the previous values use @ceiling@ as the rounding function.
-- This is done to ensure that as soon as at least 1 faulty reference occurs during
-- the verification, the cell of its respective color is mathematically guaranteed
-- to be visible in the progress bar visualization.
-- 2. @errsF@ is bounded from above by @width - errsU@ to prevent an overflow in the
-- number of the progress bar cells that could be caused by the two @ceilings@s.
errsF = min (width - errsU) . ceiling $ (pErrorsFixable % pTotal) *
fromIntegral @Int @(Ratio Int) width
-- | The number of valid references.
-- The value is bounded from below by 0 to ensure the number never gets negative.
-- This situation is plausible due to the different rounding functions used for each value:
-- @floor@ for the minuend @done@, @ceiling@ for the two subtrahends @errsU@ & @errsF@.
successful = max 0 $ done - errsU - errsF
-- | The remaining number of references to be verified.
remaining = width - successful - errsU - errsF
bar
| pTotal == 0 = replicate width '-'
| otherwise = mconcat
[ color Red $ replicate errs '■'
, color col $ replicate done' '■'
, color col $ replicate remained' ' '
[ color Blue $ replicate errsF '■'
, color Red $ replicate errsU '■'
, color col $ replicate successful '■'
, color col $ replicate remaining ' '
, " "
]
status
| pTotal == 0 = ""
| pErrors == 0 = style Faint $ color White ""
| otherwise = color Red "!"
timer = case pTaskTimestamp of
Nothing -> ""
Just TaskTimestamp{..} -> mconcat
[ color col "|"
, color Blue . show . timeSecondCeiling
$ ttTimeToCompletion -:- (posixTime -:- ttStart)
]
status = mconcat
[ if pCurrent == pTotal && pErrorsFixable == 0 && pErrorsUnfixable == 0
then style Faint $ color White ""
else ""
, if pErrorsFixable /= 0 then color Blue "!" else ""
, if pErrorsUnfixable /= 0 then color Red "!" else ""
]
timeSecondCeiling :: Time Second -> Time Second
timeSecondCeiling = sec . fromInteger . ceiling . unTime
-----------------------------------------------------------
-- Rewritable output
@ -113,8 +231,17 @@ allowRewrite enabled = bracket prepare erase
-- | Return caret and print the given text.
putTextRewrite :: MonadIO m => Rewrite -> Text -> m ()
putTextRewrite RewriteDisabled _ = pass
putTextRewrite (Rewrite RewriteCtx{..}) msg = do
liftIO $ hPutStr stderr ('\r' : toString msg)
liftIO $ hPutStr stderr ('\r' : toString msg ++ fill)
atomicModifyIORef' rMaxPrintedSize $ \maxPrinted ->
(max maxPrinted (length msg), ())
putTextRewrite RewriteDisabled _ = pass
where
-- | The maximum possible difference between two progress text representations,
-- including the timer & the status, is 9 characters. This is a temporary
-- solution to the problem of re-printing a smaller string on top of another
-- that'll leave some of the trailing characters in the original string
-- untouched, and is most likely going to be either replaced by an adequate
-- workaround or by another way to form a text representation of a progress and
-- its respective rewriting logic.
fill = replicate 9 ' '

View File

@ -11,16 +11,23 @@ module Xrefcheck.Util
, postfixFields
, (-:)
, aesonConfigOption
, normaliseWithNoTrailing) where
, normaliseWithNoTrailing
, posixTimeToTimeSecond
) where
import Universum
import Control.Lens (LensRules, lensField, lensRules, mappingNamer)
import Data.Aeson qualified as Aeson
import Data.Aeson.Casing (aesonPrefix, camelCase)
import Data.Fixed (Fixed (MkFixed), HasResolution (resolution))
import Data.Ratio ((%))
import Data.Time.Clock (nominalDiffTimeToSeconds)
import Data.Time.Clock.POSIX (POSIXTime)
import Fmt (Builder, build, fmt, nameF)
import System.Console.Pretty (Pretty (..), Style (Faint))
import System.FilePath (dropTrailingPathSeparator, normalise)
import Time (Second, Time (..), sec)
instance Pretty Builder where
colorize s c = build @Text . colorize s c . fmt
@ -47,3 +54,8 @@ aesonConfigOption = aesonPrefix camelCase
normaliseWithNoTrailing :: FilePath -> FilePath
normaliseWithNoTrailing = dropTrailingPathSeparator . normalise
posixTimeToTimeSecond :: POSIXTime -> Time Second
posixTimeToTimeSecond posixTime =
let picos@(MkFixed ps) = nominalDiffTimeToSeconds posixTime
in sec . fromRational $ ps % resolution picos

View File

@ -16,9 +16,14 @@ module Xrefcheck.Verify
, WithReferenceLoc (..)
-- * Concurrent traversal with caching
, NeedsCaching (..)
, forConcurrentlyCaching
-- * Cross-references validation
, VerifyError (..)
, verifyRepo
, verifyReference
, checkExternalResource
) where
@ -28,18 +33,22 @@ import Control.Concurrent.Async (wait, withAsync)
import Control.Exception (throwIO)
import Control.Monad.Except (MonadError (..))
import Data.ByteString qualified as BS
import Data.List qualified as L
import Data.Map qualified as M
import Data.Text qualified as T
import Data.Text.Metrics (damerauLevenshteinNorm)
import Data.Time.Clock.POSIX (getPOSIXTime)
import Data.Traversable (for)
import Fmt (Buildable (..), blockListF', listF, (+|), (|+))
import GHC.Exts qualified as Exts
import Network.FTP.Client
(FTPException (..), FTPResponse (..), ResponseStatus (..), login, nlst, size, withFTP, withFTPS)
import Network.HTTP.Client (HttpException (..), HttpExceptionContent (..), responseStatus)
import Network.HTTP.Client
(HttpException (..), HttpExceptionContent (..), Response, responseHeaders, responseStatus)
import Network.HTTP.Req
(AllowsBody, CanHaveBody (NoBody), GET (..), HEAD (..), HttpBodyAllowed, HttpException (..),
HttpMethod, NoReqBody (..), defaultHttpConfig, ignoreResponse, req, runReq, useURI)
import Network.HTTP.Types.Header (hRetryAfter)
import Network.HTTP.Types.Status (Status, statusCode, statusMessage)
import System.Console.Pretty (Style (..), style)
import System.Directory (canonicalizePath, doesDirectoryExist, doesFileExist)
@ -47,7 +56,7 @@ import System.FilePath (takeDirectory, (</>), normalise)
import System.FilePath.Glob qualified as Glob
import Text.Regex.TDFA.Text (Regex, regexec)
import Text.URI (Authority (..), URI (..), mkURI)
import Time (RatioNat, Second, Time (..), ms, threadDelay, timeout)
import Time (RatioNat, Second, Time (..), ms, sec, threadDelay, timeout, (+:+))
import Data.Bits (toIntegralSized)
import Xrefcheck.Config
@ -55,6 +64,7 @@ import Xrefcheck.Core
import Xrefcheck.Orphans ()
import Xrefcheck.Progress
import Xrefcheck.System
import Xrefcheck.Util
{-# ANN module ("HLint: ignore Use uncurry" :: Text) #-}
{-# ANN module ("HLint: ignore Use 'runExceptT' from Universum" :: Text) #-}
@ -108,6 +118,7 @@ data VerifyError
| ExternalResourceInvalidUrl (Maybe Text)
| ExternalResourceUnknownProtocol
| ExternalHttpResourceUnavailable Status
| ExternalHttpTooManyRequests (Time Second)
| ExternalFtpResourceUnavailable FTPResponse
| ExternalFtpException FTPException
| FtpEntryDoesNotExist FilePath
@ -147,6 +158,10 @@ instance Buildable VerifyError where
"⛂ Resource unavailable (" +| statusCode status |+ " " +|
decodeUtf8 @Text (statusMessage status) |+ ")\n"
ExternalHttpTooManyRequests retryAfter ->
"⛂ Resource unavailable (429 Too Many Requests; retry after " +|
show @Text retryAfter |+ ")\n"
ExternalFtpResourceUnavailable response ->
"⛂ Resource unavailable:\n" +| response |+ "\n"
@ -164,6 +179,11 @@ instance Buildable VerifyError where
[h] -> ",\n did you mean " +| h |+ "?\n"
hs -> ", did you mean:\n" +| blockListF' " -" build hs
-- | Determine whether the verification result contains a fixable error.
isFixable :: VerifyError -> Bool
isFixable (ExternalHttpTooManyRequests _) = True
isFixable _ = False
data NeedsCaching key
= NoCaching
| CacheUnderKey key
@ -223,7 +243,14 @@ verifyRepo
return $ fold accumulated
where
printer progressRef = forever $ do
readIORef progressRef >>= reprintAnalyseProgress rw mode
posixTime <- getPOSIXTime <&> posixTimeToTimeSecond
progress <- atomicModifyIORef' progressRef $ \VerifyProgress{..} ->
let prog = VerifyProgress{ vrExternal =
checkTaskTimestamp posixTime vrExternal
, ..
}
in (prog, prog)
reprintAnalyseProgress rw mode posixTime progress
threadDelay (ms 100)
ifExternalThenCache (_, Reference{..}) = case locationType rLink of
@ -253,32 +280,98 @@ verifyReference
root
fileWithReference
ref@Reference{..}
= do
let locType = locationType rLink
if shouldCheckLocType mode locType
then do
res <- case locType of
LocalLoc -> checkRef rAnchor fileWithReference
RelativeLoc -> checkRef rAnchor
(takeDirectory fileWithReference
</> toString (canonizeLocalRef rLink))
AbsoluteLoc -> checkRef rAnchor (root <> toString rLink)
ExternalLoc -> checkExternalResource config rLink
OtherLoc -> verifying pass
let moveProgress = incProgress .
(if verifyOk res then id else incProgressErrors)
atomicModifyIORef' progressRef $ \VerifyProgress{..} ->
( if isExternal locType
then VerifyProgress{ vrExternal = moveProgress vrExternal, .. }
else VerifyProgress{ vrLocal = moveProgress vrLocal, .. }
, ()
)
return $ fmap (WithReferenceLoc fileWithReference ref) res
else return mempty
= retryVerification 0 $ do
let locType = locationType rLink
if shouldCheckLocType mode locType
then case locType of
LocalLoc -> checkRef rAnchor fileWithReference
RelativeLoc -> checkRef rAnchor
(takeDirectory fileWithReference
</> toString (canonizeLocalRef rLink))
AbsoluteLoc -> checkRef rAnchor (root <> toString rLink)
ExternalLoc -> checkExternalResource config rLink
OtherLoc -> verifying pass
else return mempty
where
retryVerification
:: Int
-> IO (VerifyResult VerifyError)
-> IO (VerifyResult $ WithReferenceLoc VerifyError)
retryVerification numberOfRetries resIO = do
res@(VerifyResult ves) <- resIO
let toRetry = any isFixable ves && numberOfRetries < maxRetries
currentRetryAfter = extractRetryAfterInfo res
let moveProgress = alterOverallProgress numberOfRetries
. alterProgressErrors res numberOfRetries
posixTime' <- getPOSIXTime
let posixTime = posixTimeToTimeSecond posixTime'
atomicModifyIORef' progressRef $ \VerifyProgress{..} ->
( if isExternal $ locationType rLink
then VerifyProgress{ vrExternal =
let vrExternalAdvanced = moveProgress vrExternal
in if toRetry
then case pTaskTimestamp vrExternal of
Just (TaskTimestamp ttc start)
| currentRetryAfter +:+ posixTime <= ttc +:+ start -> vrExternalAdvanced
_ -> setTaskTimestamp currentRetryAfter posixTime vrExternalAdvanced
else vrExternalAdvanced, .. }
else VerifyProgress{ vrLocal = moveProgress vrLocal, .. }
, ()
)
if toRetry
then do
threadDelay currentRetryAfter
retryVerification (numberOfRetries + 1) resIO
else return $ fmap (WithReferenceLoc fileWithReference ref) res
alterOverallProgress
:: (Num a)
=> Int
-> Progress a
-> Progress a
alterOverallProgress retryNumber
| retryNumber > 0 = id
| otherwise = incProgress
alterProgressErrors
:: (Num a)
=> VerifyResult VerifyError
-> Int
-> Progress a
-> Progress a
alterProgressErrors res@(VerifyResult ves) retryNumber
| maxRetries == 0 =
if ok then id
else incProgressUnfixableErrors
| retryNumber == 0 =
if ok then id
else if fixable then incProgressFixableErrors
else incProgressUnfixableErrors
| retryNumber == maxRetries =
if ok then decProgressFixableErrors
else fixableToUnfixable
-- 0 < retryNumber < maxRetries
| otherwise =
if ok then decProgressFixableErrors
else if fixable then id
else fixableToUnfixable
where
ok = verifyOk res
fixable = any isFixable ves
extractRetryAfterInfo :: VerifyResult VerifyError -> Time Second
extractRetryAfterInfo = \case
VerifyResult [ExternalHttpTooManyRequests retryAfter] -> retryAfter
_ -> vcDefaultRetryAfter
-- | Maximum number of retries available until a fixable error becomes unfixable.
-- Soon to become a configurable value.
maxRetries = 20
checkRef mAnchor referredFile = verifying $ do
checkReferredFileExists referredFile
case M.lookup referredFile repoInfo of
@ -358,8 +451,9 @@ checkExternalResource VerifyConfig{..} link
Left _ -> False
checkHttp :: URI -> ExceptT VerifyError IO ()
checkHttp uri = makeHttpRequest uri HEAD 0.3 `catchError` \_ ->
makeHttpRequest uri GET 0.7
checkHttp uri = makeHttpRequest uri HEAD 0.3 `catchError` \case
e | isFixable e -> throwError e
_ -> makeHttpRequest uri GET 0.7
makeHttpRequest
:: (HttpMethod method, HttpBodyAllowed (AllowsBody method) 'NoBody)
@ -404,8 +498,18 @@ checkExternalResource VerifyConfig{..} link
HttpExceptionRequest _ exc -> case exc of
StatusCodeException resp _
| isAllowedErrorCode (statusCode $ responseStatus resp) -> Right ()
| otherwise -> Left $ ExternalHttpResourceUnavailable (responseStatus resp)
| otherwise -> case statusCode (responseStatus resp) of
429 -> Left . ExternalHttpTooManyRequests $ retryAfterInfo resp
_ -> Left . ExternalHttpResourceUnavailable $ responseStatus resp
other -> Left . ExternalResourceSomeError $ show other
where
retryAfterInfo :: Response a -> Time Second
retryAfterInfo response =
fromMaybe vcDefaultRetryAfter $ do
retryAfterByteString <- L.lookup hRetryAfter $ responseHeaders response
retryAfterString <- rightToMaybe $ decodeUtf8Strict @String retryAfterByteString
retryAfter <- readMaybe @Natural retryAfterString
pure . sec $ fromIntegral @Natural @RatioNat retryAfter
checkFtp :: URI -> Bool -> ExceptT VerifyError IO ()
checkFtp uri secure = do

View File

@ -0,0 +1,166 @@
{- SPDX-FileCopyrightText: 2021 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-}
module Test.Xrefcheck.TooManyRequestsSpec where
import Universum
import Control.Concurrent (forkIO, killThread)
import Control.Exception qualified as E
import Data.CaseInsensitive qualified as CI
import Data.Map qualified as M
import Data.Time.Clock.POSIX (getPOSIXTime)
import Fmt (indentF, pretty, unlinesF)
import Network.HTTP.Types (Status (..), ok200, serviceUnavailable503, tooManyRequests429)
import Network.HTTP.Types.Header (hRetryAfter)
import Test.Hspec (Spec, describe, it, shouldBe)
import Test.HUnit (assertBool)
import Time (sec, (-:-))
import Web.Firefly (ToResponse (toResponse), route, run, getMethod)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Progress
import Xrefcheck.Util
import Xrefcheck.Verify
spec :: Spec
spec = do
describe "429 response tests" $ do
it "Returns 200 eventually" $ do
let prog = Progress{ pTotal = 1
, pCurrent = 1
, pErrorsUnfixable = 0
, pErrorsFixable = 0
, pTaskTimestamp = Nothing
}
checkLinkAndProgressWithServer (mock429 "1" ok200)
"http://127.0.0.1:5000/429" prog $ VerifyResult []
it "Returns 503 eventually" $ do
let prog = Progress{ pTotal = 1
, pCurrent = 1
, pErrorsUnfixable = 1
, pErrorsFixable = 0
, pTaskTimestamp = Nothing
}
checkLinkAndProgressWithServer (mock429 "1" serviceUnavailable503)
"http://127.0.0.1:5000/429" prog $ VerifyResult
[ ExternalHttpResourceUnavailable $
Status { statusCode = 503, statusMessage = "Service Unavailable"}
]
it "Successfully updates the new retry-after value" $ do
E.bracket (forkIO $ mock429 "2" ok200) killThread $ \_ -> do
now <- getPOSIXTime <&> posixTimeToTimeSecond
progressRef <- newIORef VerifyProgress
{ vrLocal = initProgress 0
, vrExternal = Progress
{ pTotal = 2
, pCurrent = 1
, pErrorsUnfixable = 0
, pErrorsFixable = 0
, pTaskTimestamp = Just (TaskTimestamp (sec 3) (now -:- sec 1.5))
}
}
_ <- verifyReferenceWithProgress
(Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing))
progressRef
Progress{..} <- vrExternal <$> readIORef progressRef
let ttc = ttTimeToCompletion <$> pTaskTimestamp
flip assertBool (ttc == Just (sec 2)) $
"Expected time to completion be equal to " ++ show (Just $ sec 2) ++
", but instead it's " ++ show ttc
it "The GET request should not be attempted after catching a 429" $ do
let
mock429WithGlobalIORef :: IORef [(Text, Status)] -> IO ()
mock429WithGlobalIORef infoReverseAccumulatorRef = do
callCountRef <- newIORef @_ @Int 0
run 5000 $ do
route "/429grandfinale" $ do
m <- getMethod
callCount <- atomicModifyIORef' callCountRef $ \cc -> (cc + 1, cc)
atomicModifyIORef' infoReverseAccumulatorRef $ \lst ->
( ( m
, if | m == "GET" -> ok200
| callCount == 0 -> tooManyRequests429
| otherwise -> serviceUnavailable503
) : lst
, ()
)
pure $ if
| m == "GET" -> toResponse ("" :: Text, ok200)
| callCount == 0 -> toResponse
( "" :: Text
, tooManyRequests429
, M.fromList [(CI.map (decodeUtf8 @Text) hRetryAfter, ["1" :: Text])]
)
| otherwise -> toResponse ("" :: Text, serviceUnavailable503)
infoReverseAccumulatorRef <- newIORef []
E.bracket (forkIO $ mock429WithGlobalIORef infoReverseAccumulatorRef) killThread $ \_ -> do
_ <- verifyLink "http://127.0.0.1:5000/429grandfinale"
infoReverseAccumulator <- readIORef infoReverseAccumulatorRef
reverse infoReverseAccumulator `shouldBe`
[ ("HEAD", tooManyRequests429)
, ("HEAD", serviceUnavailable503)
, ("GET", ok200)
]
where
checkLinkAndProgressWithServer mock link progress vrExpectation =
E.bracket (forkIO mock) killThread $ \_ -> do
(result, progRes) <- verifyLink link
flip assertBool (result == vrExpectation) . pretty $ unlinesF
[ "Verification results differ: expected"
, indentF 2 (show vrExpectation)
, "but got"
, indentF 2 (show result)
]
flip assertBool (progRes `progEquiv` progress) . pretty $ unlinesF
[ "Expected the progress bar state to be"
, indentF 2 (show progress)
, "but got"
, indentF 2 (show progRes)
]
where
-- | Check whether the two @Progress@ values are equal up to similarity of their essential
-- components, ignoring the comparison of @pTaskTimestamp@s, which is done to prevent test
-- failures when comparing the resulting progress, gotten from running the link
-- verification algorithm, with the expected one, where @pTaskTimestamp@ is hardcoded
-- as @Nothing@.
progEquiv :: Eq a => Progress a -> Progress a -> Bool
progEquiv p1 p2 = and [ ((==) `on` pCurrent) p1 p2
, ((==) `on` pTotal) p1 p2
, ((==) `on` pErrorsUnfixable) p1 p2
, ((==) `on` pErrorsFixable) p1 p2
]
verifyLink :: Text -> IO (VerifyResult VerifyError, Progress Int)
verifyLink link = do
let reference = Reference "" link Nothing (Position Nothing)
progRef <- newIORef $ initVerifyProgress [reference]
result <- verifyReferenceWithProgress reference progRef
progress <- readIORef progRef
return (result, vrExternal progress)
verifyReferenceWithProgress :: Reference -> IORef VerifyProgress -> IO (VerifyResult VerifyError)
verifyReferenceWithProgress reference progRef = do
fmap wrlItem <$> verifyReference
((cVerification $ defConfig GitHub) { vcCheckLocalhost = True }) FullMode
progRef (RepoInfo M.empty) "." "" reference
-- | When called for the first time, returns with a 429 and `Retry-After: @retryAfter@`.
-- Subsequent calls will respond with @status@.
mock429 :: Text -> Status -> IO ()
mock429 retryAfter status = do
callCountRef <- newIORef @_ @Int 0
run 5000 $
route "/429" $ do
callCount <- atomicModifyIORef' callCountRef $ \cc -> (cc + 1, cc)
pure $
if callCount == 0
then toResponse
( "" :: Text
, tooManyRequests429
, M.fromList [(CI.map (decodeUtf8 @Text) hRetryAfter, [retryAfter])]
)
else toResponse ("" :: Text, status)

View File

@ -45,6 +45,11 @@ verification:
# Skip links which return 403 or 401 code.
ignoreAuthFailures: true
# When a verification result is a "429 Too Many Requests" response
# and it does not contain a "Retry-After" header,
# wait this amount of time before attempting to verify the link again.
defaultRetryAfter: 30s
# Parameters of scanners for various file types.
scanners:
markdown:

View File

@ -13,6 +13,8 @@ verification:
ignoreRefs: []
checkLocalhost: false
ignoreAuthFailures: true
defaultRetryAfter: 30s
maxRetries: 3
scanners:
markdown:

View File

@ -13,6 +13,7 @@ verification:
ignoreRefs: []
checkLocalhost: false
ignoreAuthFailures: true
defaultRetryAfter: 30s
scanners:
markdown:

View File

@ -13,6 +13,7 @@ verification:
ignoreRefs: []
checkLocalhost: true
ignoreAuthFailures: true
defaultRetryAfter: 30s
scanners:
markdown: