[#217] Retry on response timeout (#234)

Problem: Currently, getting response timeout immediately results in
fail, it's desired to have a possibility to configure retries on
timeouts.

Solution: The new ExternalHttpTimeout error is added, which is treated
in a similar way as the ExternalHttpTooManyRequests error.
A new field is added to the config meaning how many timeouts are
allowed. Default value equals to 1.
This commit is contained in:
YuriRomanowski 2022-12-29 21:59:48 +05:00 committed by GitHub
parent 9213017e60
commit a4dc29bf2a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 538 additions and 177 deletions

View File

@ -12,7 +12,6 @@
- ignore: { name: Redundant do }
- ignore: { name: Redundant bracket }
- ignore: { name: Redundant lambda }
- ignore: { name: Redundant $ }
- ignore: { name: Redundant flip }
- ignore: { name: Move brackets to avoid $ }
- ignore: { name: Avoid lambda using `infix` }

View File

@ -18,8 +18,7 @@ import Test.Tasty.Options as Tasty (IsOption (..), OptionDescription (Option), s
import Xrefcheck.Config (Config, cExclusionsL, defConfig)
import Xrefcheck.Core (Flavor (GitHub))
import Xrefcheck.Scan (ecIgnoreExternalRefsToL)
import Xrefcheck.Verify
(VerifyError (..), VerifyResult (VerifyResult), checkExternalResource, verifyErrors)
import Xrefcheck.Verify (VerifyError (..), checkExternalResource)
-- | A list with all the options needed to configure FTP links tests.
ftpOptions :: [OptionDescription]
@ -49,36 +48,34 @@ test_FtpLinks = askOption $ \(FtpHostOpt host) -> do
testGroup "Ftp links handler"
[ testCase "handles correct link to file" $ do
let link = host <> "/pub/file_exists.txt"
result <- checkExternalResource config link
result @?= VerifyResult []
result <- runExceptT $ checkExternalResource config link
result @?= Right ()
, testCase "handles empty link (host only)" $ do
let link = host
result <- checkExternalResource config link
result @?= VerifyResult []
result <- runExceptT $ checkExternalResource config link
result @?= Right ()
, testCase "handles correct link to non empty directory" $ do
let link = host <> "/pub/"
result <- checkExternalResource config link
result @?= VerifyResult []
result <- runExceptT $ checkExternalResource config link
result @?= Right ()
, testCase "handles correct link to empty directory" $ do
let link = host <> "/empty/"
result <- checkExternalResource config link
result @?= VerifyResult []
result <- runExceptT $ checkExternalResource config link
result @?= Right ()
, testCase "throws exception when file not found" $ do
let link = host <> "/pub/file_does_not_exists.txt"
result <- checkExternalResource config link
case verifyErrors result of
Nothing ->
result <- runExceptT $ checkExternalResource config link
case result of
Right () ->
assertFailure "No exception was raised, FtpEntryDoesNotExist expected"
Just errors ->
assertBool "Expected FtpEntryDoesNotExist, got other exceptions"
(any (
\case
Left err ->
assertBool "Expected FtpEntryDoesNotExist, got other exceptions" $
case err of
FtpEntryDoesNotExist _ -> True
ExternalFtpException _ -> True
_ -> False
) $ toList errors)
]

View File

@ -126,7 +126,7 @@ filepathOption :: Mod OptionFields FilePath -> Parser FilePath
filepathOption = strOption
globOption :: Mod OptionFields RelGlobPattern -> Parser RelGlobPattern
globOption = option $ eitherReader $ mkGlobPattern
globOption = option $ eitherReader mkGlobPattern
repoTypeReadM :: ReadM RepoType
repoTypeReadM = eitherReader $ \name ->

View File

@ -79,7 +79,7 @@ defaultAction Options{..} = do
#{interpolateIndentF 2 (build repoInfo)}
|]
whenJust (nonEmpty $ sortBy (compare `on` seFile) scanErrs) $ reportScanErrs
whenJust (nonEmpty $ sortBy (compare `on` seFile) scanErrs) reportScanErrs
verifyRes <- allowRewrite showProgressBar $ \rw -> do
let fullConfig = config

View File

@ -54,6 +54,30 @@ data NetworkingConfig' f = NetworkingConfig
, ncMaxRetries :: Field f Int
-- ^ How many attempts to retry an external link after getting
-- a "429 Too Many Requests" response.
-- Timeouts may also be accounted here, see the description
-- of `maxTimeoutRetries` field.
--
-- If a site once responded with 429 error code, subsequent
-- request timeouts will also be treated as hitting the site's
-- rate limiter and result in retry attempts, unless the
-- maximum retries number has been reached.
--
-- On other errors xrefcheck fails immediately, without retrying.
, ncMaxTimeoutRetries :: Field f Int
-- ^ Querying a given domain that ever returned 429 before,
-- this defines how many timeouts are allowed during retries.
--
-- For such domains, timeouts likely mean hitting the rate limiter,
-- and so xrefcheck considers timeouts in the same way as 429 errors.
--
-- For other domains, a timeout results in a respective error, no retry
-- attempts will be performed. Use `externalRefCheckTimeout` option
-- to increase the time after which timeout is declared.
--
-- This option is similar to `maxRetries`, the difference is that
-- this `maxTimeoutRetries` option limits only the number of retries
-- caused by timeouts, and `maxRetries` limits the number of retries
-- caused both by 429s and timeouts.
} deriving stock (Generic)
-- | Type alias for ScannersConfig' with all required fields.
@ -113,6 +137,7 @@ overrideConfig config
, ncIgnoreAuthFailures = overrideField ncIgnoreAuthFailures
, ncDefaultRetryAfter = overrideField ncDefaultRetryAfter
, ncMaxRetries = overrideField ncMaxRetries
, ncMaxTimeoutRetries = overrideField ncMaxTimeoutRetries
}
where
overrideField :: (forall f. NetworkingConfig' f -> Field f a) -> a

View File

@ -54,8 +54,33 @@ networking:
# How many attempts to retry an external link after getting
# a "429 Too Many Requests" response.
# Timeouts may also be accounted here, see the description
# of `maxTimeoutRetries` field.
# If a site once responded with 429 error code, subsequent
# request timeouts will also be treated as hitting the site's
# rate limiter and result in retry attempts, unless the
# maximum retries number has been reached.
#
# On other errors xrefcheck fails immediately, without retrying.
maxRetries: 3
# Querying a given domain that ever returned 429 before,
# this defines how many timeouts are allowed during retries.
#
# For such domains, timeouts likely mean hitting the rate limiter,
# and so xrefcheck considers timeouts in the same way as 429 errors.
#
# For other domains, a timeout results in a respective error, no retry
# attempts will be performed. Use `externalRefCheckTimeout` option
# to increase the time after which timeout is declared.
#
# This option is similar to `maxRetries`, the difference is that
# this `maxTimeoutRetries` option limits only the number of retries
# caused by timeouts, and `maxRetries` limits the number of retries
# caused both by 429s and timeouts.
maxTimeoutRetries: 1
# Parameters of scanners for various file types.
scanners:
# On 'anchor not found' error, how much similar anchors should be displayed as

View File

@ -239,7 +239,7 @@ scanRepo scanMode rw formatsSupport config root = do
return . ScanResult errs $ RepoInfo
{ riFiles = M.fromList $ processedFiles <> notProcessedFiles
, riDirectories = M.fromList $ (fmap (, TrackedDirectory) trackedDirs
, riDirectories = M.fromList (fmap (, TrackedDirectory) trackedDirs
<> fmap (, UntrackedDirectory) untrackedDirs)
, riRoot = canonicalRoot
}

View File

@ -57,11 +57,11 @@ toPosition :: Maybe PosInfo -> Position
toPosition = Position . \case
Nothing -> Nothing
Just PosInfo{..}
| startLine == endLine -> Just $
| startLine == endLine -> Just
[int|s|
#{startLine}:#{startColumn}-#{endColumn}
|]
| otherwise -> Just $
| otherwise -> Just
[int|s|
#{startLine}:#{startColumn}-#{endLine}:#{endColumn}
|]

View File

@ -9,8 +9,11 @@ module Xrefcheck.Util
, postfixFields
, (-:)
, aesonConfigOption
, composeFuncList
, posixTimeToTimeSecond
, utcTimeToTimeSecond
, unlessFunc
, whenFunc
, module Xrefcheck.Util.Colorize
, module Xrefcheck.Util.Interpolate
) where
@ -59,3 +62,13 @@ posixTimeToTimeSecond posixTime =
utcTimeToTimeSecond :: UTCTime -> Time Second
utcTimeToTimeSecond = posixTimeToTimeSecond . utcTimeToPOSIXSeconds
composeFuncList :: [a -> a] -> a -> a
composeFuncList = foldr (.) id
whenFunc :: Bool -> (a -> a) -> (a -> a)
whenFunc True f = f
whenFunc False _ = id
unlessFunc :: Bool -> (a -> a) -> (a -> a)
unlessFunc = whenFunc . not

View File

@ -9,7 +9,6 @@
module Xrefcheck.Verify
( -- * General verification
VerifyResult (..)
, verifyOk
, verifyErrors
, verifying
@ -21,6 +20,7 @@ module Xrefcheck.Verify
, forConcurrentlyCaching
-- * Cross-references validation
, DomainName (..)
, VerifyError (..)
, verifyRepo
, verifyReference
@ -43,6 +43,7 @@ import Data.List (lookup)
import Data.List qualified as L
import Data.Map qualified as M
import Data.Reflection (Given)
import Data.Set qualified as S
import Data.Text (toCaseFold)
import Data.Text.Metrics (damerauLevenshteinNorm)
import Data.Time (UTCTime, defaultTimeLocale, formatTime, readPTime, rfc822DateFormat)
@ -66,7 +67,7 @@ import System.FilePath.Posix ((</>))
import Text.Interpolation.Nyan
import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift)
import Text.Regex.TDFA.Text (Regex, regexec)
import Text.URI (Authority (..), ParseExceptionBs, URI (..), mkURIBs)
import Text.URI (Authority (..), ParseExceptionBs, URI (..), mkURIBs, unRText)
import Time (RatioNat, Second, Time (..), ms, sec, threadDelay, timeout, (+:+), (-:-))
import URI.ByteString qualified as URIBS
@ -92,9 +93,6 @@ newtype VerifyResult e = VerifyResult [e]
deriving newtype instance Semigroup (VerifyResult e)
deriving newtype instance Monoid (VerifyResult e)
verifyOk :: VerifyResult e -> Bool
verifyOk (VerifyResult errors) = null errors
verifyErrors :: VerifyResult e -> Maybe (NonEmpty e)
verifyErrors (VerifyResult errors) = nonEmpty errors
@ -121,6 +119,13 @@ instance (Given ColorMode, Buildable a) => Buildable (WithReferenceLoc a) where
#{wrlItem}
|]
-- | Contains a name of a domain, examples:
-- @DomainName "github.com"@,
-- @DomainName "localhost"@,
-- @DomainName "192.168.0.104"@
newtype DomainName = DomainName { unDomainName :: Text }
deriving stock (Show, Eq, Ord)
data VerifyError
= LocalFileDoesNotExist FilePath
| LocalFileOutsideRepo FilePath
@ -132,7 +137,8 @@ data VerifyError
| ExternalResourceInvalidUrl (Maybe Text)
| ExternalResourceUnknownProtocol
| ExternalHttpResourceUnavailable Status
| ExternalHttpTooManyRequests (Maybe RetryAfter)
| ExternalHttpTooManyRequests (Maybe RetryAfter) (Maybe DomainName)
| ExternalHttpTimeout (Maybe DomainName)
| ExternalFtpResourceUnavailable FTPResponse
| ExternalFtpException FTPException
| FtpEntryDoesNotExist FilePath
@ -214,11 +220,16 @@ instance Given ColorMode => Buildable VerifyError where
Resource unavailable (#{statusCode status} #{decodeUtf8 @Text (statusMessage status)})
|]
ExternalHttpTooManyRequests retryAfter ->
ExternalHttpTooManyRequests retryAfter _ ->
[int||
Resource unavailable (429 Too Many Requests; retry after #{maybeF retryAfter})
|]
ExternalHttpTimeout _ ->
[int||
Response timeout
|]
ExternalFtpResourceUnavailable response ->
[int||
Resource unavailable:
@ -255,6 +266,21 @@ instance Given ColorMode => Buildable VerifyError where
#{redirectedUrl}
|]
data RetryCounter = RetryCounter
{ rcTotalRetries :: Int
, rcTimeoutRetries :: Int
} deriving stock (Show)
errorsOccured :: RetryCounter -> Bool
errorsOccured rc = rcTotalRetries rc > 0
incTotalCounter :: RetryCounter -> RetryCounter
incTotalCounter rc = rc {rcTotalRetries = rcTotalRetries rc + 1}
incTimeoutCounter :: RetryCounter -> RetryCounter
incTimeoutCounter rc = rc {rcTimeoutRetries = rcTimeoutRetries rc + 1}
reportVerifyErrs
:: Given ColorMode => NonEmpty (WithReferenceLoc VerifyError) -> IO ()
reportVerifyErrs errs = fmt
@ -279,11 +305,6 @@ instance Buildable RetryAfter where
fromString $ formatTime defaultTimeLocale rfc822DateFormat d
build (Seconds s) = nameF "seconds" $ show s
-- | Determine whether the verification result contains a fixable error.
isFixable :: VerifyError -> Bool
isFixable (ExternalHttpTooManyRequests _) = True
isFixable _ = False
data NeedsCaching key
= NoCaching
| CacheUnderKey key
@ -373,10 +394,10 @@ verifyRepo
-- added to Git while gathering RepoInfo.
progressRef <- newIORef $ initVerifyProgress (map snd toScan)
domainsReturned429Ref <- newIORef S.empty
accumulated <- loopAsyncUntil (printer progressRef) do
forConcurrentlyCaching toScan ifExternalThenCache $ \(file, ref) ->
verifyReference config mode progressRef repoInfo file ref
verifyReference config mode domainsReturned429Ref progressRef repoInfo file ref
case accumulated of
Right res -> return $ fold res
Left (exception, partialRes) -> do
@ -384,7 +405,7 @@ verifyRepo
let errs = verifyErrors (fold partialRes)
total = length toScan
checked = length partialRes
whenJust errs $ reportVerifyErrs
whenJust errs reportVerifyErrs
fmt [int|A|
Interrupted (#s{exception}), checked #{checked} out of #{total} references.
|]
@ -419,6 +440,7 @@ shouldCheckLocType mode locType
verifyReference
:: Config
-> VerifyMode
-> IORef (S.Set DomainName)
-> IORef VerifyProgress
-> RepoInfo
-> CanonicalPath
@ -427,109 +449,132 @@ verifyReference
verifyReference
config@Config{..}
mode
domainsReturned429Ref
progressRef
repoInfo@RepoInfo{..}
file
ref@Reference{..}
= retryVerification 0 $
= fmap (fmap addReference . toVerifyRes) $
retryVerification (RetryCounter 0 0) $ runExceptT $
if shouldCheckLocType mode rInfo
then case rInfo of
RIFileLocal -> checkRef rAnchor riRoot file ""
RIFileRelative -> do
let shownFilepath = getPosixRelativeOrAbsoluteChild riRoot (takeDirectory file)
</> toString rLink
canonicalPath <- takeDirectory file </ toString rLink
canonicalPath <- liftIO $ takeDirectory file </ toString rLink
checkRef rAnchor riRoot canonicalPath shownFilepath
RIFileAbsolute -> do
let shownFilepath = dropWhile isPathSeparator (toString rLink)
canonicalPath <- riRoot </ shownFilepath
canonicalPath <- liftIO $ riRoot </ shownFilepath
checkRef rAnchor riRoot canonicalPath shownFilepath
RIExternal -> checkExternalResource config rLink
RIOtherProtocol -> verifying pass
else return mempty
RIOtherProtocol -> pass
else pass
where
addReference :: VerifyError -> WithReferenceLoc VerifyError
addReference = WithReferenceLoc (getPosixRelativeOrAbsoluteChild riRoot file) ref
retryVerification
:: Int
-> IO (VerifyResult VerifyError)
-> IO (VerifyResult $ WithReferenceLoc VerifyError)
retryVerification numberOfRetries resIO = do
res@(VerifyResult ves) <- resIO
:: RetryCounter
-> IO (Either VerifyError ())
-> IO (Either VerifyError ())
retryVerification rc resIO = do
let errorsOccured_ = errorsOccured rc
res <- resIO
now <- getPOSIXTime <&> posixTimeToTimeSecond
case res of
-- Success
Right () -> do
let moveProgressOnSuccess =
if errorsOccured_
then decProgressFixableErrors
else incProgress
modifyProgressRef now Nothing moveProgressOnSuccess
pure res
Left err -> do
setOfReturned429 <- addDomainIf429 domainsReturned429Ref err
case decideWhetherToRetry setOfReturned429 rc err of
-- Unfixable
Nothing -> do
let moveProgressOnUnfixable = composeFuncList
[ unlessFunc errorsOccured_ incProgress
, if errorsOccured_
then fixableToUnfixable
else incProgressUnfixableErrors
]
modifyProgressRef now Nothing moveProgressOnUnfixable
pure res
-- Fixable, retry
Just (mbCurrentRetryAfter, counterModifier) -> do
let toSeconds = \case
Seconds s -> s
-- Calculates the seconds left until @Retry-After@ date.
-- Defaults to 0 if the date has already passed.
Date date | utcTimeToTimeSecond date >= now -> utcTimeToTimeSecond date -:- now
_ -> sec 0
let toSeconds = \case
Seconds s -> s
-- Calculates the seconds left until @Retry-After@ date.
-- Defaults to 0 if the date has already passed.
Date date | utcTimeToTimeSecond date >= now -> utcTimeToTimeSecond date -:- now
_ -> sec 0
let currentRetryAfter = fromMaybe (ncDefaultRetryAfter cNetworking) $
fmap toSeconds mbCurrentRetryAfter
let toRetry = any isFixable ves && numberOfRetries < ncMaxRetries cNetworking
currentRetryAfter = fromMaybe (ncDefaultRetryAfter cNetworking) $
extractRetryAfterInfo res <&> toSeconds
let moveProgressOnFixable = whenFunc (not errorsOccured_) $
composeFuncList
[ incProgressFixableErrors
, incProgress
]
modifyProgressRef now (Just currentRetryAfter) moveProgressOnFixable
threadDelay currentRetryAfter
retryVerification
(counterModifier rc)
resIO
let moveProgress = alterOverallProgress numberOfRetries
. alterProgressErrors res numberOfRetries
modifyProgressRef :: Time Second -> Maybe (Time Second) -> (Progress Int -> Progress Int) -> IO ()
modifyProgressRef now mbRetryAfter moveProgress = atomicModifyIORef' progressRef $ \VerifyProgress{..} ->
( if isExternal rInfo
then VerifyProgress{ vrExternal =
let vrExternalAdvanced = moveProgress vrExternal
in case mbRetryAfter of
Just retryAfter -> case pTaskTimestamp vrExternal of
Just (TaskTimestamp ttc start)
| retryAfter +:+ now <= ttc +:+ start -> vrExternalAdvanced
_ -> setTaskTimestamp retryAfter now vrExternalAdvanced
Nothing -> vrExternalAdvanced, .. }
else VerifyProgress{ vrLocal = moveProgress vrLocal, .. }
, ()
)
atomicModifyIORef' progressRef $ \VerifyProgress{..} ->
( if isExternal rInfo
then VerifyProgress{ vrExternal =
let vrExternalAdvanced = moveProgress vrExternal
in if toRetry
then case pTaskTimestamp vrExternal of
Just (TaskTimestamp ttc start)
| currentRetryAfter +:+ now <= ttc +:+ start -> vrExternalAdvanced
_ -> setTaskTimestamp currentRetryAfter now vrExternalAdvanced
else vrExternalAdvanced, .. }
else VerifyProgress{ vrLocal = moveProgress vrLocal, .. }
, ()
)
if toRetry
then do
threadDelay currentRetryAfter
retryVerification (numberOfRetries + 1) resIO
else return . (<$> res) $
WithReferenceLoc (getPosixRelativeOrAbsoluteChild riRoot file) ref
addDomainIf429 :: IORef (S.Set DomainName) -> VerifyError -> IO (S.Set DomainName)
addDomainIf429 setRef err = atomicModifyIORef' setRef $ \s ->
(\x -> (x, x)) $ case err of
ExternalHttpTooManyRequests _ mbDomain ->
maybe s (flip S.insert s) mbDomain
_ -> s
alterOverallProgress
:: (Num a)
=> Int
-> Progress a
-> Progress a
alterOverallProgress retryNumber
| retryNumber > 0 = id
| otherwise = incProgress
decideWhetherToRetry
:: S.Set DomainName
-> RetryCounter
-> VerifyError
-> Maybe (Maybe RetryAfter, RetryCounter -> RetryCounter)
decideWhetherToRetry setOfReturned429 rc = \case
ExternalHttpTooManyRequests retryAfter _
| totalRetriesNotExceeded -> Just (retryAfter, incTotalCounter)
ExternalHttpTimeout (Just domain)
| totalRetriesNotExceeded && timeoutRetriesNotExceeded ->
-- If a given domain ever returned 429 error, we assume that getting timeout from
-- the domain can be considered as a 429-like error, and hence we retry.
-- If there was no 429 responses from this domain, then getting timeout from
-- it probably means that this site is not working at all.
-- Also, there always remains a possibility that we just didn't get the response
-- in time, but we can't avoid this case here, the only thing that can help
-- is to increase the allowed timeout in the config.
alterProgressErrors
:: (Num a)
=> VerifyResult VerifyError
-> Int
-> Progress a
-> Progress a
alterProgressErrors res@(VerifyResult ves) retryNumber
| (ncMaxRetries cNetworking) == 0 =
if ok then id
else incProgressUnfixableErrors
| retryNumber == 0 =
if ok then id
else if fixable then incProgressFixableErrors
else incProgressUnfixableErrors
| retryNumber == (ncMaxRetries cNetworking) =
if ok then decProgressFixableErrors
else fixableToUnfixable
-- 0 < retryNumber < ncMaxRetries
| otherwise =
if ok then decProgressFixableErrors
else if fixable then id
else fixableToUnfixable
where
ok = verifyOk res
fixable = any isFixable ves
extractRetryAfterInfo :: VerifyResult VerifyError -> Maybe RetryAfter
extractRetryAfterInfo = \case
VerifyResult [ExternalHttpTooManyRequests retryAfter] -> retryAfter
if S.member domain setOfReturned429
then Just (Just (Seconds $ sec 0), incTimeoutCounter . incTotalCounter)
else Nothing
_ -> Nothing
where
totalRetriesNotExceeded = rcTotalRetries rc < ncMaxRetries cNetworking
timeoutRetriesNotExceeded = rcTimeoutRetries rc < ncMaxTimeoutRetries cNetworking
isVirtual canonicalRoot = matchesGlobPatterns canonicalRoot (ecIgnoreLocalRefsTo cExclusions)
@ -540,7 +585,8 @@ verifyReference
-- so it allows indirections and should be suitable for being shown to
-- the user. Also, it will be considered as outside the repository if
-- it is relative and its idirections pass through the repository root.
checkRef mAnchor canonicalRoot referredFile shownFilepath = verifying $
checkRef :: Maybe Text -> CanonicalPath -> CanonicalPath -> FilePath -> ExceptT VerifyError IO ()
checkRef mAnchor canonicalRoot referredFile shownFilepath =
unless (isVirtual canonicalRoot referredFile) do
when (hasIndirectionThroughParent shownFilepath) $
throwError $ LocalFileOutsideRepo shownFilepath
@ -642,10 +688,10 @@ parseUri link = do
& handleJust (fromException @ParseExceptionBs)
(throwError . ExternalResourceUriConversionError)
checkExternalResource :: Config -> Text -> IO (VerifyResult VerifyError)
checkExternalResource :: Config -> Text -> ExceptT VerifyError IO ()
checkExternalResource Config{..} link
| isIgnored = return mempty
| otherwise = fmap toVerifyRes $ runExceptT $ do
| isIgnored = pass
| otherwise = do
uri <- parseUri link
case toString <$> uriScheme uri of
Just "http" -> checkHttp uri
@ -670,7 +716,7 @@ checkExternalResource Config{..} link
checkHttp :: URI -> ExceptT VerifyError IO ()
checkHttp uri = makeHttpRequest uri HEAD 0.3 `catchError` \case
e | isFixable e -> throwError e
e@(ExternalHttpTooManyRequests _ _) -> throwError e
_ -> makeHttpRequest uri GET 0.7
httpConfig :: HttpConfig
@ -700,8 +746,12 @@ checkExternalResource Config{..} link
let maxTime = Time @Second $ unTime ncExternalRefCheckTimeout * timeoutFrac
mres <- liftIO (timeout maxTime $ void reqLink) `catch`
(either throwError (\() -> return (Just ())) . interpretErrors)
maybe (throwError $ ExternalResourceSomeError "Response timeout") pure mres
(either throwError (\() -> return (Just ())) . interpretErrors uri)
maybe (throwError $ ExternalHttpTimeout $ extractHost uri) pure mres
extractHost :: URI -> Maybe DomainName
extractHost =
either (const Nothing) (Just . DomainName . unRText . authHost) . uriAuthority
isTemporaryRedirectCode :: Int -> Bool
isTemporaryRedirectCode = flip elem [302, 303, 307]
@ -719,7 +769,7 @@ checkExternalResource Config{..} link
, (405 ==) -- method mismatch
]
interpretErrors = \case
interpretErrors uri = \case
JsonHttpException _ -> error "External link JSON parse exception"
VanillaHttpException err -> case err of
InvalidUrlException{} -> error "External link URL invalid exception"
@ -733,7 +783,7 @@ checkExternalResource Config{..} link
| isTemporaryRedirectCode code -> Right ()
| isAllowedErrorCode code -> Right ()
| otherwise -> case statusCode (responseStatus resp) of
429 -> Left . ExternalHttpTooManyRequests $ retryAfterInfo resp
429 -> Left $ ExternalHttpTooManyRequests (retryAfterInfo resp) (extractHost uri)
_ -> Left . ExternalHttpResourceUnavailable $ responseStatus resp
where
code = statusCode $ responseStatus resp

View File

@ -17,12 +17,11 @@ import Test.Tasty (TestTree, testGroup)
import Test.Tasty.HUnit (assertFailure, testCase, (@?=))
import Test.Tasty.QuickCheck (ioProperty, testProperty)
import Xrefcheck.Config
(Config, cExclusionsL, cNetworkingL, defConfig, defConfigText, ncIgnoreAuthFailuresL)
import Xrefcheck.Core (Flavor (GitHub), allFlavors)
import Xrefcheck.Scan (ecIgnoreExternalRefsToL)
import Xrefcheck.Verify (VerifyError (..), VerifyResult (..), checkExternalResource)
import Xrefcheck.Verify (VerifyError (..), checkExternalResource)
import Test.Xrefcheck.Util (mockServer)
@ -53,25 +52,23 @@ test_config =
config & cNetworkingL . ncIgnoreAuthFailuresL .~ value
in [ testCase "when True - assume 401 status is valid" $
checkLinkWithServer (setIgnoreAuthFailures True)
"http://127.0.0.1:3000/401" $ VerifyResult []
"http://127.0.0.1:3000/401" $ Right ()
, testCase "when False - assume 401 status is invalid" $
checkLinkWithServer (setIgnoreAuthFailures False)
"http://127.0.0.1:3000/401" $ VerifyResult
[ ExternalHttpResourceUnavailable $
"http://127.0.0.1:3000/401" $
Left $ ExternalHttpResourceUnavailable $
Status { statusCode = 401, statusMessage = "Unauthorized" }
]
, testCase "when True - assume 403 status is valid" $
checkLinkWithServer (setIgnoreAuthFailures True)
"http://127.0.0.1:3000/403" $ VerifyResult []
"http://127.0.0.1:3000/403" $ Right ()
, testCase "when False - assume 403 status is invalid" $
checkLinkWithServer (setIgnoreAuthFailures False)
"http://127.0.0.1:3000/403" $ VerifyResult
[ ExternalHttpResourceUnavailable $
"http://127.0.0.1:3000/403" $
Left $ ExternalHttpResourceUnavailable $
Status { statusCode = 403, statusMessage = "Forbidden" }
]
]
, testGroup "Config parser reject input with unknown fields"
[ testCase "throws error with useful messages" $ do
@ -84,11 +81,8 @@ test_config =
]
]
where
checkLinkWithServer config link expectation =
E.bracket (forkIO mockServer) killThread $ \_ -> do
result <- checkExternalResource config link
result <- runExceptT $ checkExternalResource config link
result @?= expectation

View File

@ -9,6 +9,7 @@ import Universum
import Data.CaseInsensitive qualified as CI
import Data.Map qualified as M
import Data.Set qualified as S
import Network.HTTP.Types (Status, mkStatus)
import Network.HTTP.Types.Header (hLocation)
import Test.Tasty (TestName, TestTree, testGroup)
@ -56,8 +57,10 @@ test_redirectRequests = testGroup "Redirect response tests"
]
redirectAssertion :: Status -> Maybe Text -> Maybe VerifyError -> Assertion
redirectAssertion expectedStatus expectedLocation expectedError =
checkLinkAndProgressWithServer
redirectAssertion expectedStatus expectedLocation expectedError = do
setRef <- newIORef S.empty
checkLinkAndProgressWithServerDefault
setRef
(mockRedirect expectedLocation expectedStatus)
url
(Progress

View File

@ -0,0 +1,146 @@
{- SPDX-FileCopyrightText: 2021 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-}
module Test.Xrefcheck.TimeoutSpec where
import Universum
import Data.CaseInsensitive qualified as CI
import Data.Map qualified as M
import Data.Set qualified as S
import Network.HTTP.Types (ok200, tooManyRequests429)
import Network.HTTP.Types.Header (hRetryAfter)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.HUnit (testCase)
import Time (Second, Time, sec, threadDelay)
import Web.Firefly (ToResponse (toResponse), route, run)
import Test.Xrefcheck.UtilRequests
import Xrefcheck.Config
import Xrefcheck.Progress
import Xrefcheck.Verify
-- Here all the delays are doubled because we call sites
-- with HEAD first and then GET methods.
test_timeout :: TestTree
test_timeout = testGroup "Timeout tests"
[ testCase "Succeeds on one timeout if there were no 429 responses and no retries allowed" $
timeoutTestCase [Delay] True
(cNetworkingL . ncMaxTimeoutRetriesL .~ 0)
, testCase "Returns an error on two timeouts if there were no 429 responses" $
timeoutTestCase [Delay, Delay] False
(cNetworkingL . ncMaxTimeoutRetriesL .~ 2)
, testCase "Returns an error if there were 429 but no timeouts allowed" $
timeoutTestCase [Respond429, Delay, Delay] False
(cNetworkingL . ncMaxTimeoutRetriesL .~ 0)
, testCase "Succeeds if there were 429 and one timeout allowed" $
timeoutTestCase [Respond429, Delay, Delay] True
(cNetworkingL . ncMaxTimeoutRetriesL .~ 1)
, testCase "Fails on second timeout if there were 429 and one timeout allowed" $
timeoutTestCase [Respond429, Delay, Delay, Delay, Delay] False
(cNetworkingL . ncMaxTimeoutRetriesL .~ 1)
, testCase "Fails on maximum allowed errors achieved (mixed errors)" $
timeoutTestCase [Respond429, Delay, Delay, Respond429, Delay, Delay] False $ \c -> c
& cNetworkingL . ncMaxTimeoutRetriesL .~ 3
& cNetworkingL . ncMaxRetriesL .~ 3
, testCase "Fails on timeout if another domain returned 429" $ do
setRef <- newIORef S.empty
checkMultipleLinksWithServer
(mockTimeout (sec 0.4) [Respond429, Ok, Delay, Delay])
setRef
[ VerifyLinkTestEntry
{ vlteConfigModifier = \c -> c
& cNetworkingL . ncMaxRetriesL .~ 1
& setAllowedTimeout
, vlteLink = "http://127.0.0.1:5000/timeout"
, vlteExpectedProgress = mkProgressWithOneTask True
, vlteExpectationErrors = VerifyResult []
}
, VerifyLinkTestEntry
{ vlteConfigModifier = \c -> c
& cNetworkingL . ncMaxTimeoutRetriesL .~ 0
& setAllowedTimeout
, vlteLink = "http://localhost:5000/timeout"
, vlteExpectedProgress = mkProgressWithOneTask False
, vlteExpectationErrors = VerifyResult
[ ExternalHttpTimeout (Just $ DomainName "localhost")
]
}
]
, testCase "Succeeds on timeout if another path of this domain returned 429" $ do
setRef <- newIORef S.empty
checkMultipleLinksWithServer
(mockTimeout (sec 0.4) [Respond429, Ok, Delay, Delay])
setRef
[ VerifyLinkTestEntry
{ vlteConfigModifier = \c -> c
& cNetworkingL . ncMaxRetriesL .~ 1
& setAllowedTimeout
, vlteLink = "http://127.0.0.1:5000/timeout"
, vlteExpectedProgress = mkProgressWithOneTask True
, vlteExpectationErrors = VerifyResult []
}
, VerifyLinkTestEntry
{ vlteConfigModifier = \c -> c
& cNetworkingL . ncMaxTimeoutRetriesL .~ 1
& setAllowedTimeout
, vlteLink = "http://127.0.0.1:5000/timeoutother"
, vlteExpectedProgress = mkProgressWithOneTask True
, vlteExpectationErrors = VerifyResult []
}
]
]
where
setAllowedTimeout = cNetworkingL . ncExternalRefCheckTimeoutL .~ (sec 0.25)
mkProgressWithOneTask shouldSucceed =
Progress
{ pTotal = 1
, pCurrent = 1
, pErrorsUnfixable = if shouldSucceed then 0 else 1
, pErrorsFixable = 0
, pTaskTimestamp = Nothing
}
timeoutTestCase mockResponses shouldSucceed configModifier = do
let prog = mkProgressWithOneTask shouldSucceed
setRef <- newIORef S.empty
checkLinkAndProgressWithServer
(\c -> c
& setAllowedTimeout
& configModifier)
setRef
(mockTimeout (sec 0.4) mockResponses)
"http://127.0.0.1:5000/timeout" prog $
VerifyResult $
[ExternalHttpTimeout $ Just (DomainName "127.0.0.1") | not shouldSucceed]
-- When called for the first (N-1) times, waits for specified
-- amount of seconds and returns an arbitrary result.
-- When called N time returns the result immediately.
mockTimeout :: Time Second -> [MockTimeoutBehaviour] -> IO ()
mockTimeout timeout behList = do
ref <- newIORef @_ behList
run 5000 $ do
route "/timeout" $ handler ref
route "/timeoutother" $ handler ref
where
handler ref = do
mbCurrentAction <- atomicModifyIORef' ref $ \case
b : bs -> (bs, Just b)
[] -> ([], Nothing)
let success = toResponse ("" :: Text, ok200, M.empty @(CI.CI Text) @[Text])
case mbCurrentAction of
Nothing -> pure success
Just Ok -> pure success
Just Delay -> do
threadDelay timeout
pure $ toResponse ("" :: Text, ok200, M.empty @(CI.CI Text) @[Text])
Just Respond429 ->
pure $ toResponse
("" :: Text, tooManyRequests429,
M.fromList [(CI.map (decodeUtf8 @Text) hRetryAfter, ["1" :: Text])])
data MockTimeoutBehaviour = Respond429 | Delay | Ok

View File

@ -11,6 +11,7 @@ import Control.Concurrent (forkIO, killThread)
import Control.Exception qualified as E
import Data.CaseInsensitive qualified as CI
import Data.Map qualified as M
import Data.Set qualified as S
import Data.Time (addUTCTime, defaultTimeLocale, formatTime, getCurrentTime, rfc822DateFormat)
import Data.Time.Clock.POSIX (getPOSIXTime)
import Network.HTTP.Types (Status (..), ok200, serviceUnavailable503, tooManyRequests429)
@ -29,22 +30,24 @@ import Xrefcheck.Verify
test_tooManyRequests :: TestTree
test_tooManyRequests = testGroup "429 response tests"
[ testCase "Returns 200 eventually" $ do
setRef <- newIORef S.empty
let prog = Progress{ pTotal = 1
, pCurrent = 1
, pErrorsUnfixable = 0
, pErrorsFixable = 0
, pTaskTimestamp = Nothing
}
checkLinkAndProgressWithServer (mock429 "1" ok200)
checkLinkAndProgressWithServerDefault setRef (mock429 "1" ok200)
"http://127.0.0.1:5000/429" prog $ VerifyResult []
, testCase "Returns 503 eventually" $ do
setRef <- newIORef S.empty
let prog = Progress{ pTotal = 1
, pCurrent = 1
, pErrorsUnfixable = 1
, pErrorsFixable = 0
, pTaskTimestamp = Nothing
}
checkLinkAndProgressWithServer (mock429 "1" serviceUnavailable503)
checkLinkAndProgressWithServerDefault setRef (mock429 "1" serviceUnavailable503)
"http://127.0.0.1:5000/429" prog $ VerifyResult
[ ExternalHttpResourceUnavailable $
Status { statusCode = 503, statusMessage = "Service Unavailable"}
@ -52,6 +55,7 @@ test_tooManyRequests = testGroup "429 response tests"
, testCase "Successfully updates the new retry-after value (as seconds)" $ do
E.bracket (forkIO $ mock429 "2" ok200) killThread $ \_ -> do
now <- getPOSIXTime <&> posixTimeToTimeSecond
setRef <- newIORef S.empty
progressRef <- newIORef VerifyProgress
{ vrLocal = initProgress 0
, vrExternal = Progress
@ -62,8 +66,9 @@ test_tooManyRequests = testGroup "429 response tests"
, pTaskTimestamp = Just (TaskTimestamp (sec 3) (now -:- sec 1.5))
}
}
_ <- verifyReferenceWithProgress
_ <- verifyReferenceWithProgressDefault
(Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) RIExternal)
setRef
progressRef
Progress{..} <- vrExternal <$> readIORef progressRef
let ttc = ttTimeToCompletion <$> pTaskTimestamp
@ -77,6 +82,7 @@ test_tooManyRequests = testGroup "429 response tests"
retryAfter = formatTime defaultTimeLocale rfc822DateFormat (addUTCTime 4 utctime)
now = utcTimeToTimeSecond utctime
E.bracket (forkIO $ mock429 (fromString retryAfter) ok200) killThread $ \_ -> do
setRef <- newIORef S.empty
progressRef <- newIORef VerifyProgress
{ vrLocal = initProgress 0
, vrExternal = Progress
@ -87,8 +93,9 @@ test_tooManyRequests = testGroup "429 response tests"
, pTaskTimestamp = Just (TaskTimestamp (sec 2) (now -:- sec 1.5))
}
}
_ <- verifyReferenceWithProgress
_ <- verifyReferenceWithProgressDefault
(Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) RIExternal)
setRef
progressRef
Progress{..} <- vrExternal <$> readIORef progressRef
let ttc = fromMaybe (sec 0) $ ttTimeToCompletion <$> pTaskTimestamp
@ -103,6 +110,7 @@ test_tooManyRequests = testGroup "429 response tests"
retryAfter = formatTime defaultTimeLocale rfc822DateFormat (addUTCTime (-4) utctime)
now = utcTimeToTimeSecond utctime
E.bracket (forkIO $ mock429 (fromString retryAfter) ok200) killThread $ \_ -> do
setRef <- newIORef S.empty
progressRef <- newIORef VerifyProgress
{ vrLocal = initProgress 0
, vrExternal = Progress
@ -113,8 +121,9 @@ test_tooManyRequests = testGroup "429 response tests"
, pTaskTimestamp = Just (TaskTimestamp (sec 1) (now -:- sec 1.5))
}
}
_ <- verifyReferenceWithProgress
_ <- verifyReferenceWithProgressDefault
(Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) RIExternal)
setRef
progressRef
Progress{..} <- vrExternal <$> readIORef progressRef
let ttc = ttTimeToCompletion <$> pTaskTimestamp
@ -147,8 +156,9 @@ test_tooManyRequests = testGroup "429 response tests"
)
| otherwise -> toResponse ("" :: Text, serviceUnavailable503)
infoReverseAccumulatorRef <- newIORef []
setRef <- newIORef S.empty
E.bracket (forkIO $ mock429WithGlobalIORef infoReverseAccumulatorRef) killThread $ \_ -> do
_ <- verifyLink "http://127.0.0.1:5000/429grandfinale"
_ <- verifyLinkDefault setRef "http://127.0.0.1:5000/429grandfinale"
infoReverseAccumulator <- readIORef infoReverseAccumulatorRef
reverse infoReverseAccumulator @?=
[ ("HEAD", tooManyRequests429)

View File

@ -7,15 +7,22 @@ module Test.Xrefcheck.UtilRequests
( checkLinkAndProgressWithServer
, verifyLink
, verifyReferenceWithProgress
, checkMultipleLinksWithServer
, checkLinkAndProgressWithServerDefault
, verifyLinkDefault
, verifyReferenceWithProgressDefault
, VerifyLinkTestEntry (..)
) where
import Universum
import Control.Concurrent (forkIO, killThread)
import Control.Exception qualified as E
import Data.Map qualified as M
import Data.Set qualified as S
import Test.Tasty.HUnit (assertBool)
import Text.Interpolation.Nyan
import Control.Concurrent (forkIO, killThread)
import Test.Tasty.HUnit (assertBool)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Progress
@ -24,29 +31,57 @@ import Xrefcheck.System (canonicalizePath)
import Xrefcheck.Util
import Xrefcheck.Verify
checkLinkAndProgressWithServer
checkMultipleLinksWithServer
:: IO ()
-> IORef (S.Set DomainName)
-> [VerifyLinkTestEntry]
-> IO ()
checkMultipleLinksWithServer mock setRef entries =
E.bracket (forkIO mock) killThread $ \_ -> do
forM_ entries $ \VerifyLinkTestEntry {..} ->
checkLinkAndProgress
vlteConfigModifier
setRef
vlteLink
vlteExpectedProgress
vlteExpectationErrors
checkLinkAndProgressWithServer
:: (Config -> Config)
-> IORef (Set DomainName)
-> IO ()
-> Text
-> Progress Int
-> VerifyResult VerifyError
-> IO ()
checkLinkAndProgressWithServer mock link progress vrExpectation =
E.bracket (forkIO mock) killThread $ \_ -> do
(result, progRes) <- verifyLink link
flip assertBool (result == vrExpectation) $
[int||
Verification results differ: expected
#{interpolateIndentF 2 (show vrExpectation)}
but got
#{interpolateIndentF 2 (show result)}
|]
flip assertBool (progRes `progEquiv` progress) $
[int||
Expected the progress bar state to be
#{interpolateIndentF 2 (show progress)}
but got
#{interpolateIndentF 2 (show progRes)}
|]
checkLinkAndProgressWithServer configModifier setRef mock link progress vrExpectation =
E.bracket (forkIO mock) killThread $ \_ -> do
checkLinkAndProgress configModifier setRef link progress vrExpectation
checkLinkAndProgress
:: (Config -> Config)
-> IORef (Set DomainName)
-> Text
-> Progress Int
-> VerifyResult VerifyError
-> IO ()
checkLinkAndProgress configModifier setRef link progress vrExpectation = do
(result, progRes) <- verifyLink configModifier setRef link
flip assertBool (result == vrExpectation)
[int||
Verification results differ: expected
#{interpolateIndentF 2 (show vrExpectation)}
but got
#{interpolateIndentF 2 (show result)}
|]
flip assertBool (progRes `progEquiv` progress)
[int||
Expected the progress bar state to be
#{interpolateIndentF 2 (show progress)}
but got
#{interpolateIndentF 2 (show progRes)}
|]
where
-- Check whether the two @Progress@ values are equal up to similarity of their essential
-- components, ignoring the comparison of @pTaskTimestamp@s, which is done to prevent test
@ -60,18 +95,57 @@ checkLinkAndProgressWithServer mock link progress vrExpectation =
, ((==) `on` pErrorsFixable) p1 p2
]
verifyLink :: Text -> IO (VerifyResult VerifyError, Progress Int)
verifyLink link = do
checkLinkAndProgressWithServerDefault
:: IORef (Set DomainName)
-> IO ()
-> Text
-> Progress Int
-> VerifyResult VerifyError
-> IO ()
checkLinkAndProgressWithServerDefault = checkLinkAndProgressWithServer id
verifyLink
:: (Config -> Config)
-> IORef (S.Set DomainName)
-> Text
-> IO (VerifyResult VerifyError, Progress Int)
verifyLink configModifier setRef link = do
let reference = Reference "" link Nothing (Position Nothing) RIExternal
progRef <- newIORef $ initVerifyProgress [reference]
result <- verifyReferenceWithProgress reference progRef
p <- readIORef progRef
return (result, vrExternal p)
result <- verifyReferenceWithProgress configModifier reference setRef progRef
progress <- readIORef progRef
return (result, vrExternal progress)
verifyReferenceWithProgress :: Reference -> IORef VerifyProgress -> IO (VerifyResult VerifyError)
verifyReferenceWithProgress reference progRef = do
verifyLinkDefault
:: IORef (Set DomainName)
-> Text
-> IO (VerifyResult VerifyError, Progress Int)
verifyLinkDefault = verifyLink id
verifyReferenceWithProgress
:: (Config -> Config)
-> Reference
-> IORef (S.Set DomainName)
-> IORef VerifyProgress
-> IO (VerifyResult VerifyError)
verifyReferenceWithProgress configModifier reference setRef progRef = do
canonicalRoot <- canonicalizePath "."
file <- canonicalizePath ""
fmap wrlItem <$> verifyReference
(defConfig GitHub & cExclusionsL . ecIgnoreExternalRefsToL .~ []) FullMode
progRef (RepoInfo mempty mempty canonicalRoot) file reference
(defConfig GitHub & cExclusionsL . ecIgnoreExternalRefsToL .~ []
& configModifier)
FullMode setRef progRef (RepoInfo M.empty mempty canonicalRoot) file reference
verifyReferenceWithProgressDefault
:: Reference
-> IORef (Set DomainName)
-> IORef VerifyProgress
-> IO (VerifyResult VerifyError)
verifyReferenceWithProgressDefault = verifyReferenceWithProgress id
data VerifyLinkTestEntry = VerifyLinkTestEntry
{ vlteConfigModifier :: Config -> Config
, vlteLink :: Text
, vlteExpectedProgress :: Progress Int
, vlteExpectationErrors :: VerifyResult VerifyError
}

View File

@ -43,8 +43,33 @@ networking:
# How many attempts to retry an external link after getting
# a "429 Too Many Requests" response.
# Timeouts may also be accounted here, see the description
# of `maxTimeoutRetries` field.
# If a site once responded with 429 error code, subsequent
# request timeouts will also be treated as hitting the site's
# rate limiter and result in retry attempts, unless the
# maximum retries number has been reached.
#
# On other errors xrefcheck fails immediately, without retrying.
maxRetries: 3
# Querying a given domain that ever returned 429 before,
# this defines how many timeouts are allowed during retries.
#
# For such domains, timeouts likely mean hitting the rate limiter,
# and so xrefcheck considers timeouts in the same way as 429 errors.
#
# For other domains, a timeout results in a respective error, no retry
# attempts will be performed. Use `externalRefCheckTimeout` option
# to increase the time after which timeout is declared.
#
# This option is similar to `maxRetries`, the difference is that
# this `maxTimeoutRetries` option limits only the number of retries
# caused by timeouts, and `maxRetries` limits the number of retries
# caused both by 429s and timeouts.
maxTimeoutRetries: 1
# Parameters of scanners for various file types.
scanners:
# On 'anchor not found' error, how much similar anchors should be displayed as