From 1964f572806fa4bcbadd2d46d4bdee80a991f57d Mon Sep 17 00:00:00 2001 From: Diogo Castro Date: Sat, 24 Sep 2022 16:50:29 +0100 Subject: [PATCH 1/2] [#162] Do not cancel the progress bar thread Problem: The output of xrefcheck sometimes appears as a confusing mess, see here for details and examples: https://github.com/serokell/xrefcheck/issues/162 The culprit seems to be the `withAsync` used in `verifyRepo`. This spawns a thread that prints and refreshes the progress bar, while the main thread coordinates the verification of references. The problem here is that, as the docs of `withAsync` explain, when the main thread finishes/throws, the spawned thread will be cancelled with `uninterruptibleCancel`. If the thread is in the middle of updating the progress bar (printing return carriages, writing over an existing line, printing control characters to change the text color, etc), it'll be abruptly interrupted and will not be given the chance to finish cleanly. Solution: replace `withAsync` with `loopAsyncUntil`. This lets the printer thread finish what it's currently doing. --- CHANGES.md | 3 ++- src/Xrefcheck/Verify.hs | 42 ++++++++++++++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e51a49c..a6321f7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,9 +8,10 @@ Unreleased ========== * [#145](https://github.com/serokell/xrefcheck/pull/145) + Add check that there is no unknown fields in config. - * [#158](https://github.com/serokell/xrefcheck/pull/158) + Fixed bug when we reported footnotes as broken links +* [#163](https://github.com/serokell/xrefcheck/pull/163) + + Fixed an issue where the progress bar thread might be unexpectedly cancelled and jumble up the output. 0.2.1 ========== diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index 0552887..dcc320a 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -33,8 +33,9 @@ module Xrefcheck.Verify import Universum -import Control.Concurrent.Async (wait, withAsync) +import Control.Concurrent.Async (async, wait, withAsync) import Control.Exception (throwIO) +import Control.Monad.Catch (handleJust) import Control.Monad.Except (MonadError (..)) import Data.ByteString qualified as BS import Data.List qualified as L @@ -43,7 +44,7 @@ import Data.Text.Metrics (damerauLevenshteinNorm) import Data.Time (UTCTime, defaultTimeLocale, formatTime, readPTime, rfc822DateFormat) import Data.Time.Clock.POSIX (getPOSIXTime) import Data.Traversable (for) -import Fmt (Buildable (..), blockListF', listF, maybeF, nameF, (+|), (|+), unlinesF, indentF) +import Fmt (Buildable (..), blockListF', indentF, listF, maybeF, nameF, unlinesF, (+|), (|+)) import GHC.Exts qualified as Exts import GHC.Read (Read (readPrec)) import Network.FTP.Client @@ -57,13 +58,12 @@ import Network.HTTP.Types.Header (hRetryAfter) import Network.HTTP.Types.Status (Status, statusCode, statusMessage) import System.Console.Pretty (Style (..), style) import System.Directory (doesDirectoryExist, doesFileExist) -import System.FilePath (takeDirectory, (), normalise) +import System.FilePath (normalise, takeDirectory, ()) import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift) import Text.Regex.TDFA.Text (Regex, regexec) -import Text.URI (Authority (..), URI (..), mkURIBs, ParseExceptionBs) +import Text.URI (Authority (..), ParseExceptionBs, URI (..), mkURIBs) import Time (RatioNat, Second, Time (..), ms, sec, threadDelay, timeout, (+:+), (-:-)) import URI.ByteString qualified as URIBS -import Control.Monad.Catch (handleJust) import Data.Bits (toIntegralSized) import Xrefcheck.Config @@ -265,12 +265,13 @@ verifyRepo progressRef <- newIORef $ initVerifyProgress (map snd toScan) - accumulated <- withAsync (printer progressRef) $ \_ -> + accumulated <- loopAsyncUntil (printer progressRef) do forConcurrentlyCaching toScan ifExternalThenCache $ \(file, ref) -> verifyReference config mode progressRef repoInfo' root file ref return $ fold accumulated where - printer progressRef = forever $ do + printer :: IORef VerifyProgress -> IO () + printer progressRef = do posixTime <- getPOSIXTime <&> posixTimeToTimeSecond progress <- atomicModifyIORef' progressRef $ \VerifyProgress{..} -> let prog = VerifyProgress{ vrExternal = @@ -279,8 +280,10 @@ verifyRepo } in (prog, prog) reprintAnalyseProgress rw mode posixTime progress + -- Slight pause so we're not refreshing the progress bar more often than needed. threadDelay (ms 100) + ifExternalThenCache :: (a, Reference) -> NeedsCaching Text ifExternalThenCache (_, Reference{..}) = case locationType rLink of ExternalLoc -> CacheUnderKey rLink _ -> NoCaching @@ -621,3 +624,28 @@ checkExternalResource VerifyConfig{..} link pure () where handler = if secure then withFTPS else withFTP + +---------------------------------------------------------------------------- +-- Helpers +---------------------------------------------------------------------------- + +-- | @loopAsyncUntil ma mb@ will continually run @ma@ until @mb@ throws an exception or returns. +-- Once it does, it'll wait for @ma@ to finish running one last time and then return. +-- +-- See #163 to read more on why it's important to let @ma@ finish cleanly. +-- * https://github.com/serokell/xrefcheck/issues/162 +-- * https://github.com/serokell/xrefcheck/pull/163 +loopAsyncUntil :: forall a b. IO a -> IO b -> IO b +loopAsyncUntil loopingAction action = + mask $ \restore -> do + shouldLoop <- newIORef True + loopingActionAsync <- async $ restore $ loopingAction' shouldLoop + restore action `finally` do + writeIORef shouldLoop False + wait loopingActionAsync + where + loopingAction' :: IORef Bool -> IO () + loopingAction' shouldLoop = do + whenM (readIORef shouldLoop) do + void loopingAction + loopingAction' shouldLoop From 25d73c3d09f3e8634e07b3af2c366d6763f5b40b Mon Sep 17 00:00:00 2001 From: Diogo Castro Date: Sat, 24 Sep 2022 17:47:53 +0100 Subject: [PATCH 2/2] [#162] Remove artificial delay in `allowRewrite` Problem: `allowRewrite` uses `threadDelay (ms 100)` to "prevent our output to interleave with further outputs". It's not exactly what problem this is supposed to address, and how it's supposed to fix it. This seems like it may have been an earlier attempt to fix the issue addressed in the previous commit. Solution: delete `threadDelay`. --- src/Xrefcheck/Progress.hs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Xrefcheck/Progress.hs b/src/Xrefcheck/Progress.hs index 4363552..0a7fe11 100644 --- a/src/Xrefcheck/Progress.hs +++ b/src/Xrefcheck/Progress.hs @@ -31,7 +31,7 @@ import Universum import Data.Ratio ((%)) import System.Console.Pretty (Color (..), Style (..), color, style) -import Time (Second, Time, ms, sec, threadDelay, unTime, (-:-)) +import Time (Second, Time, sec, unTime, (-:-)) ----------------------------------------------------------- -- Task timestamp @@ -225,8 +225,6 @@ allowRewrite enabled = bracket prepare erase erase (Rewrite RewriteCtx{..}) = liftIO $ do maxPrintedSize <- readIORef rMaxPrintedSize hPutStr stderr $ '\r' : replicate maxPrintedSize ' ' ++ "\r" - -- prevent our output to interleave with further outputs - threadDelay (ms 100) erase RewriteDisabled = pass -- | Return caret and print the given text.