diff --git a/.reuse/dep5 b/.reuse/dep5 index 91590fc..bafb377 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -3,3 +3,7 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Files: .github/pull_request_template.md .github/ISSUE_TEMPLATE/*.md Copyright: 2018-2019 Serokell License: Unlicense + +Files: links-tests/ftp_root/**/* +Copyright: 2021 Serokell +License: Unlicense diff --git a/CHANGES.md b/CHANGES.md index 8427cee..b387785 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,5 @@ @@ -11,6 +11,8 @@ Unreleased `check-localhost` CLA argument (by default localhost links will not be checked). + Make possible to ignore auth failures (assume 'protected' links valid), use `ignoreAuthFailures` parameter of config. +* [#66](https://github.com/serokell/xrefcheck/pull/66) + + Added support for ftp links. 0.2.1 ========== diff --git a/README.md b/README.md index b606faa..65c0b66 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Both relative and absolute local links are supported out of the box. This tool requires some configuring before it can be applied to a repository or added to CI. * [awesome_bot](https://github.com/dkhamsing/awesome_bot) - a solution written in Ruby that can be easily included in CI or integrated into GitHub. Its features include duplicated URLs detection, specifying allowed HTTP error codes and reporting generation. - At the moment of writting, it scans only external references and checking anchors is not possible. + At the moment of writing, it scans only external references and checking anchors is not possible. * [remark-validate-links](https://github.com/remarkjs/remark-validate-links) and [remark-lint-no-dead-urls](https://github.com/davidtheclark/remark-lint-no-dead-urls) - highly configurable Javascript solution for checking local and remote links resp. It is able to check multiple repositores at once if they are gathered in one folder. Being written on JavaScript, it is fairly slow on large repositories. @@ -45,6 +45,7 @@ Both relative and absolute local links are supported out of the box. * [url-checker](https://github.com/paramt/url-checker) - GitHub action which checks links in specified files. * [linkcheck](https://github.com/filiph/linkcheck) - advanced site crawler, checks for `HTML` files. There are other solutions for this particular task which we don't mention here. +At the moment of writing, the listed solutions don't support ftp/ftps links. ## Usage [↑](#xrefcheck) diff --git a/links-tests/Main.hs b/links-tests/Main.hs new file mode 100644 index 0000000..a8f9668 --- /dev/null +++ b/links-tests/Main.hs @@ -0,0 +1,19 @@ +-- SPDX-FileCopyrightText: 2021 Serokell +-- +-- SPDX-License-Identifier: MPL-2.0 + +module Main + ( main + ) where + +import Test.Tasty (defaultIngredients, defaultMainWithIngredients, includingOptions) +import Test.Tasty.Ingredients (Ingredient) + +import Test.Xrefcheck.FtpLinks (ftpOptions) +import Tree (tests) + +main :: IO () +main = tests >>= defaultMainWithIngredients ingredients + +ingredients :: [Ingredient] +ingredients = includingOptions ftpOptions : defaultIngredients diff --git a/links-tests/Test/Xrefcheck/FtpLinks.hs b/links-tests/Test/Xrefcheck/FtpLinks.hs new file mode 100644 index 0000000..8b676a6 --- /dev/null +++ b/links-tests/Test/Xrefcheck/FtpLinks.hs @@ -0,0 +1,83 @@ +-- SPDX-FileCopyrightText: 2021 Serokell +-- +-- SPDX-License-Identifier: MPL-2.0 + +module Test.Xrefcheck.FtpLinks + ( FtpHostOpt(..) + , ftpOptions + , test_FtpLinks + ) where + +import Data.Tagged (Tagged, untag) +import Options.Applicative (help, long, strOption) +import Test.Tasty (TestTree, askOption, testGroup) +import Test.Tasty.HUnit (assertBool, assertFailure, testCase, (@?=)) +import Test.Tasty.Options as Tasty (IsOption (..), OptionDescription (Option), safeRead) + +import Xrefcheck.Config (Config (cVerification), VerifyConfig (vcCheckLocalhost), defConfig) +import Xrefcheck.Core (Flavor (GitHub)) +import Xrefcheck.Verify + (VerifyError (..), VerifyResult (VerifyResult), checkExternalResource, verifyErrors) + +-- | A list with all the options needed to configure FTP links tests. +ftpOptions :: [OptionDescription] +ftpOptions = + [ Tasty.Option (Proxy @FtpHostOpt) + ] + +-- | Option specifying FTP host. +newtype FtpHostOpt = FtpHostOpt Text + deriving (Show, Eq) + +instance IsOption FtpHostOpt where + defaultValue = FtpHostOpt "ftp://localhost" + optionName = "ftp-host" + optionHelp = "[Test.Xrefcheck.FtpLinks] FTP host without trailing slash" + parseValue v = FtpHostOpt <$> safeRead v + optionCLParser = FtpHostOpt <$> strOption + ( long (untag (optionName :: Tagged FtpHostOpt String)) + <> help (untag (optionHelp :: Tagged FtpHostOpt String)) + ) + + +config :: VerifyConfig +config = (cVerification $ defConfig GitHub) { vcCheckLocalhost = True } + +test_FtpLinks :: TestTree +test_FtpLinks = askOption $ \(FtpHostOpt host) -> do + testGroup "Ftp links handler" + [ testCase "handles correct link to file" $ do + let link = host <> "/pub/file_exists.txt" + result <- checkExternalResource config link + result @?= VerifyResult [] + + , testCase "handles empty link (host only)" $ do + let link = host + result <- checkExternalResource config link + result @?= VerifyResult [] + + , testCase "handles correct link to non empty directory" $ do + let link = host <> "/pub/" + result <- checkExternalResource config link + result @?= VerifyResult [] + + , testCase "handles correct link to empty directory" $ do + let link = host <> "/empty/" + result <- checkExternalResource config link + result @?= VerifyResult [] + + , testCase "throws exception when file not found" $ do + let link = host <> "/pub/file_does_not_exists.txt" + result <- checkExternalResource config link + case verifyErrors result of + Nothing -> + assertFailure "No exception was raised, FtpEntryDoesNotExist expected" + Just errors -> + assertBool "Expected FtpEntryDoesNotExist, got other exceptions" + (any ( + \case + FtpEntryDoesNotExist _ -> True + ExternalFtpException _ -> True + _ -> False + ) $ toList errors) + ] diff --git a/links-tests/Tree.hs b/links-tests/Tree.hs new file mode 100644 index 0000000..d15eb9f --- /dev/null +++ b/links-tests/Tree.hs @@ -0,0 +1,5 @@ +-- SPDX-FileCopyrightText: 2021 Serokell +-- +-- SPDX-License-Identifier: MPL-2.0 + +{-# OPTIONS_GHC -F -pgmF tasty-discover -optF --tree-display -optF --generated-module -optF Tree #-} diff --git a/links-tests/ftp_root/empty/.gitkeep b/links-tests/ftp_root/empty/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/links-tests/ftp_root/pub/file_exists.txt b/links-tests/ftp_root/pub/file_exists.txt new file mode 100644 index 0000000..809c1fb --- /dev/null +++ b/links-tests/ftp_root/pub/file_exists.txt @@ -0,0 +1 @@ +File exists! diff --git a/package.yaml b/package.yaml index 7e120fe..fc27bb3 100644 --- a/package.yaml +++ b/package.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2018-2020 Serokell +# SPDX-FileCopyrightText: 2018-2021 Serokell # # SPDX-License-Identifier: MPL-2.0 @@ -12,13 +12,12 @@ license-file: LICENSE author: Kostya Ivanov, Serokell maintainer: Serokell copyright: 2018-2019 Serokell +description: Please see the README on GitHub at extra-source-files: -- README.md -- CHANGES.md -- src-files/* - -description: Please see the README on GitHub at + - README.md + - CHANGES.md + - src-files/* default-extensions: - AllowAmbiguousTypes @@ -71,6 +70,7 @@ dependencies: - filepath - file-embed - fmt + - ftp-client - Glob - http-client - http-types @@ -100,32 +100,45 @@ library: source-dirs: src generated-other-modules: - - Paths_xrefcheck + - Paths_xrefcheck executables: xrefcheck: - main: Main.hs - source-dirs: exec + main: Main.hs + source-dirs: exec generated-other-modules: - - Paths_xrefcheck + - Paths_xrefcheck ghc-options: - - -threaded - - -rtsopts - - -with-rtsopts=-N - - -O2 + - -threaded + - -rtsopts + - -with-rtsopts=-N + - -O2 dependencies: - - xrefcheck + - xrefcheck tests: xrefcheck-tests: - main: Main.hs - source-dirs: tests + main: Main.hs + source-dirs: tests generated-other-modules: - - Paths_xrefcheck + - Paths_xrefcheck dependencies: - - firefly - - hspec - - QuickCheck - - xrefcheck + - firefly + - hspec + - QuickCheck + - xrefcheck build-tools: - - hspec-discover + - hspec-discover + + links-tests: + main: Main.hs + source-dirs: links-tests + build-tools: tasty-discover:tasty-discover + generated-other-modules: + - Paths_xrefcheck + dependencies: + - optparse-applicative + - tagged + - tasty + - tasty-hunit + - xrefcheck diff --git a/src/Xrefcheck/Orphans.hs b/src/Xrefcheck/Orphans.hs new file mode 100644 index 0000000..db9385b --- /dev/null +++ b/src/Xrefcheck/Orphans.hs @@ -0,0 +1,45 @@ +{- SPDX-FileCopyrightText: 2021 Serokell + - + - SPDX-License-Identifier: MPL-2.0 + -} + +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | Orphan instances for types from other packages + +module Xrefcheck.Orphans () where + +import qualified Data.ByteString.Char8 as C + +import Fmt (Buildable (..), unlinesF, (+|), (|+)) +import Network.FTP.Client + (FTPException (..), FTPMessage (..), FTPResponse (..), ResponseStatus (..)) +import Text.URI (RText, unRText) + +instance ToString (RText t) where + toString = toString . unRText + +instance Buildable ResponseStatus where + build = show + +instance Buildable FTPMessage where + build message = build $ decodeUtf8 @Text ( + case message of + SingleLine s -> s + MultiLine ss -> C.intercalate "\n" ss + ) + +instance Buildable FTPResponse where + build FTPResponse{..} = unlinesF + [ frStatus |+ " (" +| frCode |+ "):" + , build frMessage + ] + +instance Buildable FTPException where + build (BadProtocolResponseException _) = "Raw FTP exception" + build (FailureRetryException e) = build e + build (FailureException e) = build e + build (UnsuccessfulException e) = build e + build (BogusResponseFormatException e) = build e + +deriving instance Eq FTPException diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index 306cf1c..2bcb462 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -22,28 +22,35 @@ module Xrefcheck.Verify , checkExternalResource ) where -import Control.Concurrent.Async (forConcurrently, withAsync) -import Control.Monad.Except (MonadError (..)) +import qualified Data.ByteString as BS import qualified Data.Map as M import qualified Data.Text as T +import qualified GHC.Exts as Exts +import qualified System.FilePath.Glob as Glob + +import Control.Concurrent.Async (forConcurrently, withAsync) +import Control.Exception (throwIO) +import Control.Monad.Except (MonadError (..)) import Data.Text.Metrics (damerauLevenshteinNorm) import Fmt (Buildable (..), blockListF', listF, (+|), (|+)) -import qualified GHC.Exts as Exts +import Network.FTP.Client + (FTPException (..), FTPResponse (..), ResponseStatus (..), login, nlst, size, withFTP, withFTPS) import Network.HTTP.Client (HttpException (..), HttpExceptionContent (..), responseStatus) import Network.HTTP.Req - (GET (..), HEAD (..), HttpException (..), NoReqBody (..), defaultHttpConfig, ignoreResponse, req, - runReq, useURI) + (AllowsBody, CanHaveBody (NoBody), GET (..), HEAD (..), HttpBodyAllowed, HttpException (..), + HttpMethod, NoReqBody (..), defaultHttpConfig, ignoreResponse, req, runReq, useURI) import Network.HTTP.Types.Status (Status, statusCode, statusMessage) import System.Console.Pretty (Style (..), style) import System.Directory (canonicalizePath, doesDirectoryExist, doesFileExist) import System.FilePath (takeDirectory, ()) -import qualified System.FilePath.Glob as Glob import Text.Regex.TDFA.Text (Regex, regexec) -import Text.URI (mkURI) +import Text.URI (Authority (..), URI (..), mkURI) import Time (RatioNat, Second, Time (..), ms, threadDelay, timeout) +import Data.Bits (toIntegralSized) import Xrefcheck.Config import Xrefcheck.Core +import Xrefcheck.Orphans () import Xrefcheck.Progress import Xrefcheck.System @@ -94,18 +101,22 @@ instance Buildable a => Buildable (WithReferenceLoc a) where +| wrlItem |+ "\n\n" data VerifyError - = FileDoesNotExist FilePath + = LocalFileDoesNotExist FilePath | AnchorDoesNotExist Text [Anchor] | AmbiguousAnchorRef FilePath Text (NonEmpty Anchor) | ExternalResourceInvalidUri + | ExternalResourceInvalidUrl (Maybe Text) | ExternalResourceUnknownProtocol - | ExternalResourceUnavailable Status + | ExternalHttpResourceUnavailable Status + | ExternalFtpResourceUnavailable FTPResponse + | ExternalFtpException FTPException + | FtpEntryDoesNotExist FilePath | ExternalResourceSomeError Text deriving (Show, Eq) instance Buildable VerifyError where build = \case - FileDoesNotExist file -> + LocalFileDoesNotExist file -> "⛀ File does not exist:\n " +| file |+ "\n" AnchorDoesNotExist anchor similar -> @@ -121,15 +132,30 @@ instance Buildable VerifyError where \ can change silently whereas the document containing it evolves.\n" ExternalResourceInvalidUri -> - "⛂ Invalid url\n" + "⛂ Invalid URI\n" + + ExternalResourceInvalidUrl Nothing -> + "⛂ Invalid URL\n" + + ExternalResourceInvalidUrl (Just message) -> + "⛂ Invalid URL (" +| message |+ ")\n" ExternalResourceUnknownProtocol -> - "⛂ Bad url (expected 'http' or 'https')\n" + "⛂ Bad url (expected 'http','https', 'ftp' or 'ftps')\n" - ExternalResourceUnavailable status -> + ExternalHttpResourceUnavailable status -> "⛂ Resource unavailable (" +| statusCode status |+ " " +| decodeUtf8 @Text (statusMessage status) |+ ")\n" + ExternalFtpResourceUnavailable response -> + "⛂ Resource unavailable:\n" +| response |+ "\n" + + ExternalFtpException err -> + "⛂ FTP exception (" +| err |+ ")\n" + + FtpEntryDoesNotExist entry -> + "⛂ File or directory does not exist:\n" +| entry |+ "\n" + ExternalResourceSomeError err -> "⛂ " +| build err |+ "\n\n" where @@ -236,7 +262,7 @@ verifyReference ] unless (fileExists || dirExists || isVirtual) $ - throwError (FileDoesNotExist file) + throwError (LocalFileDoesNotExist file) checkAnchor file fileAnchors anchor = do checkAnchorReferenceAmbiguity file fileAnchors anchor @@ -264,18 +290,22 @@ verifyReference Nothing -> let isSimilar = (>= vcAnchorSimilarityThreshold) similarAnchors = - filter - (isSimilar . realToFrac . damerauLevenshteinNorm anchor . aName) + filter (isSimilar . realToFrac . damerauLevenshteinNorm anchor . aName) givenAnchors in throwError $ AnchorDoesNotExist anchor similarAnchors checkExternalResource :: VerifyConfig -> Text -> IO (VerifyResult VerifyError) checkExternalResource VerifyConfig{..} link | skipCheck = return mempty - | otherwise = fmap toVerifyRes $ do - makeRequest HEAD 0.3 >>= \case - Right () -> return $ Right () - Left _ -> makeRequest GET 0.7 + | otherwise = fmap toVerifyRes $ runExceptT $ do + uri <- mkURI link & maybe (throwError ExternalResourceInvalidUri) pure + + case toString <$> uriScheme uri of + Just "http" -> checkHttp uri + Just "https" -> checkHttp uri + Just "ftp" -> checkFtp uri False + Just "ftps" -> checkFtp uri True + _ -> throwError ExternalResourceUnknownProtocol where skipCheck = isIgnored || (not vcCheckLocalhost && isLocalLink) where @@ -291,12 +321,23 @@ checkExternalResource VerifyConfig{..} link Nothing -> False Left _ -> False - makeRequest :: _ => method -> RatioNat -> IO (Either VerifyError ()) - makeRequest method timeoutFrac = runExceptT $ do - uri <- mkURI link - & maybe (throwError ExternalResourceInvalidUri) pure - parsedUrl <- useURI uri - & maybe (throwError ExternalResourceUnknownProtocol) pure + checkHttp :: URI -> ExceptT VerifyError IO () + checkHttp uri = makeHttpRequest uri HEAD 0.3 `catchError` \_ -> + makeHttpRequest uri GET 0.7 + + makeHttpRequest + :: (HttpMethod method, HttpBodyAllowed (AllowsBody method) 'NoBody) + => URI + -> method + -> RatioNat + -> ExceptT VerifyError IO () + makeHttpRequest uri method timeoutFrac = do + parsedUrl <- case useURI uri of + -- accordingly to source code - Nothing can be only in case when + -- protocol is not http or https, but we've checked it already + -- so just in case we throw exception here + Nothing -> throwError $ ExternalResourceInvalidUrl Nothing + Just u -> pure u let reqLink = case parsedUrl of Left (url, option) -> runReq defaultHttpConfig $ @@ -305,8 +346,7 @@ checkExternalResource VerifyConfig{..} link runReq defaultHttpConfig $ req method url NoReqBody ignoreResponse option - let maxTime = Time @Second $ - unTime vcExternalRefCheckTimeout * timeoutFrac + let maxTime = Time @Second $ unTime vcExternalRefCheckTimeout * timeoutFrac mres <- liftIO (timeout maxTime $ void reqLink) `catch` (either throwError (\() -> return (Just ())) . interpretErrors) @@ -328,5 +368,64 @@ checkExternalResource VerifyConfig{..} link HttpExceptionRequest _ exc -> case exc of StatusCodeException resp _ | isAllowedErrorCode (statusCode $ responseStatus resp) -> Right () - | otherwise -> Left $ ExternalResourceUnavailable (responseStatus resp) + | otherwise -> Left $ ExternalHttpResourceUnavailable (responseStatus resp) other -> Left . ExternalResourceSomeError $ show other + + checkFtp :: URI -> Bool -> ExceptT VerifyError IO () + checkFtp uri secure = do + -- get authority which stores host and port + authority <- case uriAuthority uri of + Right a -> pure a + Left _ -> throwError $ + ExternalResourceInvalidUrl (Just "FTP path must be absolute") + let host = toString $ authHost authority + port :: Int <- case toIntegralSized . fromMaybe 21 $ authPort authority of + Just p -> pure p + Nothing -> throwError $ + ExternalResourceInvalidUrl (Just "Bad port") + -- build path from pieces + path <- case uriPath uri of + Nothing -> pure "" + Just (_, pieces) -> pure + . mconcat + . intersperse "/" + . map toString + . toList + $ pieces + makeFtpRequest host port path secure `catch` \e -> + throwError $ ExternalFtpException e + + makeFtpRequest + :: String + -> Int + -> FilePath + -> Bool + -> ExceptT VerifyError IO () + makeFtpRequest host port path secure = handler host port $ + \handle response -> do + -- check connection status + when (frStatus response /= Success) $ + throwError $ ExternalFtpResourceUnavailable response + -- anonymous login + loginResp <- login handle "anonymous" "" + -- check login status + when (frStatus loginResp /= Success) $ + if vcIgnoreAuthFailures + then pure () + else throwError $ ExternalFtpException $ UnsuccessfulException loginResp + -- If the response is non-null, the path is definitely a directory; + -- If the response is null, the path may be a file or may not exist. + dirList <- nlst handle [ "-a", path ] + when (BS.null dirList) $ do + -- The server-PI will respond to the SIZE command with a 213 reply + -- giving the transfer size of the file whose pathname was supplied, + -- or an error response if the file does not exist, the size is + -- unavailable, or some other error has occurred. + _ <- size handle path `catch` \case + UnsuccessfulException _ -> throwError $ FtpEntryDoesNotExist path + FailureException FTPResponse{..} | frCode == 550 -> + throwError $ FtpEntryDoesNotExist path + err -> liftIO $ throwIO err + pure () + where + handler = if secure then withFTPS else withFTP diff --git a/tests/Test/Xrefcheck/ConfigSpec.hs b/tests/Test/Xrefcheck/ConfigSpec.hs index 7f6b7c8..0657d74 100644 --- a/tests/Test/Xrefcheck/ConfigSpec.hs +++ b/tests/Test/Xrefcheck/ConfigSpec.hs @@ -52,7 +52,7 @@ spec = do it "when False - assume 401 status is invalid" $ checkLinkWithServer (config { vcIgnoreAuthFailures = False }) "http://127.0.0.1:3000/401" $ VerifyResult - [ ExternalResourceUnavailable $ + [ ExternalHttpResourceUnavailable $ Status { statusCode = 401, statusMessage = "Unauthorized" } ] @@ -63,7 +63,7 @@ spec = do it "when False - assume 403 status is invalid" $ checkLinkWithServer (config { vcIgnoreAuthFailures = False }) "http://127.0.0.1:3000/403" $ VerifyResult - [ ExternalResourceUnavailable $ + [ ExternalHttpResourceUnavailable $ Status { statusCode = 403, statusMessage = "Forbidden" } ] where diff --git a/xrefcheck.nix b/xrefcheck.nix index 5dd82b6..7c1fa99 100644 --- a/xrefcheck.nix +++ b/xrefcheck.nix @@ -12,6 +12,31 @@ let modules = [{ packages.xrefcheck = { ghcOptions = [ "-Werror" ]; + components.tests = { + links-tests = { + build-tools = [ pkgs.vsftpd ]; + preCheck = '' + echo "Starting vsftpd..." + touch /tmp/vsftpd.log + vsftpd \ + -orun_as_launching_user=yes \ + -olisten_port=2221 \ + -olisten=yes \ + -oftp_username=$(whoami) \ + -oanon_root=${./links-tests/ftp_root} \ + -opasv_min_port=2222 \ + -ohide_file='{.*}' \ + -odeny_file='{.*}' \ + -oseccomp_sandbox=no \ + -olog_ftp_protocol=yes \ + -oxferlog_enable=yes \ + -ovsftpd_log_file=/tmp/vsftpd.log & + sleep 1 + tail -f /tmp/vsftpd.log & + ''; + testFlags = [ "--ftp-host" "ftp://localhost:2221" ]; + }; + }; }; }]; };