[#47] Handle ftp links

Problem:
Currently we support only http and https links. If there is an `ftp://`
link, you will get exception.

Solution:
Use `ftp-client` to check connection to ftp, see response statuses and
check file existence. This produces adding new error types and small
refactoring.
Provide a test which is separate executable, where we have to pass CLA -
ftp host.

Co-authored-by: Alexander Bantyev <alexander.bantyev@serokell.io>
This commit is contained in:
Andrey Demidenko 2021-07-08 16:22:44 +03:00
parent c20bb32d79
commit c67ee9bd52
No known key found for this signature in database
GPG Key ID: 5E5E68635C25008D
13 changed files with 353 additions and 56 deletions

View File

@ -3,3 +3,7 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Files: .github/pull_request_template.md .github/ISSUE_TEMPLATE/*.md
Copyright: 2018-2019 Serokell <https://serokell.io>
License: Unlicense
Files: links-tests/ftp_root/**/*
Copyright: 2021 Serokell <https://serokell.io>
License: Unlicense

View File

@ -1,5 +1,5 @@
<!--
- SPDX-FileCopyrightText: 2020 Serokell <https://serokell.io>
- SPDX-FileCopyrightText: 2021 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-->
@ -11,6 +11,8 @@ Unreleased
`check-localhost` CLA argument (by default localhost links will not be checked).
+ Make possible to ignore auth failures (assume 'protected' links
valid), use `ignoreAuthFailures` parameter of config.
* [#66](https://github.com/serokell/xrefcheck/pull/66)
+ Added support for ftp links.
0.2.1
==========

View File

@ -36,7 +36,7 @@ Both relative and absolute local links are supported out of the box.
This tool requires some configuring before it can be applied to a repository or added to CI.
* [awesome_bot](https://github.com/dkhamsing/awesome_bot) - a solution written in Ruby that can be easily included in CI or integrated into GitHub.
Its features include duplicated URLs detection, specifying allowed HTTP error codes and reporting generation.
At the moment of writting, it scans only external references and checking anchors is not possible.
At the moment of writing, it scans only external references and checking anchors is not possible.
* [remark-validate-links](https://github.com/remarkjs/remark-validate-links) and [remark-lint-no-dead-urls](https://github.com/davidtheclark/remark-lint-no-dead-urls) - highly configurable Javascript solution for checking local and remote links resp.
It is able to check multiple repositores at once if they are gathered in one folder.
Being written on JavaScript, it is fairly slow on large repositories.
@ -45,6 +45,7 @@ Both relative and absolute local links are supported out of the box.
* [url-checker](https://github.com/paramt/url-checker) - GitHub action which checks links in specified files.
* [linkcheck](https://github.com/filiph/linkcheck) - advanced site crawler, checks for `HTML` files. There are other solutions for this particular task which we don't mention here.
At the moment of writing, the listed solutions don't support ftp/ftps links.
## Usage [](#xrefcheck)

19
links-tests/Main.hs Normal file
View File

@ -0,0 +1,19 @@
-- SPDX-FileCopyrightText: 2021 Serokell <https://serokell.io>
--
-- SPDX-License-Identifier: MPL-2.0
module Main
( main
) where
import Test.Tasty (defaultIngredients, defaultMainWithIngredients, includingOptions)
import Test.Tasty.Ingredients (Ingredient)
import Test.Xrefcheck.FtpLinks (ftpOptions)
import Tree (tests)
main :: IO ()
main = tests >>= defaultMainWithIngredients ingredients
ingredients :: [Ingredient]
ingredients = includingOptions ftpOptions : defaultIngredients

View File

@ -0,0 +1,83 @@
-- SPDX-FileCopyrightText: 2021 Serokell <https://serokell.io>
--
-- SPDX-License-Identifier: MPL-2.0
module Test.Xrefcheck.FtpLinks
( FtpHostOpt(..)
, ftpOptions
, test_FtpLinks
) where
import Data.Tagged (Tagged, untag)
import Options.Applicative (help, long, strOption)
import Test.Tasty (TestTree, askOption, testGroup)
import Test.Tasty.HUnit (assertBool, assertFailure, testCase, (@?=))
import Test.Tasty.Options as Tasty (IsOption (..), OptionDescription (Option), safeRead)
import Xrefcheck.Config (Config (cVerification), VerifyConfig (vcCheckLocalhost), defConfig)
import Xrefcheck.Core (Flavor (GitHub))
import Xrefcheck.Verify
(VerifyError (..), VerifyResult (VerifyResult), checkExternalResource, verifyErrors)
-- | A list with all the options needed to configure FTP links tests.
ftpOptions :: [OptionDescription]
ftpOptions =
[ Tasty.Option (Proxy @FtpHostOpt)
]
-- | Option specifying FTP host.
newtype FtpHostOpt = FtpHostOpt Text
deriving (Show, Eq)
instance IsOption FtpHostOpt where
defaultValue = FtpHostOpt "ftp://localhost"
optionName = "ftp-host"
optionHelp = "[Test.Xrefcheck.FtpLinks] FTP host without trailing slash"
parseValue v = FtpHostOpt <$> safeRead v
optionCLParser = FtpHostOpt <$> strOption
( long (untag (optionName :: Tagged FtpHostOpt String))
<> help (untag (optionHelp :: Tagged FtpHostOpt String))
)
config :: VerifyConfig
config = (cVerification $ defConfig GitHub) { vcCheckLocalhost = True }
test_FtpLinks :: TestTree
test_FtpLinks = askOption $ \(FtpHostOpt host) -> do
testGroup "Ftp links handler"
[ testCase "handles correct link to file" $ do
let link = host <> "/pub/file_exists.txt"
result <- checkExternalResource config link
result @?= VerifyResult []
, testCase "handles empty link (host only)" $ do
let link = host
result <- checkExternalResource config link
result @?= VerifyResult []
, testCase "handles correct link to non empty directory" $ do
let link = host <> "/pub/"
result <- checkExternalResource config link
result @?= VerifyResult []
, testCase "handles correct link to empty directory" $ do
let link = host <> "/empty/"
result <- checkExternalResource config link
result @?= VerifyResult []
, testCase "throws exception when file not found" $ do
let link = host <> "/pub/file_does_not_exists.txt"
result <- checkExternalResource config link
case verifyErrors result of
Nothing ->
assertFailure "No exception was raised, FtpEntryDoesNotExist expected"
Just errors ->
assertBool "Expected FtpEntryDoesNotExist, got other exceptions"
(any (
\case
FtpEntryDoesNotExist _ -> True
ExternalFtpException _ -> True
_ -> False
) $ toList errors)
]

5
links-tests/Tree.hs Normal file
View File

@ -0,0 +1,5 @@
-- SPDX-FileCopyrightText: 2021 Serokell <https://serokell.io>
--
-- SPDX-License-Identifier: MPL-2.0
{-# OPTIONS_GHC -F -pgmF tasty-discover -optF --tree-display -optF --generated-module -optF Tree #-}

View File

View File

@ -0,0 +1 @@
File exists!

View File

@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2018-2020 Serokell <https://serokell.io>
# SPDX-FileCopyrightText: 2018-2021 Serokell <https://serokell.io>
#
# SPDX-License-Identifier: MPL-2.0
@ -12,13 +12,12 @@ license-file: LICENSE
author: Kostya Ivanov, Serokell
maintainer: Serokell <hi@serokell.io>
copyright: 2018-2019 Serokell <https://serokell.io>
description: Please see the README on GitHub at <https://github.com/serokell/xrefcheck#readme>
extra-source-files:
- README.md
- CHANGES.md
- src-files/*
description: Please see the README on GitHub at <https://github.com/serokell/xrefcheck#readme>
- README.md
- CHANGES.md
- src-files/*
default-extensions:
- AllowAmbiguousTypes
@ -71,6 +70,7 @@ dependencies:
- filepath
- file-embed
- fmt
- ftp-client
- Glob
- http-client
- http-types
@ -100,32 +100,45 @@ library:
source-dirs: src
generated-other-modules:
- Paths_xrefcheck
- Paths_xrefcheck
executables:
xrefcheck:
main: Main.hs
source-dirs: exec
main: Main.hs
source-dirs: exec
generated-other-modules:
- Paths_xrefcheck
- Paths_xrefcheck
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
dependencies:
- xrefcheck
- xrefcheck
tests:
xrefcheck-tests:
main: Main.hs
source-dirs: tests
main: Main.hs
source-dirs: tests
generated-other-modules:
- Paths_xrefcheck
- Paths_xrefcheck
dependencies:
- firefly
- hspec
- QuickCheck
- xrefcheck
- firefly
- hspec
- QuickCheck
- xrefcheck
build-tools:
- hspec-discover
- hspec-discover
links-tests:
main: Main.hs
source-dirs: links-tests
build-tools: tasty-discover:tasty-discover
generated-other-modules:
- Paths_xrefcheck
dependencies:
- optparse-applicative
- tagged
- tasty
- tasty-hunit
- xrefcheck

45
src/Xrefcheck/Orphans.hs Normal file
View File

@ -0,0 +1,45 @@
{- SPDX-FileCopyrightText: 2021 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- | Orphan instances for types from other packages
module Xrefcheck.Orphans () where
import qualified Data.ByteString.Char8 as C
import Fmt (Buildable (..), unlinesF, (+|), (|+))
import Network.FTP.Client
(FTPException (..), FTPMessage (..), FTPResponse (..), ResponseStatus (..))
import Text.URI (RText, unRText)
instance ToString (RText t) where
toString = toString . unRText
instance Buildable ResponseStatus where
build = show
instance Buildable FTPMessage where
build message = build $ decodeUtf8 @Text (
case message of
SingleLine s -> s
MultiLine ss -> C.intercalate "\n" ss
)
instance Buildable FTPResponse where
build FTPResponse{..} = unlinesF
[ frStatus |+ " (" +| frCode |+ "):"
, build frMessage
]
instance Buildable FTPException where
build (BadProtocolResponseException _) = "Raw FTP exception"
build (FailureRetryException e) = build e
build (FailureException e) = build e
build (UnsuccessfulException e) = build e
build (BogusResponseFormatException e) = build e
deriving instance Eq FTPException

View File

@ -22,28 +22,35 @@ module Xrefcheck.Verify
, checkExternalResource
) where
import Control.Concurrent.Async (forConcurrently, withAsync)
import Control.Monad.Except (MonadError (..))
import qualified Data.ByteString as BS
import qualified Data.Map as M
import qualified Data.Text as T
import qualified GHC.Exts as Exts
import qualified System.FilePath.Glob as Glob
import Control.Concurrent.Async (forConcurrently, withAsync)
import Control.Exception (throwIO)
import Control.Monad.Except (MonadError (..))
import Data.Text.Metrics (damerauLevenshteinNorm)
import Fmt (Buildable (..), blockListF', listF, (+|), (|+))
import qualified GHC.Exts as Exts
import Network.FTP.Client
(FTPException (..), FTPResponse (..), ResponseStatus (..), login, nlst, size, withFTP, withFTPS)
import Network.HTTP.Client (HttpException (..), HttpExceptionContent (..), responseStatus)
import Network.HTTP.Req
(GET (..), HEAD (..), HttpException (..), NoReqBody (..), defaultHttpConfig, ignoreResponse, req,
runReq, useURI)
(AllowsBody, CanHaveBody (NoBody), GET (..), HEAD (..), HttpBodyAllowed, HttpException (..),
HttpMethod, NoReqBody (..), defaultHttpConfig, ignoreResponse, req, runReq, useURI)
import Network.HTTP.Types.Status (Status, statusCode, statusMessage)
import System.Console.Pretty (Style (..), style)
import System.Directory (canonicalizePath, doesDirectoryExist, doesFileExist)
import System.FilePath (takeDirectory, (</>))
import qualified System.FilePath.Glob as Glob
import Text.Regex.TDFA.Text (Regex, regexec)
import Text.URI (mkURI)
import Text.URI (Authority (..), URI (..), mkURI)
import Time (RatioNat, Second, Time (..), ms, threadDelay, timeout)
import Data.Bits (toIntegralSized)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Orphans ()
import Xrefcheck.Progress
import Xrefcheck.System
@ -94,18 +101,22 @@ instance Buildable a => Buildable (WithReferenceLoc a) where
+| wrlItem |+ "\n\n"
data VerifyError
= FileDoesNotExist FilePath
= LocalFileDoesNotExist FilePath
| AnchorDoesNotExist Text [Anchor]
| AmbiguousAnchorRef FilePath Text (NonEmpty Anchor)
| ExternalResourceInvalidUri
| ExternalResourceInvalidUrl (Maybe Text)
| ExternalResourceUnknownProtocol
| ExternalResourceUnavailable Status
| ExternalHttpResourceUnavailable Status
| ExternalFtpResourceUnavailable FTPResponse
| ExternalFtpException FTPException
| FtpEntryDoesNotExist FilePath
| ExternalResourceSomeError Text
deriving (Show, Eq)
instance Buildable VerifyError where
build = \case
FileDoesNotExist file ->
LocalFileDoesNotExist file ->
"⛀ File does not exist:\n " +| file |+ "\n"
AnchorDoesNotExist anchor similar ->
@ -121,15 +132,30 @@ instance Buildable VerifyError where
\ can change silently whereas the document containing it evolves.\n"
ExternalResourceInvalidUri ->
"⛂ Invalid url\n"
"⛂ Invalid URI\n"
ExternalResourceInvalidUrl Nothing ->
"⛂ Invalid URL\n"
ExternalResourceInvalidUrl (Just message) ->
"⛂ Invalid URL (" +| message |+ ")\n"
ExternalResourceUnknownProtocol ->
"⛂ Bad url (expected 'http' or 'https')\n"
"⛂ Bad url (expected 'http','https', 'ftp' or 'ftps')\n"
ExternalResourceUnavailable status ->
ExternalHttpResourceUnavailable status ->
"⛂ Resource unavailable (" +| statusCode status |+ " " +|
decodeUtf8 @Text (statusMessage status) |+ ")\n"
ExternalFtpResourceUnavailable response ->
"⛂ Resource unavailable:\n" +| response |+ "\n"
ExternalFtpException err ->
"⛂ FTP exception (" +| err |+ ")\n"
FtpEntryDoesNotExist entry ->
"⛂ File or directory does not exist:\n" +| entry |+ "\n"
ExternalResourceSomeError err ->
"" +| build err |+ "\n\n"
where
@ -236,7 +262,7 @@ verifyReference
]
unless (fileExists || dirExists || isVirtual) $
throwError (FileDoesNotExist file)
throwError (LocalFileDoesNotExist file)
checkAnchor file fileAnchors anchor = do
checkAnchorReferenceAmbiguity file fileAnchors anchor
@ -264,18 +290,22 @@ verifyReference
Nothing ->
let isSimilar = (>= vcAnchorSimilarityThreshold)
similarAnchors =
filter
(isSimilar . realToFrac . damerauLevenshteinNorm anchor . aName)
filter (isSimilar . realToFrac . damerauLevenshteinNorm anchor . aName)
givenAnchors
in throwError $ AnchorDoesNotExist anchor similarAnchors
checkExternalResource :: VerifyConfig -> Text -> IO (VerifyResult VerifyError)
checkExternalResource VerifyConfig{..} link
| skipCheck = return mempty
| otherwise = fmap toVerifyRes $ do
makeRequest HEAD 0.3 >>= \case
Right () -> return $ Right ()
Left _ -> makeRequest GET 0.7
| otherwise = fmap toVerifyRes $ runExceptT $ do
uri <- mkURI link & maybe (throwError ExternalResourceInvalidUri) pure
case toString <$> uriScheme uri of
Just "http" -> checkHttp uri
Just "https" -> checkHttp uri
Just "ftp" -> checkFtp uri False
Just "ftps" -> checkFtp uri True
_ -> throwError ExternalResourceUnknownProtocol
where
skipCheck = isIgnored || (not vcCheckLocalhost && isLocalLink)
where
@ -291,12 +321,23 @@ checkExternalResource VerifyConfig{..} link
Nothing -> False
Left _ -> False
makeRequest :: _ => method -> RatioNat -> IO (Either VerifyError ())
makeRequest method timeoutFrac = runExceptT $ do
uri <- mkURI link
& maybe (throwError ExternalResourceInvalidUri) pure
parsedUrl <- useURI uri
& maybe (throwError ExternalResourceUnknownProtocol) pure
checkHttp :: URI -> ExceptT VerifyError IO ()
checkHttp uri = makeHttpRequest uri HEAD 0.3 `catchError` \_ ->
makeHttpRequest uri GET 0.7
makeHttpRequest
:: (HttpMethod method, HttpBodyAllowed (AllowsBody method) 'NoBody)
=> URI
-> method
-> RatioNat
-> ExceptT VerifyError IO ()
makeHttpRequest uri method timeoutFrac = do
parsedUrl <- case useURI uri of
-- accordingly to source code - Nothing can be only in case when
-- protocol is not http or https, but we've checked it already
-- so just in case we throw exception here
Nothing -> throwError $ ExternalResourceInvalidUrl Nothing
Just u -> pure u
let reqLink = case parsedUrl of
Left (url, option) ->
runReq defaultHttpConfig $
@ -305,8 +346,7 @@ checkExternalResource VerifyConfig{..} link
runReq defaultHttpConfig $
req method url NoReqBody ignoreResponse option
let maxTime = Time @Second $
unTime vcExternalRefCheckTimeout * timeoutFrac
let maxTime = Time @Second $ unTime vcExternalRefCheckTimeout * timeoutFrac
mres <- liftIO (timeout maxTime $ void reqLink) `catch`
(either throwError (\() -> return (Just ())) . interpretErrors)
@ -328,5 +368,64 @@ checkExternalResource VerifyConfig{..} link
HttpExceptionRequest _ exc -> case exc of
StatusCodeException resp _
| isAllowedErrorCode (statusCode $ responseStatus resp) -> Right ()
| otherwise -> Left $ ExternalResourceUnavailable (responseStatus resp)
| otherwise -> Left $ ExternalHttpResourceUnavailable (responseStatus resp)
other -> Left . ExternalResourceSomeError $ show other
checkFtp :: URI -> Bool -> ExceptT VerifyError IO ()
checkFtp uri secure = do
-- get authority which stores host and port
authority <- case uriAuthority uri of
Right a -> pure a
Left _ -> throwError $
ExternalResourceInvalidUrl (Just "FTP path must be absolute")
let host = toString $ authHost authority
port :: Int <- case toIntegralSized . fromMaybe 21 $ authPort authority of
Just p -> pure p
Nothing -> throwError $
ExternalResourceInvalidUrl (Just "Bad port")
-- build path from pieces
path <- case uriPath uri of
Nothing -> pure ""
Just (_, pieces) -> pure
. mconcat
. intersperse "/"
. map toString
. toList
$ pieces
makeFtpRequest host port path secure `catch` \e ->
throwError $ ExternalFtpException e
makeFtpRequest
:: String
-> Int
-> FilePath
-> Bool
-> ExceptT VerifyError IO ()
makeFtpRequest host port path secure = handler host port $
\handle response -> do
-- check connection status
when (frStatus response /= Success) $
throwError $ ExternalFtpResourceUnavailable response
-- anonymous login
loginResp <- login handle "anonymous" ""
-- check login status
when (frStatus loginResp /= Success) $
if vcIgnoreAuthFailures
then pure ()
else throwError $ ExternalFtpException $ UnsuccessfulException loginResp
-- If the response is non-null, the path is definitely a directory;
-- If the response is null, the path may be a file or may not exist.
dirList <- nlst handle [ "-a", path ]
when (BS.null dirList) $ do
-- The server-PI will respond to the SIZE command with a 213 reply
-- giving the transfer size of the file whose pathname was supplied,
-- or an error response if the file does not exist, the size is
-- unavailable, or some other error has occurred.
_ <- size handle path `catch` \case
UnsuccessfulException _ -> throwError $ FtpEntryDoesNotExist path
FailureException FTPResponse{..} | frCode == 550 ->
throwError $ FtpEntryDoesNotExist path
err -> liftIO $ throwIO err
pure ()
where
handler = if secure then withFTPS else withFTP

View File

@ -52,7 +52,7 @@ spec = do
it "when False - assume 401 status is invalid" $
checkLinkWithServer (config { vcIgnoreAuthFailures = False })
"http://127.0.0.1:3000/401" $ VerifyResult
[ ExternalResourceUnavailable $
[ ExternalHttpResourceUnavailable $
Status { statusCode = 401, statusMessage = "Unauthorized" }
]
@ -63,7 +63,7 @@ spec = do
it "when False - assume 403 status is invalid" $
checkLinkWithServer (config { vcIgnoreAuthFailures = False })
"http://127.0.0.1:3000/403" $ VerifyResult
[ ExternalResourceUnavailable $
[ ExternalHttpResourceUnavailable $
Status { statusCode = 403, statusMessage = "Forbidden" }
]
where

View File

@ -12,6 +12,31 @@ let
modules = [{
packages.xrefcheck = {
ghcOptions = [ "-Werror" ];
components.tests = {
links-tests = {
build-tools = [ pkgs.vsftpd ];
preCheck = ''
echo "Starting vsftpd..."
touch /tmp/vsftpd.log
vsftpd \
-orun_as_launching_user=yes \
-olisten_port=2221 \
-olisten=yes \
-oftp_username=$(whoami) \
-oanon_root=${./links-tests/ftp_root} \
-opasv_min_port=2222 \
-ohide_file='{.*}' \
-odeny_file='{.*}' \
-oseccomp_sandbox=no \
-olog_ftp_protocol=yes \
-oxferlog_enable=yes \
-ovsftpd_log_file=/tmp/vsftpd.log &
sleep 1
tail -f /tmp/vsftpd.log &
'';
testFlags = [ "--ftp-host" "ftp://localhost:2221" ];
};
};
};
}];
};