[#106] Fix using ./ in paths

Problem: --ignored and --root CLI options had misbehave when you use ./ in path

Solution: add path normalisation and path-equality from System.FilePath instead of common functions for strings
This commit is contained in:
Andrei Borzenkov 2022-07-07 17:22:28 +04:00
parent d6fb95f5b9
commit 55268fee2c
17 changed files with 231 additions and 69 deletions

View File

@ -9,7 +9,7 @@ steps:
- command: nix-build ci.nix -A xrefcheck-lib-and-tests
label: Library and tests
- command: nix run -f ci.nix pkgs.bats pkgs.diffutils xrefcheck-static -c bash -c "cd tests/golden && bats ./"
- command: nix run -f ci.nix pkgs.bats pkgs.diffutils xrefcheck-static -c bash -c "cd tests/golden/ && bats ./**"
label: Golden tests (bats)
- command: nix-build ci.nix -A xrefcheck-static
@ -22,7 +22,7 @@ steps:
artifact_paths:
- "result/bin/*"
- command: nix run -f ci.nix xrefcheck-static -c xrefcheck --ignored tests/markdowns
- command: nix run -f ci.nix xrefcheck-static -c xrefcheck --ignored tests/markdowns --ignored tests/golden/
label: Xrefcheck itself
- label: lint

View File

@ -21,6 +21,7 @@ extra-source-files:
default-extensions:
- AllowAmbiguousTypes
- BangPatterns
- BlockArguments
- ConstraintKinds
- DataKinds
- DefaultSignatures

View File

@ -27,13 +27,14 @@ import Data.Text qualified as T
import Data.Version (showVersion)
import Options.Applicative
(Parser, ReadM, command, eitherReader, execParser, flag, flag', footerDoc, fullDesc, help, helper,
hsubparser, info, infoOption, long, metavar, option, progDesc, short, strOption, switch, value)
hsubparser, info, infoOption, long, metavar, option, progDesc, short, strOption, switch, value, OptionFields, Mod)
import Options.Applicative.Help.Pretty (Doc, displayS, fill, fillSep, indent, renderPretty, text)
import Paths_xrefcheck (version)
import Xrefcheck.Config (VerifyConfig (..))
import Xrefcheck.Core
import Xrefcheck.Scan
import Xrefcheck.Util (normaliseWithNoTrailing)
modeReadM :: ReadM VerifyMode
modeReadM = eitherReader $ \s ->
@ -97,6 +98,9 @@ defaultConfigPaths = ["./xrefcheck.yaml", "./.xrefcheck.yaml"]
-- and flavors, so we write a type alias here.
type RepoType = Flavor
filepathOption :: Mod OptionFields FilePath -> Parser FilePath
filepathOption = fmap normaliseWithNoTrailing <$> strOption
repoTypeReadM :: ReadM RepoType
repoTypeReadM = eitherReader $ \name ->
maybeToRight (failureText name) $ L.lookup (map C.toLower name) allRepoTypesNamed
@ -110,7 +114,7 @@ repoTypeReadM = eitherReader $ \name ->
optionsParser :: Parser Options
optionsParser = do
oConfigPath <- optional . strOption $
oConfigPath <- optional . filepathOption $
short 'c' <>
long "config" <>
metavar "FILEPATH" <>
@ -119,7 +123,7 @@ optionsParser = do
(mconcat . intersperse ", " $ map show defaultConfigPaths) <> ". \
\If none of these files exist, default configuration is used."
)
oRoot <- strOption $
oRoot <- filepathOption $
short 'r' <>
long "root" <>
metavar "DIRECTORY" <>
@ -154,7 +158,7 @@ optionsParser = do
traversalOptionsParser :: Parser TraversalOptions
traversalOptionsParser = do
toIgnored <- many . strOption $
toIgnored <- many . filepathOption $
long "ignored" <>
metavar "FILEPATH" <>
help "Files and folders which we pretend do not exist."
@ -183,7 +187,7 @@ dumpConfigOptions = hsubparser $
help "Git repository type."
outputOption =
strOption $
filepathOption $
short 'o' <>
long "output" <>
metavar "FILEPATH" <>

View File

@ -14,7 +14,7 @@ import Fmt (blockListF', build, fmt, fmtLn, indentF)
import System.Directory (doesFileExist)
import Xrefcheck.CLI (Options (..), addTraversalOptions, addVerifyOptions, defaultConfigPaths)
import Xrefcheck.Config (Config (..), ScannersConfig (..), defConfig)
import Xrefcheck.Config (Config (..), ScannersConfig (..), defConfig, normaliseConfigFilePaths)
import Xrefcheck.Core (Flavor (..))
import Xrefcheck.Progress (allowRewrite)
import Xrefcheck.Scan (FormatsSupport, gatherRepoInfo, specificFormatsSupport)
@ -23,9 +23,9 @@ import Xrefcheck.System (askWithinCI)
import Xrefcheck.Verify (verifyErrors, verifyRepo)
readConfig :: FilePath -> IO Config
readConfig path =
readConfig path = fmap normaliseConfigFilePaths do
decodeFileEither path
>>= either (error . toText . prettyPrintParseException) pure
>>= either (error . toText . prettyPrintParseException) pure
formats :: ScannersConfig -> FormatsSupport
formats ScannersConfig{..} = specificFormatsSupport

View File

@ -18,19 +18,19 @@ import Data.ByteString qualified as BS
import Data.Map qualified as Map
import Data.Yaml (FromJSON (..), decodeEither', prettyPrintParseException, withText)
import Instances.TH.Lift ()
import Text.Regex.TDFA (CompOption (..), ExecOption (..), Regex)
import Text.Regex.TDFA qualified as R
import Text.Regex.TDFA.ByteString ()
import Text.Regex.TDFA.Text qualified as R
import Time (KnownRatName, Second, Time, unitsP)
import Time (KnownRatName, Second, Time(..), unitsP)
import Xrefcheck.Core
import Xrefcheck.Scan
import Xrefcheck.Scanners.Markdown
import Xrefcheck.System (RelGlobPattern)
import Xrefcheck.Util (aesonConfigOption, postfixFields, (-:))
import Xrefcheck.System (RelGlobPattern, normaliseGlobPattern)
import Xrefcheck.Util (aesonConfigOption, postfixFields, (-:), normaliseWithNoTrailing)
import Xrefcheck.Config.Default
import Text.Regex.TDFA.Common
-- | Overall config.
data Config = Config
@ -39,6 +39,14 @@ data Config = Config
, cScanners :: ScannersConfig
}
normaliseConfigFilePaths :: Config -> Config
normaliseConfigFilePaths Config{..}
= Config
{ cTraversal = normaliseTraversalConfigFilePaths cTraversal
, cVerification = normaliseVerifyConfigFilePaths cVerification
, cScanners
}
-- | Config of verification.
data VerifyConfig = VerifyConfig
{ vcAnchorSimilarityThreshold :: Double
@ -56,6 +64,13 @@ data VerifyConfig = VerifyConfig
-- otherwise will be marked as broken, because we can't check it.
}
normaliseVerifyConfigFilePaths :: VerifyConfig -> VerifyConfig
normaliseVerifyConfigFilePaths vc@VerifyConfig{ vcVirtualFiles, vcNotScanned}
= vc
{ vcVirtualFiles = map normaliseGlobPattern vcVirtualFiles
, vcNotScanned = map normaliseWithNoTrailing vcNotScanned
}
-- | Configs for all the supported scanners.
data ScannersConfig = ScannersConfig
{ scMarkdown :: MarkdownConfig
@ -178,7 +193,7 @@ foldMap (deriveFromJSON aesonConfigOption)
]
defConfig :: HasCallStack => Flavor -> Config
defConfig flavor =
defConfig flavor = normaliseConfigFilePaths $
either (error . toText . prettyPrintParseException) id $
decodeEither' (defConfigText flavor)

View File

@ -12,6 +12,7 @@ module Xrefcheck.Scan
, FormatsSupport
, RepoInfo (..)
, normaliseTraversalConfigFilePaths
, gatherRepoInfo
, specificFormatsSupport
) where
@ -23,11 +24,11 @@ import Data.Foldable qualified as F
import Data.Map qualified as M
import GHC.Err (errorWithoutStackTrace)
import System.Directory.Tree qualified as Tree
import System.FilePath (dropTrailingPathSeparator, takeDirectory, takeExtension, (</>))
import System.FilePath (dropTrailingPathSeparator, takeDirectory, takeExtension, (</>), equalFilePath)
import Xrefcheck.Core
import Xrefcheck.Progress
import Xrefcheck.Util (aesonConfigOption)
import Xrefcheck.Util (aesonConfigOption, normaliseWithNoTrailing)
-- | Config of repositry traversal.
data TraversalConfig = TraversalConfig
@ -35,6 +36,9 @@ data TraversalConfig = TraversalConfig
-- ^ Files and folders, files in which we completely ignore.
}
normaliseTraversalConfigFilePaths :: TraversalConfig -> TraversalConfig
normaliseTraversalConfigFilePaths = TraversalConfig . map normaliseWithNoTrailing . tcIgnored
deriveFromJSON aesonConfigOption ''TraversalConfig
-- | File extension, dot included.
@ -55,34 +59,19 @@ specificFormatsSupport formats = \ext -> M.lookup ext formatsMap
, extension <- extensions
]
-- | Returns the context location of the given path.
-- This is done by removing the last component from the path.
--
-- > locationOf "./folder/file.md" == "./folder"
-- > locationOf "./folder/subfolder" == "./folder"
-- > locationOf "./folder/subfolder/" == "./folder"
-- > locationOf "./folder/subfolder/./" == "./folder/subfolder"
-- > locationOf "." == ""
-- > locationOf "/absolute/path" == "/absolute"
-- > locationOf "/" == "/"
locationOf :: FilePath -> FilePath
locationOf fp
| fp == "" || fp == "." = ""
| otherwise = takeDirectory $ dropTrailingPathSeparator fp
gatherRepoInfo
:: MonadIO m
=> Rewrite -> FormatsSupport -> TraversalConfig -> FilePath -> m RepoInfo
gatherRepoInfo rw formatsSupport config root = do
putTextRewrite rw "Scanning repository..."
_ Tree.:/ repoTree <- liftIO $ Tree.readDirectoryWithL processFile rootNE
let fileInfos = filter (\(path, _) -> not $ isIgnored path)
_ Tree.:/ repoTree <- liftIO $ Tree.readDirectoryWithL processFile root
let fileInfos = map (first normaliseWithNoTrailing)
$ filter (\(path, _) -> not $ isIgnored path)
$ dropSndMaybes . F.toList
$ Tree.zipPaths . (locationOf root Tree.:/)
$ Tree.zipPaths . (location Tree.:/)
$ filterExcludedDirs root repoTree
return $ RepoInfo (M.fromList fileInfos)
where
rootNE = if null root then "." else root
processFile file = do
let ext = takeExtension file
let mscanner = formatsSupport ext
@ -90,7 +79,7 @@ gatherRepoInfo rw formatsSupport config root = do
dropSndMaybes l = [(a, b) | (a, Just b) <- l]
ignored = map (root </>) (tcIgnored config)
isIgnored path = path `elem` ignored
isIgnored path = any (equalFilePath path) ignored
filterExcludedDirs cur = \case
Tree.Dir name subfiles ->
let subfiles' =
@ -102,3 +91,17 @@ gatherRepoInfo rw formatsSupport config root = do
file@Tree.File{} -> file
Tree.Failed _name err ->
errorWithoutStackTrace $ "Repository traversal failed: " <> show err
-- The context location of the root.
-- This is done by removing the last component from the path.
-- > root = "./folder/file.md" ==> location = "./folder"
-- > root = "./folder/subfolder" ==> location = "./folder"
-- > root = "./folder/subfolder/" ==> location = "./folder"
-- > root = "./folder/subfolder/./" ==> location = "./folder/subfolder"
-- > root = "." ==> location = ""
-- > root = "/absolute/path" ==> location = "/absolute"
-- > root = "/" ==> location = "/"
location =
if root `equalFilePath` "."
then ""
else takeDirectory $ dropTrailingPathSeparator root

View File

@ -7,6 +7,7 @@ module Xrefcheck.System
( readingSystem
, askWithinCI
, RelGlobPattern (..)
, normaliseGlobPattern
, bindGlobPattern
) where
@ -19,6 +20,8 @@ import System.Directory (canonicalizePath)
import System.Environment (lookupEnv)
import System.FilePath ((</>))
import System.FilePath.Glob qualified as Glob
import Data.Coerce (coerce)
import Xrefcheck.Util (normaliseWithNoTrailing)
-- | We can quite safely treat surrounding filesystem as frozen,
-- so IO reading operations can be turned into pure values.
@ -36,6 +39,9 @@ askWithinCI = lookupEnv "CI" <&> \case
-- | Glob pattern relative to repository root.
newtype RelGlobPattern = RelGlobPattern FilePath
normaliseGlobPattern :: RelGlobPattern -> RelGlobPattern
normaliseGlobPattern = RelGlobPattern . normaliseWithNoTrailing . coerce
bindGlobPattern :: FilePath -> RelGlobPattern -> Glob.Pattern
bindGlobPattern root (RelGlobPattern relPat) = readingSystem $ do
-- TODO [#26] try to avoid using canonicalization

View File

@ -11,7 +11,7 @@ module Xrefcheck.Util
, postfixFields
, (-:)
, aesonConfigOption
) where
, normaliseWithNoTrailing) where
import Universum
@ -20,6 +20,7 @@ import Data.Aeson qualified as Aeson
import Data.Aeson.Casing (aesonPrefix, camelCase)
import Fmt (Builder, build, fmt, nameF)
import System.Console.Pretty (Pretty (..), Style (Faint))
import System.FilePath (dropTrailingPathSeparator, normalise)
instance Pretty Builder where
colorize s c = build @Text . colorize s c . fmt
@ -43,3 +44,6 @@ infixr 0 -:
-- | Options that we use to derive JSON instances for config types.
aesonConfigOption :: Aeson.Options
aesonConfigOption = aesonPrefix camelCase
normaliseWithNoTrailing :: FilePath -> FilePath
normaliseWithNoTrailing = dropTrailingPathSeparator . normalise

View File

@ -43,7 +43,7 @@ import Network.HTTP.Req
import Network.HTTP.Types.Status (Status, statusCode, statusMessage)
import System.Console.Pretty (Style (..), style)
import System.Directory (canonicalizePath, doesDirectoryExist, doesFileExist)
import System.FilePath (takeDirectory, (</>))
import System.FilePath (takeDirectory, (</>), normalise)
import System.FilePath.Glob qualified as Glob
import Text.Regex.TDFA.Text (Regex, regexec)
import Text.URI (Authority (..), URI (..), mkURI)
@ -211,7 +211,7 @@ verifyRepo
= do
let toScan = do
(file, fileInfo) <- M.toList repoInfo
guard . not $ any ((`isPrefixOf` file) . (root </>)) vcNotScanned
guard . not $ any ((`isPrefixOf` file) . normalise . (root </>)) vcNotScanned
ref <- _fiReferences fileInfo
return (file, ref)

View File

@ -0,0 +1,79 @@
#!/usr/bin/env bats
# SPDX-FileCopyrightText: 2022 Serokell <https://serokell.io>
#
# SPDX-License-Identifier: MPL-2.0
load '../helpers/bats-support/load'
load '../helpers/bats-assert/load'
load '../helpers'
@test "No redundant slashes" {
run xrefcheck \
--ignored to-ignore \
--root .
assert_output --partial "All repository links are valid."
}
@test "Redundant slashes in root and ignored" {
run xrefcheck \
--ignored ./././././././//to-ignore \
--root ./
assert_output --partial "All repository links are valid."
}
@test "Redundant slashes in root" {
run xrefcheck \
-c config-no-scan-ignored.yaml \
--root ./
assert_output --partial "All repository links are valid."
}
@test "Reduchant slashes in ignored" {
run xrefcheck \
--ignored ./././././././//to-ignore \
--root .
assert_output --partial "All repository links are valid."
}
@test "Basic root, check errors report" {
xrefcheck \
--root . \
| prepare > /tmp/check-cli.test || true
diff /tmp/check-cli.test expected.gold \
--ignore-space-change \
--ignore-blank-lines \
--new-file # treat absent files as empty
rm /tmp/check-cli.test
}
@test "Root with redundant slashes, check errors report" {
xrefcheck \
--root ././///././././//./ \
| prepare > /tmp/check-cli.test || true
diff /tmp/check-cli.test expected.gold \
--ignore-space-change \
--ignore-blank-lines \
--new-file # treat absent files as empty
rm /tmp/check-cli.test
}
@test "No root, check errors report" {
xrefcheck \
| prepare > /tmp/check-cli.test || true
diff /tmp/check-cli.test expected.gold \
--ignore-space-change \
--ignore-blank-lines \
--new-file # treat absent files as empty
rm /tmp/check-cli.test
}

View File

@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: 2021 Serokell <https://serokell.io>
#
# SPDX-License-Identifier: Unlicense
traversal:
ignored: []
verification:
anchorSimilarityThreshold: 0.5
externalRefCheckTimeout: 10s
notScanned: [ "to-ignore/broken-link.md" ]
virtualFiles: []
ignoreRefs: []
checkLocalhost: false
ignoreAuthFailures: true
scanners:
markdown:
flavor: GitHub

View File

@ -0,0 +1,13 @@
=== Invalid references found ===
➥ In file to-ignore/broken-link.md
bad reference (absolute) at src:7:1-25:
- text: "my link"
- link: /one/two/three
- anchor: -
⛀ File does not exist:
./one/two/three
Invalid references dumped, 1 in total.

View File

@ -0,0 +1,2 @@
# SPDX-FileCopyrightText: 2022 Serokell <https://serokell.io>
# SPDX-License-Identifier: Unlicense

View File

@ -0,0 +1,7 @@
<!--
- SPDX-FileCopyrightText: 2022 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-->
[my link](/one/two/three)

View File

@ -4,36 +4,26 @@
#
# SPDX-License-Identifier: MPL-2.0
load 'helpers/bats-support/load'
load 'helpers/bats-assert/load'
# this function is used for:
# - delete all color characters
# - replace socket port with N
# - replace multiple connection retry errors with single one
# (because at some machine there are one error and at others - two)
prepare () {
sed -r "s/[[:cntrl:]]\[[0-9]{1,3}m//g" \
| sed 's/socket: [0-9]*/socket: N/g' \
| sed 's/Network.Socket.connect: <socket: N>: does not exist (Connection refused),//g'
}
load '../helpers/bats-support/load'
load '../helpers/bats-assert/load'
load '../helpers'
@test "Config: check-localhost=false, CLA --check-localhost not provided" {
run xrefcheck \
-c check-localhost/config-check-disabled.yaml \
-r check-localhost
-c config-check-disabled.yaml \
-r .
assert_output --partial "All repository links are valid."
}
@test "Config: check-localhost=false, CLA --check-localhost provided" {
xrefcheck \
-c check-localhost/config-check-disabled.yaml \
-r check-localhost \
-c config-check-disabled.yaml \
-r . \
--check-localhost \
| prepare > /tmp/check-localhost.test || true
diff /tmp/check-localhost.test check-localhost/expected.gold \
diff /tmp/check-localhost.test expected.gold \
--ignore-space-change \
--ignore-blank-lines \
--new-file # treat absent files as empty
@ -43,11 +33,11 @@ prepare () {
@test "Config: check-localhost=true, CLA --check-localhost not provided" {
xrefcheck \
-c check-localhost/config-check-enabled.yaml \
-r check-localhost \
-c config-check-enabled.yaml \
-r . \
| prepare > /tmp/check-localhost.test || true
diff /tmp/check-localhost.test check-localhost/expected.gold \
diff /tmp/check-localhost.test expected.gold \
--ignore-space-change \
--ignore-blank-lines \
--new-file # treat absent files as empty
@ -57,12 +47,12 @@ prepare () {
@test "Config: check-localhost=true, CLA --check-localhost provided" {
xrefcheck \
-c check-localhost/config-check-enabled.yaml \
-r check-localhost \
-c config-check-enabled.yaml \
-r . \
--check-localhost \
| prepare > /tmp/check-localhost.test || true
diff /tmp/check-localhost.test check-localhost/expected.gold \
diff /tmp/check-localhost.test expected.gold \
--ignore-space-change \
--ignore-blank-lines \
--new-file # treat absent files as empty
@ -81,7 +71,7 @@ prepare () {
--check-localhost \
| prepare > /tmp/check-localhost.test || true
diff /tmp/check-localhost.test check-localhost/expected.gold \
diff /tmp/check-localhost.test expected.gold \
--ignore-space-change \
--ignore-blank-lines \
--new-file # treat absent files as empty

View File

@ -1,6 +1,6 @@
=== Invalid references found ===
➥ In file ./check-localhost/check-localhost.md
➥ In file check-localhost.md
bad reference (external) at src:7:10-47:
- text: "web-site"
- link: https://localhost/web-site
@ -10,7 +10,7 @@
➥ In file ./check-localhost/check-localhost.md
➥ In file check-localhost.md
bad reference (external) at src:9:10-39:
- text: "team"
- link: https://127.0.0.1/team
@ -20,7 +20,7 @@
➥ In file ./check-localhost/check-localhost.md
➥ In file check-localhost.md
bad reference (external) at src:11:10-38:
- text: "blog"
- link: http://localhost/blog
@ -30,7 +30,7 @@
➥ In file ./check-localhost/check-localhost.md
➥ In file check-localhost.md
bad reference (external) at src:13:10-38:
- text: "labs"
- link: http://127.0.0.1/labs

19
tests/golden/helpers.bash Normal file
View File

@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: 2022 Serokell <https://serokell.io>
#
# SPDX-License-Identifier: MPL-2.0
setup () {
# change working directory to the location of the running `bats` suite.
cd "$( dirname "$BATS_TEST_FILENAME")"
}
# this function is used for:
# - delete all color characters
# - replace socket port with N
# - replace multiple connection retry errors with single one
# (because at some machine there are one error and at others - two)
prepare () {
sed -r "s/[[:cntrl:]]\[[0-9]{1,3}m//g" \
| sed 's/socket: [0-9]*/socket: N/g' \
| sed 's/Network.Socket.connect: <socket: N>: does not exist (Connection refused),//g'
}