diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 1b5efbc..219fc3a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -22,7 +22,7 @@ steps: artifact_paths: - "result/bin/*" - - command: nix run -f ci.nix xrefcheck-static -c xrefcheck --ignored tests/markdowns --ignored tests/golden/ + - command: nix run -f ci.nix xrefcheck-static -c xrefcheck --ignored 'tests/**/*' label: Xrefcheck itself - label: lint diff --git a/CHANGES.md b/CHANGES.md index c4069ef..b088f55 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -37,6 +37,10 @@ Unreleased + Fixed bug with ignoring checks for relative anchors. * [#132](https://github.com/serokell/xrefcheck/pull/132) + Display URL parsing errors. +* [#131](https://github.com/serokell/xrefcheck/pull/131) + + Add support for glob patterns to `ignored` and `notScanned`. + + Remove support for directory names from `ignored` and `notScanned`. + + Fix bug with `ignored` not ignoring files with broken xrefcheck annotations. 0.2.1 ========== diff --git a/README.md b/README.md index 7118ffb..7cfb98e 100644 --- a/README.md +++ b/README.md @@ -159,7 +159,7 @@ xrefcheck dump-config -t GitHub Currently supported options include: * Timeout for checking external references; -* List of ignored folders. +* List of ignored files. 
## Build instructions [↑](#xrefcheck) diff --git a/src/Xrefcheck/CLI.hs b/src/Xrefcheck/CLI.hs index 720397c..4cf7b1e 100644 --- a/src/Xrefcheck/CLI.hs +++ b/src/Xrefcheck/CLI.hs @@ -37,6 +37,7 @@ import Xrefcheck.Config (VerifyConfig (..)) import Xrefcheck.Core import Xrefcheck.Scan import Xrefcheck.Util (normaliseWithNoTrailing) +import Xrefcheck.System (RelGlobPattern (..)) modeReadM :: ReadM VerifyMode modeReadM = eitherReader $ \s -> @@ -78,7 +79,7 @@ data Options = Options } data TraversalOptions = TraversalOptions - { toIgnored :: [FilePath] + { toIgnored :: [RelGlobPattern] } addTraversalOptions :: TraversalConfig -> TraversalOptions -> TraversalConfig @@ -115,6 +116,9 @@ type RepoType = Flavor filepathOption :: Mod OptionFields FilePath -> Parser FilePath filepathOption = fmap normaliseWithNoTrailing <$> strOption +globOption :: Mod OptionFields FilePath -> Parser RelGlobPattern +globOption = fmap RelGlobPattern <$> filepathOption + repoTypeReadM :: ReadM RepoType repoTypeReadM = eitherReader $ \name -> maybeToRight (failureText name) $ L.lookup (map C.toLower name) allRepoTypesNamed @@ -174,10 +178,12 @@ optionsParser = do traversalOptionsParser :: Parser TraversalOptions traversalOptionsParser = do - toIgnored <- many . filepathOption $ + toIgnored <- many . globOption $ long "ignored" <> - metavar "FILEPATH" <> - help "Files and folders which we pretend do not exist." + metavar "GLOB PATTERN" <> + help "Files which we pretend do not exist.\ + \ Glob patterns that contain wildcards MUST be enclosed\ + \ in quotes to avoid being expanded by shell." 
return TraversalOptions{..} verifyOptionsParser :: Parser VerifyOptions diff --git a/src/Xrefcheck/Config.hs b/src/Xrefcheck/Config.hs index ea4178e..df6ce00 100644 --- a/src/Xrefcheck/Config.hs +++ b/src/Xrefcheck/Config.hs @@ -28,7 +28,7 @@ import Xrefcheck.Core import Xrefcheck.Scan import Xrefcheck.Scanners.Markdown import Xrefcheck.System (RelGlobPattern, normaliseGlobPattern) -import Xrefcheck.Util (aesonConfigOption, postfixFields, (-:), normaliseWithNoTrailing) +import Xrefcheck.Util (aesonConfigOption, postfixFields, (-:)) import Xrefcheck.Config.Default import Text.Regex.TDFA.Common @@ -53,8 +53,8 @@ data VerifyConfig = VerifyConfig , vcExternalRefCheckTimeout :: Time Second , vcVirtualFiles :: [RelGlobPattern] -- ^ Files which we pretend do exist. - , vcNotScanned :: [FilePath] - -- ^ Prefixes of files, references in which we should not analyze. + , vcNotScanned :: [RelGlobPattern] + -- ^ Files, references in which we should not analyze. , vcIgnoreRefs :: [Regex] -- ^ Regular expressions that match external references we should not verify. , vcCheckLocalhost :: Bool @@ -72,7 +72,7 @@ normaliseVerifyConfigFilePaths :: VerifyConfig -> VerifyConfig normaliseVerifyConfigFilePaths vc@VerifyConfig{ vcVirtualFiles, vcNotScanned} = vc { vcVirtualFiles = map normaliseGlobPattern vcVirtualFiles - , vcNotScanned = map normaliseWithNoTrailing vcNotScanned + , vcNotScanned = map normaliseGlobPattern vcNotScanned } -- | Configs for all the supported scanners. 
@@ -167,12 +167,12 @@ defConfigText flavor = GitHub -> [ ".github/pull_request_template.md" , ".github/issue_template.md" - , ".github/PULL_REQUEST_TEMPLATE" - , ".github/ISSUE_TEMPLATE" + , ".github/PULL_REQUEST_TEMPLATE/**/*" + , ".github/ISSUE_TEMPLATE/**/*" ] GitLab -> - [ ".gitlab/merge_request_templates/" - , ".gitlab/issue_templates/" + [ ".gitlab/merge_request_templates/**/*" + , ".gitlab/issue_templates/**/*" ] , "virtualFiles" -: Right $ case flavor of diff --git a/src/Xrefcheck/Config/Default.hs b/src/Xrefcheck/Config/Default.hs index 7fbfdad..7c15c13 100644 --- a/src/Xrefcheck/Config/Default.hs +++ b/src/Xrefcheck/Config/Default.hs @@ -15,14 +15,14 @@ defConfigUnfilled :: ByteString defConfigUnfilled = [r|# Parameters of repository traversal. traversal: - # Files and folders which we pretend do not exist + # Glob patterns describing files which we pretend do not exist # (so they are neither analyzed nor can be referenced). ignored: # Git files - - .git + - .git/**/* # Stack files - - .stack-work + - .stack-work/**/* # Verification parameters. verification: @@ -34,7 +34,7 @@ verification: # declaring "Response timeout". externalRefCheckTimeout: 10s - # Prefixes of files, references in which should not be analyzed. + # Glob patterns describing the files, references in which should not be analyzed. 
notScanned: - :PLACEHOLDER:notScanned: diff --git a/src/Xrefcheck/Scan.hs b/src/Xrefcheck/Scan.hs index 81a2aad..0af155a 100644 --- a/src/Xrefcheck/Scan.hs +++ b/src/Xrefcheck/Scan.hs @@ -22,24 +22,23 @@ import Universum import Data.Aeson.TH (deriveFromJSON) import Data.Foldable qualified as F import Data.Map qualified as M -import GHC.Err (errorWithoutStackTrace) import System.Directory (doesDirectoryExist) import System.Directory.Tree qualified as Tree -import System.FilePath (dropTrailingPathSeparator, takeDirectory, takeExtension, (), equalFilePath) +import System.FilePath (dropTrailingPathSeparator, takeDirectory, takeExtension, equalFilePath) import Xrefcheck.Core import Xrefcheck.Progress -import Xrefcheck.System (readingSystem) +import Xrefcheck.System (readingSystem, RelGlobPattern, normaliseGlobPattern, matchesGlobPatterns) import Xrefcheck.Util (aesonConfigOption, normaliseWithNoTrailing) -- | Config of repositry traversal. data TraversalConfig = TraversalConfig - { tcIgnored :: [FilePath] + { tcIgnored :: [RelGlobPattern] -- ^ Files and folders, files in which we completely ignore. } normaliseTraversalConfigFilePaths :: TraversalConfig -> TraversalConfig -normaliseTraversalConfigFilePaths = TraversalConfig . map normaliseWithNoTrailing . tcIgnored +normaliseTraversalConfigFilePaths = TraversalConfig . map normaliseGlobPattern . tcIgnored deriveFromJSON aesonConfigOption ''TraversalConfig @@ -72,10 +71,8 @@ gatherRepoInfo rw formatsSupport config root = do _ Tree.:/ repoTree <- liftIO $ Tree.readDirectoryWithL processFile root let fileInfos = map (first normaliseWithNoTrailing) - $ filter (\(path, _) -> not $ isIgnored path) $ dropSndMaybes . F.toList - $ Tree.zipPaths . (location Tree.:/) - $ filterExcludedDirs root repoTree + $ Tree.zipPaths $ location Tree.:/ repoTree return $ RepoInfo (M.fromList fileInfos) where isDirectory = readingSystem . 
doesDirectoryExist @@ -83,22 +80,12 @@ gatherRepoInfo rw formatsSupport config root = do processFile file = do let ext = takeExtension file let mscanner = formatsSupport ext - forM mscanner $ \scanFile -> scanFile file + if isIgnored file + then pure Nothing + else forM mscanner ($ file) dropSndMaybes l = [(a, b) | (a, Just b) <- l] - ignored = map (root ) (tcIgnored config) - isIgnored path = any (equalFilePath path) ignored - filterExcludedDirs cur = \case - Tree.Dir name subfiles -> - let subfiles' = - if isIgnored cur - then [] - else map visitRec subfiles - visitRec sub = filterExcludedDirs (cur Tree.name sub) sub - in Tree.Dir name subfiles' - file@Tree.File{} -> file - Tree.Failed _name err -> - errorWithoutStackTrace $ "Repository traversal failed: " <> show err + isIgnored = matchesGlobPatterns root $ tcIgnored config -- The context location of the root. -- This is done by removing the last component from the path. diff --git a/src/Xrefcheck/System.hs b/src/Xrefcheck/System.hs index f3e6e21..2376883 100644 --- a/src/Xrefcheck/System.hs +++ b/src/Xrefcheck/System.hs @@ -9,6 +9,7 @@ module Xrefcheck.System , RelGlobPattern (..) 
, normaliseGlobPattern , bindGlobPattern + , matchesGlobPatterns ) where import Universum @@ -52,6 +53,14 @@ bindGlobPattern root (RelGlobPattern relPat) = readingSystem $ do Right pat -> return pat +-- | Tell whether the canonicalized @file@ matches any of the glob patterns bound to @root@. +matchesGlobPatterns :: FilePath -> [RelGlobPattern] -> FilePath -> Bool +matchesGlobPatterns root globPatterns file = + any (\globPattern -> Glob.match (bindGlobPattern root globPattern) cFile) globPatterns + where + -- Canonicalize once up front instead of once per pattern. + cFile = readingSystem $ canonicalizePath file + instance FromJSON RelGlobPattern where parseJSON = withText "Repo-relative glob pattern" $ \path -> do let spath = toString path diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index e4351ca..4fe7230 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -57,9 +57,8 @@ import Network.HTTP.Req import Network.HTTP.Types.Header (hRetryAfter) import Network.HTTP.Types.Status (Status, statusCode, statusMessage) import System.Console.Pretty (Style (..), style) -import System.Directory (canonicalizePath, doesDirectoryExist, doesFileExist) +import System.Directory (doesDirectoryExist, doesFileExist) import System.FilePath (takeDirectory, (), normalise) -import System.FilePath.Glob qualified as Glob import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift) import Text.Regex.TDFA.Text (Regex, regexec) import Text.URI (Authority (..), URI (..), mkURIBs, ParseExceptionBs) @@ -261,7 +260,7 @@ verifyRepo = do let toScan = do (file, fileInfo) <- M.toList repoInfo - guard . not $ any ((`isPrefixOf` file) . normalise . (root )) vcNotScanned + guard . 
not $ matchesGlobPatterns root vcNotScanned file ref <- _fiReferences fileInfo return (file, ref) @@ -416,12 +415,7 @@ verifyReference let fileExists = readingSystem $ doesFileExist file let dirExists = readingSystem $ doesDirectoryExist file - let cfile = readingSystem $ canonicalizePath file - let isVirtual = or - [ Glob.match pat cfile - | virtualFile <- vcVirtualFiles - , let pat = bindGlobPattern root virtualFile - ] + let isVirtual = matchesGlobPatterns root vcVirtualFiles file unless (fileExists || dirExists || isVirtual) $ throwError (LocalFileDoesNotExist file) diff --git a/tests/configs/github-config.yaml b/tests/configs/github-config.yaml index 1985b22..ae50cf8 100644 --- a/tests/configs/github-config.yaml +++ b/tests/configs/github-config.yaml @@ -1,13 +1,13 @@ # Parameters of repository traversal. traversal: - # Files and folders which we pretend do not exist + # Glob patterns describing files which we pretend do not exist # (so they are neither analyzed nor can be referenced). ignored: # Git files - - .git + - .git/**/* # Stack files - - .stack-work + - .stack-work/**/* # Verification parameters. verification: @@ -19,12 +19,12 @@ verification: # declaring "Response timeout". externalRefCheckTimeout: 10s - # Prefixes of files, references in which should not be analyzed. + # Glob patterns describing the files, references in which should not be analyzed. notScanned: - .github/pull_request_template.md - .github/issue_template.md - - .github/PULL_REQUEST_TEMPLATE - - .github/ISSUE_TEMPLATE + - .github/PULL_REQUEST_TEMPLATE/**/* + - .github/ISSUE_TEMPLATE/**/* # Glob patterns describing the files which do not physically exist in the # repository but should be treated as existing nevertheless. 
diff --git a/tests/golden/check-cli/check-cli.bats b/tests/golden/check-cli/check-cli.bats index a59893e..4d48455 100644 --- a/tests/golden/check-cli/check-cli.bats +++ b/tests/golden/check-cli/check-cli.bats @@ -10,7 +10,7 @@ load '../helpers' @test "No redundant slashes" { run xrefcheck \ - --ignored to-ignore \ + --ignored 'to-ignore/*' \ --root . assert_output --partial "All repository links are valid." @@ -18,7 +18,7 @@ load '../helpers' @test "Redundant slashes in root and ignored" { run xrefcheck \ - --ignored ./././././././//to-ignore \ + --ignored './././././././//to-ignore/*' \ --root ./ assert_output --partial "All repository links are valid." @@ -34,7 +34,7 @@ load '../helpers' @test "Reduchant slashes in ignored" { run xrefcheck \ - --ignored ./././././././//to-ignore \ + --ignored './././././././//to-ignore/*' \ --root . assert_output --partial "All repository links are valid." diff --git a/tests/golden/check-ignored/check-ignored.bats b/tests/golden/check-ignored/check-ignored.bats new file mode 100644 index 0000000..58c7001 --- /dev/null +++ b/tests/golden/check-ignored/check-ignored.bats @@ -0,0 +1,40 @@ +#!/usr/bin/env bats + +# SPDX-FileCopyrightText: 2022 Serokell +# +# SPDX-License-Identifier: MPL-2.0 + +load '../helpers/bats-support/load' +load '../helpers/bats-assert/load' +load '../helpers' + + +@test "Ignore file with broken xrefcheck annotation: full path" { + run xrefcheck --ignored ./to-ignore/inner-directory/broken_annotation.md + + assert_output --partial "All repository links are valid." +} + +@test "Ignore file with broken xrefcheck annotation: glob wildcard" { + run xrefcheck --ignored 'to-ignore/inner-directory/*' + + assert_output --partial "All repository links are valid." +} + +@test "Ignore file with broken xrefcheck annotation: nested directories with glob wildcard" { + run xrefcheck --ignored './**/*' + + assert_output --partial "All repository links are valid." 
+} + +@test "Ignore file with broken xrefcheck annotation: config file" { + run xrefcheck --config ./config-ignored.yaml + + assert_output --partial "All repository links are valid." +} + +@test "Ignore file with broken xrefcheck annotation: directory, check failure" { + run xrefcheck --ignored ./to-ignore/inner-directory/ + + assert_output --partial "Error when scanning ./to-ignore/inner-directory/broken_annotation.md" +} diff --git a/tests/golden/check-ignored/config-ignored.yaml b/tests/golden/check-ignored/config-ignored.yaml new file mode 100644 index 0000000..84d6b11 --- /dev/null +++ b/tests/golden/check-ignored/config-ignored.yaml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2022 Serokell +# +# SPDX-License-Identifier: Unlicense + +traversal: + ignored: + - ./to-ignore/inner-directory/broken_annotation.md + +verification: + anchorSimilarityThreshold: 0.5 + externalRefCheckTimeout: 10s + notScanned: [] + virtualFiles: [] + ignoreRefs: [] + checkLocalhost: true + ignoreAuthFailures: true + defaultRetryAfter: 30s + maxRetries: 3 + +scanners: + markdown: + flavor: GitHub diff --git a/tests/golden/check-ignored/to-ignore/inner-directory/broken_annotation.md b/tests/golden/check-ignored/to-ignore/inner-directory/broken_annotation.md new file mode 100644 index 0000000..0414366 --- /dev/null +++ b/tests/golden/check-ignored/to-ignore/inner-directory/broken_annotation.md @@ -0,0 +1,11 @@ + + +One + + + +Two diff --git a/tests/golden/check-notScanned/check-notScanned.bats b/tests/golden/check-notScanned/check-notScanned.bats new file mode 100644 index 0000000..a66f650 --- /dev/null +++ b/tests/golden/check-notScanned/check-notScanned.bats @@ -0,0 +1,40 @@ +#!/usr/bin/env bats + +# SPDX-FileCopyrightText: 2022 Serokell +# +# SPDX-License-Identifier: MPL-2.0 + +load '../helpers/bats-support/load' +load '../helpers/bats-assert/load' +load '../helpers' + + +@test "Not scanned: full path" { + run xrefcheck -c config-full-path.yaml + + assert_output --partial "All repository 
links are valid." +} + +@test "Not scanned: glob wildcard" { + run xrefcheck -c config-wildcard.yaml + + assert_output --partial "All repository links are valid." +} + +@test "Not scanned: nested directories with glob wildcard" { + run xrefcheck -c config-nested-directories.yaml + + assert_output --partial "All repository links are valid." +} + +@test "Not scanned: directory, check failure" { + xrefcheck -c config-directory.yaml \ + | prepare > /tmp/check-notScanned.test || true + + diff /tmp/check-notScanned.test expected.gold \ + --ignore-space-change \ + --ignore-blank-lines \ + --new-file # treat absent files as empty + + rm /tmp/check-notScanned.test +} diff --git a/tests/golden/check-notScanned/config-directory.yaml b/tests/golden/check-notScanned/config-directory.yaml new file mode 100644 index 0000000..62cc2b2 --- /dev/null +++ b/tests/golden/check-notScanned/config-directory.yaml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2022 Serokell +# +# SPDX-License-Identifier: Unlicense + +traversal: + ignored: [] + +verification: + anchorSimilarityThreshold: 0.5 + externalRefCheckTimeout: 10s + notScanned: + - notScanned/inner-directory + virtualFiles: [] + ignoreRefs: [] + checkLocalhost: true + ignoreAuthFailures: true + defaultRetryAfter: 30s + maxRetries: 3 + +scanners: + markdown: + flavor: GitHub diff --git a/tests/golden/check-notScanned/config-full-path.yaml b/tests/golden/check-notScanned/config-full-path.yaml new file mode 100644 index 0000000..cdf3a64 --- /dev/null +++ b/tests/golden/check-notScanned/config-full-path.yaml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2022 Serokell +# +# SPDX-License-Identifier: Unlicense + +traversal: + ignored: [] + +verification: + anchorSimilarityThreshold: 0.5 + externalRefCheckTimeout: 10s + notScanned: + - ./notScanned/inner-directory/bad-reference.md + virtualFiles: [] + ignoreRefs: [] + checkLocalhost: true + ignoreAuthFailures: true + defaultRetryAfter: 30s + maxRetries: 3 + +scanners: + markdown: + flavor: GitHub 
diff --git a/tests/golden/check-notScanned/config-nested-directories.yaml b/tests/golden/check-notScanned/config-nested-directories.yaml new file mode 100644 index 0000000..b22df2c --- /dev/null +++ b/tests/golden/check-notScanned/config-nested-directories.yaml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2022 Serokell +# +# SPDX-License-Identifier: Unlicense + +traversal: + ignored: [] + +verification: + anchorSimilarityThreshold: 0.5 + externalRefCheckTimeout: 10s + notScanned: + - ./**/* + virtualFiles: [] + ignoreRefs: [] + checkLocalhost: true + ignoreAuthFailures: true + defaultRetryAfter: 30s + maxRetries: 3 + +scanners: + markdown: + flavor: GitHub diff --git a/tests/golden/check-notScanned/config-wildcard.yaml b/tests/golden/check-notScanned/config-wildcard.yaml new file mode 100644 index 0000000..e356c51 --- /dev/null +++ b/tests/golden/check-notScanned/config-wildcard.yaml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2022 Serokell +# +# SPDX-License-Identifier: Unlicense + +traversal: + ignored: [] + +verification: + anchorSimilarityThreshold: 0.5 + externalRefCheckTimeout: 10s + notScanned: + - ./notScanned/inner-directory/* + virtualFiles: [] + ignoreRefs: [] + checkLocalhost: true + ignoreAuthFailures: true + defaultRetryAfter: 30s + maxRetries: 3 + +scanners: + markdown: + flavor: GitHub diff --git a/tests/golden/check-notScanned/expected.gold b/tests/golden/check-notScanned/expected.gold new file mode 100644 index 0000000..7a37c8e --- /dev/null +++ b/tests/golden/check-notScanned/expected.gold @@ -0,0 +1,13 @@ +=== Invalid references found === + + ➥ In file notScanned/inner-directory/bad-reference.md + bad reference (absolute) at src:7:1-28: + - text: "Bad reference" + - link: /no-file.md + - anchor: - + + ⛀ File does not exist: + ./no-file.md + + +Invalid references dumped, 1 in total. 
diff --git a/tests/golden/check-notScanned/notScanned/inner-directory/bad-reference.md b/tests/golden/check-notScanned/notScanned/inner-directory/bad-reference.md new file mode 100644 index 0000000..c8af531 --- /dev/null +++ b/tests/golden/check-notScanned/notScanned/inner-directory/bad-reference.md @@ -0,0 +1,7 @@ + + +[Bad reference](/no-file.md)