[#200] Warnings about files that weren't added to git yet

Problem: after 0.2.2 release, xrefcheck cares only about files
that were added to Git. That can be confusing for users (see #200)

Solution:
If a scannable (currently it means markdown) file is not ignored
(by git or via config) and not tracked by git, print a warning to
stderr while scanning repo.

If a link target such file, change error message from "file not exists"
to `Link target is not tracked by Git`

Suggest user to run "git add" before running xrefcheck in both cases.

To do this, I've changed the `RepoInfo` type, so it also contains
information about untracked files now.
This commit is contained in:
Anton Sorokin 2022-11-16 00:48:52 +02:00
parent 2d83165a0b
commit da25917fa6
No known key found for this signature in database
GPG Key ID: 4B53B91ADFBFB649
4 changed files with 170 additions and 44 deletions

View File

@ -122,12 +122,36 @@ makeLenses ''FileInfo
instance Default FileInfo where
def = diffToFileInfo mempty
data ScanPolicy
= OnlyTracked
-- ^ Scan and treat as existing only files tracked by Git.
-- Warn when there are scannable files not added to Git yet.
| IncludeUntracked
-- ^ Also scan and treat as existing
-- files that were neither tracked nor ignored by Git.
deriving stock (Show, Eq)
data FileStatus
= Scanned FileInfo
| NotScannable
-- ^ Files that are not supported by our scanners
| NotAddedToGit
-- ^ We are not scanning files that are not added to git
-- unless --include-untracked CLI option was enabled, but we're
-- gathering information about them to improve reports.
deriving stock (Show)
data DirectoryStatus
= TrackedDirectory
| UntrackedDirectory
deriving stock (Show)
-- | All tracked files and directories.
data RepoInfo = RepoInfo
{ riFiles :: Map FilePath (Maybe FileInfo)
-- ^ Files from the repo with `FileInfo` attached to files that we can scan.
, riDirectories :: Set FilePath
-- ^ Tracked directories.
{ riFiles :: Map FilePath FileStatus
-- ^ Files from the repo with `FileInfo` attached to files that we've scanned.
, riDirectories :: Map FilePath DirectoryStatus
-- ^ Directories containing those files.
} deriving stock (Show)
-----------------------------------------------------------
@ -180,8 +204,9 @@ instance Given ColorMode => Buildable FileInfo where
|]
instance Given ColorMode => Buildable RepoInfo where
build (RepoInfo (nonEmpty . mapMaybe sequence . toPairs -> Just m) _) =
interpolateBlockListF' "" buildFileReport m
build (RepoInfo m _)
| Just scanned <- nonEmpty [(name, info) | (name, Scanned info) <- toPairs m]
= interpolateBlockListF' "" buildFileReport scanned
where
buildFileReport (name, info) =
[int||

View File

@ -14,6 +14,7 @@ module Xrefcheck.Scan
, ScanAction
, FormatsSupport
, RepoInfo (..)
, ReadDirectoryMode(..)
, ScanError (..)
, ScanErrorDescription (..)
, ScanResult (..)
@ -138,18 +139,41 @@ specificFormatsSupport formats = \ext -> M.lookup ext formatsMap
, extension <- extensions
]
-- | Process files that are tracked by git and not ignored by the config.
data ReadDirectoryMode
= RdmTracked
-- ^ Consider files tracked by Git, obtained from "git ls-files"
| RdmUntracked
-- ^ Consider files that are not tracked nor ignored by Git, obtained from
-- "git ls-files --others --exclude-standard"
| RdmBothTrackedAndUtracked
-- ^ Combine output from commands listed above, so we consider all files
-- except ones that are explicitly ignored by Git
-- | Process files that match given @ReadDirectoryMode@ and aren't ignored by the config.
readDirectoryWith
:: forall a. ExclusionConfig
:: forall a. ReadDirectoryMode
-> ExclusionConfig
-> (FilePath -> IO a)
-> FilePath
-> IO [(FilePath, a)]
readDirectoryWith config scanner root =
readDirectoryWith mode config scanner root =
traverse scanFile
. filter (not . isIgnored)
. fmap (location </>)
. L.lines =<< readCreateProcess (shell "git ls-files"){cwd = Just root} ""
. L.lines =<< getFiles
where
getFiles = case mode of
RdmTracked -> getTrackedFiles
RdmUntracked -> getUntrackedFiles
RdmBothTrackedAndUtracked -> liftA2 (<>) getTrackedFiles getUntrackedFiles
getTrackedFiles = readCreateProcess
(shell "git ls-files"){cwd = Just root} ""
getUntrackedFiles = readCreateProcess
(shell "git ls-files --others --exclude-standard"){cwd = Just root} ""
scanFile :: FilePath -> IO (FilePath, a)
scanFile = sequence . (normaliseWithNoTrailing &&& scanner)
@ -172,14 +196,35 @@ scanRepo rw formatsSupport config root = do
when (not $ isDirectory root) $
die $ "Repository's root does not seem to be a directory: " <> root
(errs, fileInfos) <- liftIO
$ (gatherScanErrs &&& gatherFileInfos)
<$> readDirectoryWith config processFile root
(errs, processedFiles) <- liftIO
$ (gatherScanErrs &&& gatherFileStatuses)
<$> readDirectoryWith RdmTracked config processFile root
let dirs = fromList $ foldMap (getDirs . fst) fileInfos
notProcessedFiles <- liftIO $
readDirectoryWith RdmUntracked config (const $ pure NotAddedToGit) root
return . ScanResult errs $ RepoInfo (M.fromList fileInfos) dirs
let scannableNotProcessedFiles = filter (isJust . mscanner . fst) notProcessedFiles
whenJust (nonEmpty $ map fst scannableNotProcessedFiles) $ \files -> hPutStrLn @Text stderr
[int|A|
Those files are not added by Git, so we're not scanning them:
#{interpolateBlockListF files}
Please run "git add" before running xrefcheck or enable \
--include-untracked CLI option to check these files.
|]
let trackedDirs = foldMap (getDirs . fst) processedFiles
untrackedDirs = foldMap (getDirs . fst) notProcessedFiles
return . ScanResult errs $ RepoInfo
{ riFiles = M.fromList $ processedFiles <> notProcessedFiles
, riDirectories = M.fromList
$ map (, TrackedDirectory) trackedDirs
<> map (, UntrackedDirectory) untrackedDirs
}
where
mscanner :: FilePath -> Maybe ScanAction
mscanner = formatsSupport . takeExtension
isDirectory :: FilePath -> Bool
isDirectory = readingSystem . doesDirectoryExist
@ -188,20 +233,19 @@ scanRepo rw formatsSupport config root = do
getDirs = scanl (</>) "" . splitDirectories . takeDirectory
gatherScanErrs
:: [(FilePath, Maybe (FileInfo, [ScanError]))]
:: [(FilePath, (FileStatus, [ScanError]))]
-> [ScanError]
gatherScanErrs = fold . mapMaybe (fmap snd . snd)
gatherScanErrs = foldMap (snd . snd)
gatherFileInfos
:: [(FilePath, Maybe (FileInfo, [ScanError]))]
-> [(FilePath, Maybe FileInfo)]
gatherFileInfos = map (second (fmap fst))
gatherFileStatuses
:: [(FilePath, (FileStatus, [ScanError]))]
-> [(FilePath, FileStatus)]
gatherFileStatuses = map (second fst)
processFile :: FilePath -> IO $ Maybe (FileInfo, [ScanError])
processFile file = do
let ext = takeExtension file
let mscanner = formatsSupport ext
forM mscanner ($ file)
processFile :: FilePath -> IO (FileStatus, [ScanError])
processFile file = case mscanner file of
Nothing -> pure (NotScannable, [])
Just scanner -> scanner file <&> _1 %~ Scanned
-----------------------------------------------------------
-- Yaml instances

View File

@ -120,6 +120,7 @@ instance (Given ColorMode, Buildable a) => Buildable (WithReferenceLoc a) where
data VerifyError
= LocalFileDoesNotExist FilePath
| LocalFileOutsideRepo FilePath
| LinkTargetNotAddedToGit FilePath
| AnchorDoesNotExist Text [Anchor]
| AmbiguousAnchorRef FilePath Text (NonEmpty Anchor)
| ExternalResourceInvalidUri URIBS.URIParseError
@ -148,6 +149,14 @@ instance Given ColorMode => Buildable VerifyError where
#{file}
|]
LinkTargetNotAddedToGit file ->
[int||
Link target is not tracked by Git:
#{file}
Please run "git add" before running xrefcheck or enable --include-untracked CLI option.
|]
AnchorDoesNotExist anchor similar -> case nonEmpty similar of
Nothing ->
[int||
@ -339,10 +348,13 @@ verifyRepo
(file, fileInfo) <- M.toList files
guard . not $ matchesGlobPatterns root (ecIgnoreRefsFrom cExclusions) file
case fileInfo of
Just fi -> do
Scanned fi -> do
ref <- _fiReferences fi
return (file, ref)
Nothing -> empty -- no support for such file, can do nothing
NotScannable -> empty -- No support for such file, can do nothing.
NotAddedToGit -> empty -- If this file is scannable, we've notified
-- user that we are scanning only files
-- added to Git while gathering RepoInfo.
progressRef <- newIORef $ initVerifyProgress (map snd toScan)
@ -504,14 +516,15 @@ verifyReference
checkRef mAnchor referredFile = verifying $
unless (isVirtual referredFile) do
checkReferredFileIsInsideRepo referredFile
checkReferredFileExists referredFile
case lookupFilePath referredFile $ M.toList files of
Nothing -> pass -- no support for such file, can do nothing
Just referredFileInfo -> whenJust mAnchor $
mFileStatus <- tryGetFileStatus referredFile
case mFileStatus of
Right (Scanned referredFileInfo) -> whenJust mAnchor $
checkAnchor referredFile (_fiAnchors referredFileInfo)
lookupFilePath :: FilePath -> [(FilePath, Maybe FileInfo)] -> Maybe FileInfo
lookupFilePath fp = snd <=< find (equalFilePath (expandIndirections fp) . fst)
Right NotScannable -> pass -- no support for such file, can do nothing
Right NotAddedToGit -> throwError (LinkTargetNotAddedToGit referredFile)
Left UntrackedDirectory -> throwError (LinkTargetNotAddedToGit referredFile)
Left TrackedDirectory -> pass -- path leads to directory, currently
-- if such link contain anchor, we ignore it
-- expands ".." and "."
-- expandIndirections "a/b/../c" = "a/c"
@ -545,18 +558,21 @@ verifyReference
nestingChange "." = 0
nestingChange _ = 1
checkReferredFileExists file = do
unless (fileExists || dirExists) $
throwError (LocalFileDoesNotExist file)
-- Returns `Nothing` when path corresponds to an existing (and tracked) directory
tryGetFileStatus :: FilePath -> ExceptT VerifyError IO (Either DirectoryStatus FileStatus)
tryGetFileStatus file
| Just f <- mFile = return $ Right f
| Just d <- mDir = return $ Left d
| otherwise = throwError (LocalFileDoesNotExist file)
where
matchesFilePath :: FilePath -> Bool
matchesFilePath = equalFilePath $ expandIndirections file
fileExists :: Bool
fileExists = any matchesFilePath $ M.keys files
mFile :: Maybe FileStatus
mFile = (files M.!) <$> find matchesFilePath (M.keys files)
dirExists :: Bool
dirExists = any matchesFilePath dirs
mDir :: Maybe DirectoryStatus
mDir = (dirs M.!) <$> find matchesFilePath (M.keys dirs)
checkAnchor file fileAnchors anchor = do
checkAnchorReferenceAmbiguity file fileAnchors anchor

View File

@ -17,7 +17,7 @@ load '../helpers'
assert_output --partial "fatal: not a git repository"
}
@test "Git: file not tracked" {
@test "Git: bad file not tracked" {
cd $TEST_TEMP_DIR
git init
@ -26,10 +26,19 @@ load '../helpers'
run xrefcheck
assert_success
assert_output --partial "All repository links are valid."
# this is printed to stderr
assert_output --partial - <<EOF
Those files are not added by Git, so we're not scanning them:
- git.md
Please run "git add" before running xrefcheck or enable --include-untracked CLI option to check these files.
EOF
}
@test "Git: file tracked, check failure" {
@test "Git: bad file tracked, check failure" {
cd $TEST_TEMP_DIR
git init
@ -55,3 +64,35 @@ load '../helpers'
Invalid references dumped, 1 in total.
EOF
}
@test "Git: link to untracked file, check failure" {
cd $TEST_TEMP_DIR
git init
echo "[a](./a.md)" >> "git.md"
touch ./a.md
git add git.md
to_temp xrefcheck
assert_diff - <<EOF
=== Invalid references found ===
➥ In file git.md
bad reference (relative) at src:1:1-11:
- text: "a"
- link: ./a.md
- anchor: -
⛀ Link target is not tracked by Git:
a.md
Please run "git add" before running xrefcheck or enable --include-untracked CLI option.
Invalid references dumped, 1 in total.
EOF
}