From 0886062500de95ab4353ff4545497d27fec3b93c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Enr=C3=ADquez?= Date: Tue, 20 Dec 2022 16:54:58 +0100 Subject: [PATCH] [#197] Canonicalize filepaths Problem: the current usage of filepaths is error-prone and can be simplified. Solution: canonicalize filepaths at the boundaries, so their management will be safer and will simplify the codebase. --- ftp-tests/Test/Xrefcheck/FtpLinks.hs | 1 - package.yaml | 1 - src/Xrefcheck/CLI.hs | 4 +- src/Xrefcheck/Command.hs | 10 +- src/Xrefcheck/Config.hs | 15 +- src/Xrefcheck/Core.hs | 162 +++++++++-------- src/Xrefcheck/Scan.hs | 151 +++++++++------- src/Xrefcheck/Scanners/Markdown.hs | 70 ++++---- src/Xrefcheck/System.hs | 157 +++++++++++++--- src/Xrefcheck/Util.hs | 5 - src/Xrefcheck/Verify.hs | 170 ++++++++---------- tests/Test/Xrefcheck/CanonicalPathSpec.hs | 63 +++++++ tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs | 12 +- tests/Test/Xrefcheck/IgnoreRegexSpec.hs | 2 +- tests/Test/Xrefcheck/LocalSpec.hs | 25 --- tests/Test/Xrefcheck/RedirectRequestsSpec.hs | 2 +- tests/Test/Xrefcheck/TooManyRequestsSpec.hs | 6 +- tests/Test/Xrefcheck/TrailingSlashSpec.hs | 9 +- tests/Test/Xrefcheck/Util.hs | 9 +- tests/Test/Xrefcheck/UtilRequests.hs | 8 +- tests/golden/check-anchors/check-anchors.bats | 14 +- tests/golden/check-cli/check-cli.bats | 6 +- tests/golden/check-footnotes/expected.gold | 4 +- tests/golden/check-ignore/check-ignore.bats | 2 +- .../check-ignoreLocalRefsTo/expected.gold | 12 +- .../check-ignoreRefsFrom.bats | 2 +- tests/golden/check-local-refs/expected1.gold | 20 +-- tests/golden/check-local-refs/expected2.gold | 60 +++---- tests/golden/check-local-refs/expected3.gold | 44 ++--- tests/golden/check-scan-errors/expected.gold | 4 +- 30 files changed, 594 insertions(+), 456 deletions(-) create mode 100644 tests/Test/Xrefcheck/CanonicalPathSpec.hs delete mode 100644 tests/Test/Xrefcheck/LocalSpec.hs diff --git a/ftp-tests/Test/Xrefcheck/FtpLinks.hs b/ftp-tests/Test/Xrefcheck/FtpLinks.hs index 5830630..717be14 100644 --- a/ftp-tests/Test/Xrefcheck/FtpLinks.hs +++ b/ftp-tests/Test/Xrefcheck/FtpLinks.hs @@ -41,7 +41,6 @@ instance IsOption FtpHostOpt where <> help (untag (optionHelp :: Tagged FtpHostOpt String)) ) - config :: Config config = defConfig GitHub & cExclusionsL . ecIgnoreExternalRefsToL .~ [] diff --git a/package.yaml b/package.yaml index d0adcd0..b497b20 100644 --- a/package.yaml +++ b/package.yaml @@ -87,7 +87,6 @@ library: - bytestring - containers - cmark-gfm >= 0.2.5 - - data-default - directory - dlist - filepath diff --git a/src/Xrefcheck/CLI.hs b/src/Xrefcheck/CLI.hs index 1aa809d..b06992e 100644 --- a/src/Xrefcheck/CLI.hs +++ b/src/Xrefcheck/CLI.hs @@ -39,7 +39,7 @@ import Xrefcheck.Config (NetworkingConfig, NetworkingConfig' (..)) import Xrefcheck.Core import Xrefcheck.Scan import Xrefcheck.System (RelGlobPattern, mkGlobPattern) -import Xrefcheck.Util (ColorMode (WithColors, WithoutColors), normaliseWithNoTrailing) +import Xrefcheck.Util (ColorMode (WithColors, WithoutColors)) modeReadM :: ReadM VerifyMode modeReadM = eitherReader $ \s -> @@ -118,7 +118,7 @@ defaultConfigPaths = ["./xrefcheck.yaml", "./.xrefcheck.yaml"] type RepoType = Flavor filepathOption :: Mod OptionFields FilePath -> Parser FilePath -filepathOption = fmap normaliseWithNoTrailing <$> strOption +filepathOption = strOption globOption :: Mod OptionFields RelGlobPattern -> Parser RelGlobPattern globOption = option $ eitherReader $ mkGlobPattern diff --git a/src/Xrefcheck/Command.hs b/src/Xrefcheck/Command.hs index 16620a8..3fb6467 100644 --- a/src/Xrefcheck/Command.hs +++ b/src/Xrefcheck/Command.hs @@ -18,8 +18,7 @@ import Text.Interpolation.Nyan import Xrefcheck.CLI (Options (..), addExclusionOptions, addNetworkingOptions, defaultConfigPaths) import Xrefcheck.Config - (Config, Config' (..), ScannersConfig, ScannersConfig' (..), defConfig, normaliseConfigFilePaths, - overrideConfig) + (Config, Config' (..), ScannersConfig, ScannersConfig' (..), defConfig, overrideConfig) import Xrefcheck.Core (Flavor (..)) import Xrefcheck.Progress (allowRewrite) import Xrefcheck.Scan @@ -31,7 +30,7 @@ import Xrefcheck.Util import Xrefcheck.Verify (reportVerifyErrs, verifyErrors, verifyRepo) readConfig :: FilePath -> IO Config -readConfig path = fmap (normaliseConfigFilePaths . overrideConfig) do +readConfig path = fmap overrideConfig do decodeFileEither path >>= either (error . toText . prettyPrintParseException) pure @@ -70,7 +69,8 @@ defaultAction Options{..} = do (ScanResult scanErrs repoInfo) <- allowRewrite showProgressBar $ \rw -> do let fullConfig = addExclusionOptions (cExclusions config) oExclusionOptions - scanRepo oScanPolicy rw (formats $ cScanners config) fullConfig oRoot + formatsSupport = formats $ cScanners config + scanRepo oScanPolicy rw formatsSupport fullConfig oRoot when oVerbose $ fmt [int|| @@ -84,7 +84,7 @@ defaultAction Options{..} = do verifyRes <- allowRewrite showProgressBar $ \rw -> do let fullConfig = config { cNetworking = addNetworkingOptions (cNetworking config) oNetworkingOptions } - verifyRepo rw fullConfig oMode oRoot repoInfo + verifyRepo rw fullConfig oMode repoInfo case verifyErrors verifyRes of Nothing | null scanErrs -> fmtLn "All repository links are valid." diff --git a/src/Xrefcheck/Config.hs b/src/Xrefcheck/Config.hs index 1496167..45bdde7 100644 --- a/src/Xrefcheck/Config.hs +++ b/src/Xrefcheck/Config.hs @@ -10,7 +10,6 @@ module Xrefcheck.Config , defConfigText ) where - import Universum import Control.Lens (makeLensesWith) @@ -38,13 +37,6 @@ data Config' f = Config , cScanners :: ScannersConfig' f } deriving stock (Generic) -normaliseConfigFilePaths :: Config -> Config -normaliseConfigFilePaths Config{..} - = Config - { cExclusions = normaliseExclusionConfigFilePaths cExclusions - , .. - } - -- | Type alias for NetworkingConfig' with all required fields. type NetworkingConfig = NetworkingConfig' Identity @@ -79,9 +71,10 @@ makeLensesWith postfixFields ''Config' makeLensesWith postfixFields ''NetworkingConfig' defConfig :: HasCallStack => Flavor -> Config -defConfig flavor = normaliseConfigFilePaths $ - either (error . toText . prettyPrintParseException) id $ - decodeEither' $ encodeUtf8 $ defConfigText flavor +defConfig = either (error . toText . prettyPrintParseException) id + . decodeEither' + . encodeUtf8 + . defConfigText -- | Override missed fields with default values. overrideConfig :: ConfigOptional -> Config diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index 5a9d58c..d3fd0ab 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -15,10 +15,10 @@ import Control.Lens (makeLenses) import Data.Aeson (FromJSON (..), withText) import Data.Char (isAlphaNum) import Data.Char qualified as C -import Data.Default (Default (..)) import Data.DList (DList) import Data.DList qualified as DList import Data.List qualified as L +import Data.Map qualified as M import Data.Reflection (Given) import Data.Text qualified as T import Fmt (Buildable (..), Builder) @@ -27,6 +27,7 @@ import Text.Interpolation.Nyan import Time (Second, Time) import Xrefcheck.Progress +import Xrefcheck.System import Xrefcheck.Util ----------------------------------------------------------- @@ -77,8 +78,60 @@ data Reference = Reference , rAnchor :: Maybe Text -- ^ Section or custom anchor tag. , rPos :: Position + -- ^ Position in source file. + , rInfo :: ReferenceInfo + -- ^ More info about the link. } deriving stock (Show, Generic) +-- | Info about the reference. +data ReferenceInfo + = RIExternal + -- ^ Reference to a file at outer site, e.g @[d](http://www.google.com/doodles)@ + | RIOtherProtocol + -- ^ Entry not to be processed, e.g. @mailto:e-mail@ + | RIFileLocal + -- ^ Reference to this file, e.g. @[a](#header)@ + | RIFileAbsolute + -- ^ Reference to a file absolute to the root, e.g. @[c](/folder/file#header)@ + | RIFileRelative + -- ^ Reference to a file relative to given one, e.g. @[b](folder/file#header)@ + deriving stock (Show, Generic) + +pattern PathSep :: Char +pattern PathSep <- (isPathSeparator -> True) + +-- | Compute the 'ReferenceInfo' corresponding to a given link. +referenceInfo :: Text -> ReferenceInfo +referenceInfo link = case toString link of + [] -> RIFileLocal + PathSep : _ -> RIFileAbsolute + '.' : PathSep : _ -> RIFileRelative + '.' : '.' : PathSep : _ -> RIFileRelative + _ | hasUrlProtocol -> RIExternal + | hasProtocol -> RIOtherProtocol + | otherwise -> RIFileRelative + where + hasUrlProtocol = "://" `T.isInfixOf` T.take 10 link + hasProtocol = ":" `T.isInfixOf` T.take 10 link + +-- | Whether this is a link to external resource. +isExternal :: ReferenceInfo -> Bool +isExternal = \case + RIFileLocal -> False + RIFileRelative -> False + RIFileAbsolute -> False + RIExternal -> True + RIOtherProtocol -> False + +-- | Whether this is a link to repo-local resource. +isLocal :: ReferenceInfo -> Bool +isLocal = \case + RIFileLocal -> True + RIFileRelative -> True + RIFileAbsolute -> True + RIExternal -> False + RIOtherProtocol -> False + -- | Context of anchor. data AnchorType = HeaderAnchor Int @@ -119,9 +172,6 @@ data FileInfo = FileInfo } deriving stock (Show, Generic) makeLenses ''FileInfo -instance Default FileInfo where - def = diffToFileInfo mempty - data ScanPolicy = OnlyTracked -- ^ Scan and treat as existing only files tracked by Git. @@ -148,11 +198,23 @@ data DirectoryStatus -- | All tracked files and directories. data RepoInfo = RepoInfo - { riFiles :: Map FilePath FileStatus - -- ^ Files from the repo with `FileInfo` attached to files that we've scanned. - , riDirectories :: Map FilePath DirectoryStatus - -- ^ Directories containing those files. - } deriving stock (Show) + { riFiles :: Map CanonicalPath FileStatus + -- ^ Files from the repo with `FileInfo` attached to files that we've scanned. + , riDirectories :: Map CanonicalPath DirectoryStatus + -- ^ Directories containing those files. + , riRoot :: CanonicalPath + -- ^ Repository root. + } + +-- Search for a file in the repository. +lookupFile :: CanonicalPath -> RepoInfo -> Maybe FileStatus +lookupFile path RepoInfo{..} = + M.lookup path riFiles + +-- Search for a directory in the repository. +lookupDirectory :: CanonicalPath -> RepoInfo -> Maybe DirectoryStatus +lookupDirectory path RepoInfo{..} = + M.lookup path riDirectories ----------------------------------------------------------- -- Instances @@ -160,6 +222,7 @@ data RepoInfo = RepoInfo instance NFData Position instance NFData Reference +instance NFData ReferenceInfo instance NFData AnchorType instance NFData Anchor instance NFData FileInfo @@ -167,12 +230,20 @@ instance NFData FileInfo instance Given ColorMode => Buildable Reference where build Reference{..} = [int|| - reference #{paren . build $ locationType rLink} #{rPos}: + reference #{paren . build $ rInfo} #{rPos}: - text: #s{rName} - link: #{if null rLink then "-" else rLink} - anchor: #{rAnchor ?: styleIfNeeded Faint "-"} |] +instance Given ColorMode => Buildable ReferenceInfo where + build = \case + RIFileLocal -> colorIfNeeded Green "file-local" + RIFileRelative -> colorIfNeeded Yellow "relative" + RIFileAbsolute -> colorIfNeeded Blue "absolute" + RIExternal -> colorIfNeeded Red "external" + RIOtherProtocol -> "" + instance Given ColorMode => Buildable AnchorType where build = styleIfNeeded Faint . \case HeaderAnchor l -> colorIfNeeded Green ("header " <> headerLevelToRoman l) @@ -204,14 +275,14 @@ instance Given ColorMode => Buildable FileInfo where |] instance Given ColorMode => Buildable RepoInfo where - build (RepoInfo m _) - | Just scanned <- nonEmpty [(name, info) | (name, Scanned info) <- toPairs m] + build RepoInfo{..} + | Just scanned <- nonEmpty [(name, info) | (name, Scanned info) <- toPairs riFiles] = interpolateUnlinesF $ buildFileReport <$> scanned where - buildFileReport :: ([Char], FileInfo) -> Builder + buildFileReport :: (CanonicalPath, FileInfo) -> Builder buildFileReport (name, info) = [int|| - #{ colorIfNeeded Cyan $ name }: + #{ colorIfNeeded Cyan $ getPosixRelativeOrAbsoluteChild riRoot name }: #{ interpolateIndentF 2 $ build info } |] build _ = "No scannable files found." @@ -220,60 +291,6 @@ instance Given ColorMode => Buildable RepoInfo where -- Analysing ----------------------------------------------------------- -pattern PathSep :: Char -pattern PathSep <- (isPathSeparator -> True) - --- | Type of reference. -data LocationType - = FileLocalLoc - -- ^ Reference to this file, e.g. @[a](#header)@ - | RelativeLoc - -- ^ Reference to a file relative to given one, e.g. @[b](folder/file#header)@ - | AbsoluteLoc - -- ^ Reference to a file relative to the root, e.g. @[c](/folder/file#header)@ - | ExternalLoc - -- ^ Reference to a file at outer site, e.g @[d](http://www.google.com/doodles)@ - | OtherLoc - -- ^ Entry not to be processed, e.g. @mailto:e-mail@ - deriving stock (Eq, Show) - -instance Given ColorMode => Buildable LocationType where - build = \case - FileLocalLoc -> colorIfNeeded Green "file-local" - RelativeLoc -> colorIfNeeded Yellow "relative" - AbsoluteLoc -> colorIfNeeded Blue "absolute" - ExternalLoc -> colorIfNeeded Red "external" - OtherLoc -> "" - --- | Whether this is a link to external resource. -isExternal :: LocationType -> Bool -isExternal = \case - ExternalLoc -> True - _ -> False - --- | Whether this is a link to repo-local resource. -isLocal :: LocationType -> Bool -isLocal = \case - FileLocalLoc -> True - RelativeLoc -> True - AbsoluteLoc -> True - ExternalLoc -> False - OtherLoc -> False - --- | Get type of reference. -locationType :: Text -> LocationType -locationType location = case toString location of - [] -> FileLocalLoc - PathSep : _ -> AbsoluteLoc - '.' : PathSep : _ -> RelativeLoc - '.' : '.' : PathSep : _ -> RelativeLoc - _ | hasUrlProtocol -> ExternalLoc - | hasProtocol -> OtherLoc - | otherwise -> RelativeLoc - where - hasUrlProtocol = "://" `T.isInfixOf` T.take 10 location - hasProtocol = ":" `T.isInfixOf` T.take 10 location - -- | Which parts of verification do we perform. data VerifyMode = LocalOnlyMode @@ -335,13 +352,6 @@ stripAnchorDupNo t = do guard (length strippedNo < length t) T.stripSuffix "-" strippedNo --- | Strip './' prefix from local references. -canonizeLocalRef :: Text -> Text -canonizeLocalRef ref = - maybe ref canonizeLocalRef (T.stripPrefix localPrefix ref) - where - localPrefix = "./" - ----------------------------------------------------------- -- Visualisation ----------------------------------------------------------- @@ -357,7 +367,7 @@ initVerifyProgress references = VerifyProgress , vrExternal = initProgress (length (ordNub $ map rLink extRefs)) } where - (extRefs, localRefs) = L.partition (isExternal . locationType . rLink) references + (extRefs, localRefs) = L.partition (isExternal . rInfo) references showAnalyseProgress :: Given ColorMode => VerifyMode -> Time Second -> VerifyProgress -> Text showAnalyseProgress mode posixTime VerifyProgress{..} = diff --git a/src/Xrefcheck/Scan.hs b/src/Xrefcheck/Scan.hs index da312a0..5e15f31 100644 --- a/src/Xrefcheck/Scan.hs +++ b/src/Xrefcheck/Scan.hs @@ -13,13 +13,14 @@ module Xrefcheck.Scan , Extension , ScanAction , FormatsSupport - , RepoInfo (..) , ReadDirectoryMode(..) , ScanError (..) , ScanErrorDescription (..) , ScanResult (..) + , ScanStage (..) - , normaliseExclusionConfigFilePaths + , mkParseScanError + , mkGatherScanError , scanRepo , specificFormatsSupport , ecIgnoreL @@ -38,8 +39,6 @@ import Data.Map qualified as M import Data.Reflection (Given) import Fmt (Buildable (..), fmt) import System.Directory (doesDirectoryExist) -import System.FilePath.Posix - (dropTrailingPathSeparator, equalFilePath, splitDirectories, takeDirectory, takeExtension, ()) import System.Process (cwd, readCreateProcess, shell) import Text.Interpolation.Nyan import Text.Regex.TDFA.Common (CompOption (..), ExecOption (..), Regex) @@ -47,7 +46,7 @@ import Text.Regex.TDFA.Text qualified as R import Xrefcheck.Core import Xrefcheck.Progress -import Xrefcheck.System (RelGlobPattern, matchesGlobPatterns, normaliseGlobPattern, readingSystem) +import Xrefcheck.System import Xrefcheck.Util -- | Type alias for ExclusionConfig' with all required fields. @@ -67,35 +66,58 @@ data ExclusionConfig' f = ExclusionConfig makeLensesWith postfixFields ''ExclusionConfig' -normaliseExclusionConfigFilePaths :: ExclusionConfig -> ExclusionConfig -normaliseExclusionConfigFilePaths ec@ExclusionConfig{..} - = ec - { ecIgnore = map normaliseGlobPattern ecIgnore - , ecIgnoreLocalRefsTo = map normaliseGlobPattern ecIgnoreLocalRefsTo - , ecIgnoreRefsFrom = map normaliseGlobPattern ecIgnoreRefsFrom - } - -- | File extension, dot included. type Extension = String -- | Way to parse a file. -type ScanAction = FilePath -> IO (FileInfo, [ScanError]) +type ScanAction = CanonicalPath -> IO (FileInfo, [ScanError 'Parse]) -- | All supported ways to parse a file. type FormatsSupport = Extension -> Maybe ScanAction data ScanResult = ScanResult - { srScanErrors :: [ScanError] + { srScanErrors :: [ScanError 'Gather] , srRepoInfo :: RepoInfo - } deriving stock (Show) + } -data ScanError = ScanError - { sePosition :: Position - , seFile :: FilePath +-- | A scan error indexed by different process stages. +-- +-- Within 'Parse', 'seFile' has no information because the same +-- file is being parsed. +-- +-- Within 'Gather', 'seFile' stores the 'FilePath' corresponding +-- to the file in where the error was found. +data ScanError (a :: ScanStage) = ScanError + { seFile :: ScanStageFile a + , sePosition :: Position , seDescription :: ScanErrorDescription - } deriving stock (Show, Eq) + } -instance Given ColorMode => Buildable ScanError where +data ScanStage = Parse | Gather + +type family ScanStageFile (a :: ScanStage) where + ScanStageFile 'Parse = () + ScanStageFile 'Gather = FilePath + +deriving stock instance Show (ScanError 'Parse) +deriving stock instance Show (ScanError 'Gather) +deriving stock instance Eq (ScanError 'Parse) +deriving stock instance Eq (ScanError 'Gather) + +-- | Make a 'ScanError' for the 'Parse' stage. +mkParseScanError :: Position -> ScanErrorDescription -> ScanError 'Parse +mkParseScanError = ScanError () + +-- | Promote a 'ScanError' from the 'Parse' stage +-- to the 'Gather' stage. +mkGatherScanError :: FilePath -> ScanError 'Parse -> ScanError 'Gather +mkGatherScanError seFile ScanError{sePosition, seDescription} = ScanError + { seFile + , sePosition + , seDescription + } + +instance Given ColorMode => Buildable (ScanError 'Gather) where build ScanError{..} = [int|| In file #{styleIfNeeded Faint (styleIfNeeded Bold seFile)} scan error #{sePosition}: @@ -104,7 +126,7 @@ instance Given ColorMode => Buildable ScanError where |] -reportScanErrs :: Given ColorMode => NonEmpty ScanError -> IO () +reportScanErrs :: Given ColorMode => NonEmpty (ScanError 'Gather) -> IO () reportScanErrs errs = fmt [int|| === Scan errors found === @@ -153,14 +175,13 @@ data ReadDirectoryMode readDirectoryWith :: forall a. ReadDirectoryMode -> ExclusionConfig - -> (FilePath -> IO a) - -> FilePath - -> IO [(FilePath, a)] -readDirectoryWith mode config scanner root = - traverse scanFile - . filter (not . isIgnored) - . fmap (location ) - . L.lines =<< getFiles + -> (CanonicalPath -> IO a) + -> CanonicalPath + -> IO [(CanonicalPath, a)] +readDirectoryWith mode config scanner root = do + relativeFiles <- L.lines <$> getFiles + canonicalFiles <- mapM (root liftA2 (<>) getTrackedFiles getUntrackedFiles getTrackedFiles = readCreateProcess - (shell "git ls-files"){cwd = Just root} "" + (shell "git ls-files"){cwd = Just $ unCanonicalPath root} "" getUntrackedFiles = readCreateProcess - (shell "git ls-files --others --exclude-standard"){cwd = Just root} "" + (shell "git ls-files --others --exclude-standard"){cwd = Just $ unCanonicalPath root} "" - scanFile :: FilePath -> IO (FilePath, a) - scanFile = sequence . (normaliseWithNoTrailing &&& scanner) + scanFile :: CanonicalPath -> IO (CanonicalPath, a) + scanFile c = (c,) <$> scanner c - isIgnored :: FilePath -> Bool + isIgnored :: CanonicalPath -> Bool isIgnored = matchesGlobPatterns root $ ecIgnore config - -- Strip leading "." and trailing "/" - location :: FilePath - location = - if root `equalFilePath` "." - then "" - else dropTrailingPathSeparator root - scanRepo :: MonadIO m => ScanPolicy -> Rewrite -> FormatsSupport -> ExclusionConfig -> FilePath -> m ScanResult scanRepo scanMode rw formatsSupport config root = do putTextRewrite rw "Scanning repository..." - when (not $ isDirectory root) $ + liftIO $ whenM (not <$> doesDirectoryExist root) $ die $ "Repository's root does not seem to be a directory: " <> root + canonicalRoot <- liftIO $ canonicalizePath root + (errs, processedFiles) <- let mode = case scanMode of OnlyTracked -> RdmTracked IncludeUntracked -> RdmBothTrackedAndUtracked - in liftIO - $ (gatherScanErrs &&& gatherFileStatuses) - <$> readDirectoryWith mode config processFile root + in liftIO $ (gatherScanErrs canonicalRoot &&& gatherFileStatuses) + <$> readDirectoryWith mode config processFile canonicalRoot notProcessedFiles <- case scanMode of OnlyTracked -> liftIO $ - readDirectoryWith RdmUntracked config (const $ pure NotAddedToGit) root + readDirectoryWith RdmUntracked config (const $ pure NotAddedToGit) canonicalRoot IncludeUntracked -> pure [] let scannableNotProcessedFiles = filter (isJust . mscanner . fst) notProcessedFiles @@ -214,44 +229,42 @@ scanRepo scanMode rw formatsSupport config root = do whenJust (nonEmpty $ map fst scannableNotProcessedFiles) $ \files -> hPutStrLn @Text stderr [int|A| Those files are not added by Git, so we're not scanning them: - #{interpolateBlockListF files} + #{interpolateBlockListF $ getPosixRelativeOrAbsoluteChild canonicalRoot <$> files} Please run "git add" before running xrefcheck or enable \ --include-untracked CLI option to check these files. |] - let trackedDirs = foldMap (getDirs . fst) processedFiles - untrackedDirs = foldMap (getDirs . fst) notProcessedFiles + let trackedDirs = foldMap (getDirsBetweenRootAndFile canonicalRoot . fst) processedFiles + untrackedDirs = foldMap (getDirsBetweenRootAndFile canonicalRoot . fst) notProcessedFiles + return . ScanResult errs $ RepoInfo { riFiles = M.fromList $ processedFiles <> notProcessedFiles - , riDirectories = M.fromList - $ map (, TrackedDirectory) trackedDirs - <> map (, UntrackedDirectory) untrackedDirs + , riDirectories = M.fromList $ (fmap (, TrackedDirectory) trackedDirs + <> fmap (, UntrackedDirectory) untrackedDirs) + , riRoot = canonicalRoot } where - mscanner :: FilePath -> Maybe ScanAction + mscanner :: CanonicalPath -> Maybe ScanAction mscanner = formatsSupport . takeExtension - isDirectory :: FilePath -> Bool - isDirectory = readingSystem . doesDirectoryExist - - -- Get all directories from filepath. - getDirs :: FilePath -> [FilePath] - getDirs = scanl () "" . splitDirectories . takeDirectory - gatherScanErrs - :: [(FilePath, (FileStatus, [ScanError]))] - -> [ScanError] - gatherScanErrs = foldMap (snd . snd) + :: CanonicalPath + -> [(CanonicalPath, (FileStatus, [ScanError 'Parse]))] + -> [ScanError 'Gather] + gatherScanErrs canonicalRoot = foldMap $ \(file, (_, errs)) -> + mkGatherScanError (showFilepath file) <$> errs + where + showFilepath = getPosixRelativeOrAbsoluteChild canonicalRoot gatherFileStatuses - :: [(FilePath, (FileStatus, [ScanError]))] - -> [(FilePath, FileStatus)] + :: [(CanonicalPath, (FileStatus, [ScanError 'Parse]))] + -> [(CanonicalPath, FileStatus)] gatherFileStatuses = map (second fst) - processFile :: FilePath -> IO (FileStatus, [ScanError]) - processFile file = case mscanner file of + processFile :: CanonicalPath -> IO (FileStatus, [ScanError 'Parse]) + processFile canonicalFile = case mscanner canonicalFile of Nothing -> pure (NotScannable, []) - Just scanner -> scanner file <&> _1 %~ Scanned + Just scanner -> scanner canonicalFile <&> _1 %~ Scanned ----------------------------------------------------------- -- Yaml instances diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs index f687616..bcd6d40 100644 --- a/src/Xrefcheck/Scanners/Markdown.hs +++ b/src/Xrefcheck/Scanners/Markdown.hs @@ -26,7 +26,6 @@ import Control.Monad.Trans.Writer.CPS (Writer, runWriter, tell) import Data.Aeson (FromJSON (..), genericParseJSON) import Data.ByteString.Lazy qualified as BSL import Data.DList qualified as DList -import Data.Default (def) import Data.Text qualified as T import Data.Text.Lazy qualified as LT import Fmt (Buildable (..), nameF) @@ -35,6 +34,7 @@ import Text.Interpolation.Nyan import Xrefcheck.Core import Xrefcheck.Scan +import Xrefcheck.System import Xrefcheck.Util data MarkdownConfig = MarkdownConfig @@ -124,8 +124,6 @@ data GetIgnoreMode | InvalidMode Text deriving stock (Eq) - - data ScannerState = ScannerState { _ssIgnore :: Maybe Ignore , _ssParentNodeType :: Maybe NodeType @@ -139,7 +137,7 @@ initialScannerState = ScannerState , _ssParentNodeType = Nothing } -type ScannerM a = StateT ScannerState (Writer [ScanError]) a +type ScannerM a = StateT ScannerState (Writer [ScanError 'Parse]) a -- | A fold over a `Node`. cataNode :: (Maybe PosInfo -> NodeType -> [c] -> c) -> Node -> c @@ -156,9 +154,9 @@ cataNodeWithParentNodeInfo f node = cataNode f' node map (ssParentNodeType .= Just ty >>) childScanners -- | Find ignore annotations (ignore paragraph and ignore link) --- and remove nodes that should be ignored -removeIgnored :: FilePath -> Node -> Writer [ScanError] Node -removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove +-- and remove nodes that should be ignored. +removeIgnored :: Node -> Writer [ScanError 'Parse] Node +removeIgnored = withIgnoreMode . cataNodeWithParentNodeInfo remove where remove :: Maybe PosInfo @@ -178,7 +176,7 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove -- found we should report an error. (IMSParagraph, PARAGRAPH) -> (ssIgnore .= Nothing) $> defNode (IMSParagraph, x) -> do - lift . tell . makeError modePos fp . ParagraphErr $ prettyType x + lift . tell . makeError modePos . ParagraphErr $ prettyType x ssIgnore .= Nothing Node pos ty <$> sequence subs @@ -187,7 +185,7 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove -- the file should already be ignored when `checkIgnoreFile` is called. -- We should report an error if we find it anyway. (IMSAll, _) -> do - lift . tell $ makeError modePos fp FileErr + lift . tell $ makeError modePos FileErr ssIgnore .= Nothing Node pos ty <$> sequence subs @@ -205,14 +203,14 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove currentIgnore <- use ssIgnore case currentIgnore of Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do - lift $ tell $ makeError modePos fp LinkErr + lift $ tell $ makeError modePos LinkErr ssIgnore .= Nothing _ -> pass return node' when (ty == PARAGRAPH) $ use ssIgnore >>= \case Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) -> - lift $ tell $ makeError pragmaPos fp LinkErr + lift $ tell $ makeError pragmaPos LinkErr _ -> pass return scan @@ -236,7 +234,7 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove (ssIgnore .= Just (Ignore ignoreModeState correctPos)) $> defNode InvalidMode msg -> do - lift . tell $ makeError correctPos fp $ UnrecognisedErr msg + lift . tell $ makeError correctPos $ UnrecognisedErr msg (ssIgnore .= Nothing) $> defNode NotAnAnnotation -> Node pos nodeType <$> sequence subs where @@ -249,20 +247,20 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove withIgnoreMode :: ScannerM Node - -> Writer [ScanError] Node + -> Writer [ScanError 'Parse] Node withIgnoreMode action = action `runStateT` initialScannerState >>= \case -- We expect `Ignore` state to be `Nothing` when we reach EOF, -- otherwise that means there was an annotation that didn't match -- any node, so we have to report that. (node, ScannerState {_ssIgnore = Just (Ignore mode pos)}) -> case mode of IMSParagraph -> do - tell . makeError pos fp $ ParagraphErr "EOF" + tell . makeError pos $ ParagraphErr "EOF" pure node IMSLink _ -> do - tell $ makeError pos fp LinkErr + tell $ makeError pos LinkErr pure node IMSAll -> do - tell $ makeError pos fp FileErr + tell $ makeError pos FileErr pure node (node, _) -> pure node @@ -273,17 +271,14 @@ foldNode action node@(Node _ _ subs) = do b <- concatForM subs (foldNode action) return (a <> b) -type ExtractorM a = ReaderT MarkdownConfig (Writer [ScanError]) a +type ExtractorM a = ReaderT MarkdownConfig (Writer [ScanError 'Parse]) a -- | Extract information from source tree. -nodeExtractInfo - :: FilePath - -> Node - -> ExtractorM FileInfo -nodeExtractInfo fp input@(Node _ _ nSubs) = do +nodeExtractInfo :: Node -> ExtractorM FileInfo +nodeExtractInfo input@(Node _ _ nSubs) = do if checkIgnoreAllFile nSubs - then return def - else diffToFileInfo <$> (foldNode extractor =<< lift (removeIgnored fp input)) + then return (diffToFileInfo mempty) + else diffToFileInfo <$> (foldNode extractor =<< lift (removeIgnored input)) where extractor :: Node -> ExtractorM FileInfoDiff @@ -332,12 +327,16 @@ nodeExtractInfo fp input@(Node _ _ nSubs) = do let rName = nodeExtractText node rPos = toPosition pos link = if null url then rName else url + let (rLink, rAnchor) = case T.splitOn "#" link of - [t] -> (t, Nothing) - t : ts -> (t, Just $ T.intercalate "#" ts) - [] -> error "impossible" + [t] -> (t, Nothing) + t : ts -> (t, Just $ T.intercalate "#" ts) + [] -> error "impossible" + + let rInfo = referenceInfo rLink + return $ FileInfoDiff - (DList.singleton $ Reference {rName, rPos, rLink, rAnchor}) + (DList.singleton $ Reference {rName, rPos, rLink, rAnchor, rInfo}) DList.empty -- | Check if there is `ignore all` at the beginning of the file, @@ -361,10 +360,9 @@ defNode = Node Nothing DOCUMENT [] -- hard-coded default Node makeError :: Maybe PosInfo - -> FilePath -> ScanErrorDescription - -> [ScanError] -makeError pos fp errDescription = one $ ScanError (toPosition pos) fp errDescription + -> [ScanError 'Parse] +makeError pos errDescription = one $ mkParseScanError (toPosition pos) errDescription getCommentContent :: Node -> Maybe Text getCommentContent node = do @@ -406,16 +404,18 @@ textToMode ("ignore" : [x]) | otherwise = InvalidMode x textToMode _ = NotAnAnnotation -parseFileInfo :: MarkdownConfig -> FilePath -> LT.Text -> (FileInfo, [ScanError]) -parseFileInfo config fp input +parseFileInfo :: MarkdownConfig -> LT.Text -> (FileInfo, [ScanError 'Parse]) +parseFileInfo config input = runWriter $ flip runReaderT config - $ nodeExtractInfo fp + $ nodeExtractInfo $ commonmarkToNode [optFootnotes] [extAutolink] $ toStrict input markdownScanner :: MarkdownConfig -> ScanAction -markdownScanner config path = parseFileInfo config path . decodeUtf8 <$> BSL.readFile path +markdownScanner config canonicalFile = + parseFileInfo config . decodeUtf8 + <$> BSL.readFile (unCanonicalPath canonicalFile) markdownSupport :: MarkdownConfig -> ([Extension], ScanAction) markdownSupport config = ([".md"], markdownScanner config) diff --git a/src/Xrefcheck/System.hs b/src/Xrefcheck/System.hs index 20f340f..89d496d 100644 --- a/src/Xrefcheck/System.hs +++ b/src/Xrefcheck/System.hs @@ -6,9 +6,20 @@ module Xrefcheck.System ( readingSystem , askWithinCI - , RelGlobPattern + + , CanonicalPath + , canonicalizePath + , unCanonicalPath + , getDirsBetweenRootAndFile + , getPosixRelativeChild + , getPosixRelativeOrAbsoluteChild + , hasIndirectionThroughParent + , takeDirectory + , takeExtension + , ()) -import Text.Interpolation.Nyan - -import Xrefcheck.Util (normaliseWithNoTrailing) +import System.FilePath.Posix qualified as FPP +import Text.Interpolation.Nyan (int, rmode') -- | We can quite safely treat surrounding filesystem as frozen, -- so IO reading operations can be turned into pure values. @@ -36,21 +46,125 @@ readingSystem = unsafePerformIO -- Check the respective env variable which is usually set in all CIs. askWithinCI :: IO Bool askWithinCI = lookupEnv "CI" <&> \case - Just "1" -> True + Just "1" -> True Just (map C.toLower -> "true") -> True - _ -> False + _ -> False --- | Glob pattern relative to repository root. Should be created via @mkGlobPattern@ -newtype RelGlobPattern = RelGlobPattern FilePath +-- | A FilePath that has been canonicalized. +-- +-- It should be created via 'canonicalizePath'. +-- +-- Currently, canonical paths have been made absolute, normalised +-- regarding the running platform (e.g. Posix or Windows), with +-- indirections syntactically expanded as much as possible and +-- with no trailing path separator. All this results in a weaker +-- version than that provided by 'System.Directory.canonicalizePath'. +newtype CanonicalPath = UnsafeCanonicalPath + { unCanonicalPath :: FilePath + } deriving newtype (Show, Eq, Ord) + +canonicalizePath :: FilePath -> IO CanonicalPath +canonicalizePath = fmap canonicalize . Directory.makeAbsolute + where + canonicalize :: FilePath -> CanonicalPath + canonicalize = UnsafeCanonicalPath + . expandIndirections + . FP.normalise + . FP.dropTrailingPathSeparator + + expandIndirections :: FilePath -> FilePath + expandIndirections = FP.joinPath + . reverse + . expand 0 + . reverse + . FP.splitDirectories + + expand :: Int -> [FilePath] -> [FilePath] + expand acc (".." : xs) = expand (acc + 1) xs + expand acc ("." : xs) = expand acc xs + expand 0 (x : xs) = x : expand 0 xs + expand acc (_ : xs) = expand (acc - 1) xs + expand acc [] = replicate acc ".." + +-- | 'FilePath.takeDirectory' version for 'CanonicalPath'. +takeDirectory :: CanonicalPath -> CanonicalPath +takeDirectory (UnsafeCanonicalPath p) = UnsafeCanonicalPath $ FP.takeDirectory p + +-- | 'FilePath.takeExtension' version for 'CanonicalPath'. +takeExtension :: CanonicalPath -> String +takeExtension (UnsafeCanonicalPath p) = FP.takeExtension p + +-- | Get the list of directories, canonicalized, between two given paths. +getDirsBetweenRootAndFile :: CanonicalPath -> CanonicalPath -> [CanonicalPath] +getDirsBetweenRootAndFile (UnsafeCanonicalPath rootPath) file = + case stripPrefix rootPath (unCanonicalPath (takeDirectory file)) of + Just path -> UnsafeCanonicalPath <$> scanl (FP.) rootPath directories + where + directories = FP.splitDirectories $ dropWhile FP.isPathSeparator path + Nothing -> [] + +-- | Get a relative 'FilePath' from the second given path (child) with +-- respect to the first one (root). +-- +-- It returns Nothing if child cannot be reached from root downwards +-- in the filesystem tree. +-- +-- The resulting `FilePath` uses POSIX path separators. +getPosixRelativeChild :: CanonicalPath -> CanonicalPath -> Maybe FilePath +getPosixRelativeChild (UnsafeCanonicalPath root) (UnsafeCanonicalPath child) = + dropLeadingSepAndEmptyCase . fmap replaceSeparator <$> stripPrefix root child + where + replaceSeparator :: Char -> Char + replaceSeparator c + | FP.isPathSeparator c = FPP.pathSeparator + | otherwise = c + + dropLeadingSepAndEmptyCase :: FilePath -> FilePath + dropLeadingSepAndEmptyCase path = case dropWhile FP.isPathSeparator path of + "" -> "." + other -> other + +-- | Get the relative 'FilePath' using 'getPosixRelativeChild', but +-- return the same passed absolute path instead of 'Nothing'. +getPosixRelativeOrAbsoluteChild :: CanonicalPath -> CanonicalPath -> FilePath +getPosixRelativeOrAbsoluteChild root child = + fromMaybe (unCanonicalPath child) (getPosixRelativeChild root child) + +-- | Check if some 'FilePath' passes through its parent while +-- expanding indirections. +hasIndirectionThroughParent :: FilePath -> Bool +hasIndirectionThroughParent = go 0 . FP.splitDirectories + where + go :: Int -> [FilePath] -> Bool + go _ [] = False + go 0 (".." : _) = True + go acc (".." : xs) = go (acc - 1) xs + go acc ("." : xs) = go acc xs + go acc (_ : xs) = go (acc + 1) xs + +-- | Extend some 'CanonicalPath' with a given relative 'FilePath'. +-- +-- The right-hand side 'FilePath' can use both Posix and Windows +-- path separators. +( FilePath -> IO CanonicalPath +UnsafeCanonicalPath p f +infixr 5 s -> Either String RelGlobPattern mkGlobPattern path = do let spath = toString path - unless (isRelative spath) $ Left $ + unless (FPP.isRelative spath) $ Left $ "Expected a relative glob pattern, but got " <> spath -- Checking correctness of glob, e.g. "a[b" is incorrect case Glob.tryCompileWith globCompileOptions spath of - Right _ -> return (RelGlobPattern spath) + Right _ -> return (UnsafeRelGlobPattern spath) Left err -> Left [int|| Glob pattern compilation failed. @@ -61,25 +175,20 @@ mkGlobPattern path = do Special characters in file names can be escaped using square brackets, e.g. -> [<]a[>]. |] -normaliseGlobPattern :: RelGlobPattern -> RelGlobPattern -normaliseGlobPattern = RelGlobPattern . normaliseWithNoTrailing . coerce - -bindGlobPattern :: FilePath -> RelGlobPattern -> Glob.Pattern -bindGlobPattern root (RelGlobPattern relPat) = readingSystem $ do - -- TODO [#26] try to avoid using canonicalization - absPat <- canonicalizePath (root relPat) +bindGlobPattern :: CanonicalPath -> RelGlobPattern -> Glob.Pattern +bindGlobPattern root (UnsafeRelGlobPattern relPat) = readingSystem $ do + UnsafeCanonicalPath absPat <- root error $ "Glob pattern compilation failed after canonicalization: " <> toText err Right pat -> return pat -matchesGlobPatterns :: FilePath -> [RelGlobPattern] -> FilePath -> Bool +matchesGlobPatterns :: CanonicalPath -> [RelGlobPattern] -> CanonicalPath -> Bool matchesGlobPatterns root globPatterns file = or - [ Glob.match pat cFile + [ Glob.match pat $ unCanonicalPath file | globPattern <- globPatterns , let pat = bindGlobPattern root globPattern - , let cFile = readingSystem $ canonicalizePath file ] instance FromJSON RelGlobPattern where diff --git a/src/Xrefcheck/Util.hs b/src/Xrefcheck/Util.hs index b779539..772060a 100644 --- a/src/Xrefcheck/Util.hs +++ b/src/Xrefcheck/Util.hs @@ -9,7 +9,6 @@ module Xrefcheck.Util , postfixFields , (-:) , aesonConfigOption - , normaliseWithNoTrailing , posixTimeToTimeSecond , utcTimeToTimeSecond , module Xrefcheck.Util.Colorize @@ -27,7 +26,6 @@ import Data.Time (UTCTime) import Data.Time.Clock (nominalDiffTimeToSeconds) import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds) import Fmt (Builder) -import System.FilePath.Posix (dropTrailingPathSeparator, normalise) import Time (Second, Time (..), sec) import Xrefcheck.Util.Colorize @@ -54,9 +52,6 @@ type family Field f a where Field Identity a = a Field Maybe a = Maybe a -normaliseWithNoTrailing :: FilePath -> FilePath -normaliseWithNoTrailing = dropTrailingPathSeparator . normalise - posixTimeToTimeSecond :: POSIXTime -> Time Second posixTimeToTimeSecond posixTime = let picos@(MkFixed ps) = nominalDiffTimeToSeconds posixTime diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index 1ed1fdb..426c4a8 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -35,11 +35,15 @@ import Universum import Control.Concurrent.Async (Async, async, cancel, poll, wait, withAsync) import Control.Exception (AsyncException (..), throwIO) +import Control.Exception.Safe (handleAsync, handleJust) import Control.Monad.Except (MonadError (..)) +import Data.Bits (toIntegralSized) import Data.ByteString qualified as BS +import Data.List (lookup) import Data.List qualified as L import Data.Map qualified as M import Data.Reflection (Given) +import Data.Text (toCaseFold) import Data.Text.Metrics (damerauLevenshteinNorm) import Data.Time (UTCTime, defaultTimeLocale, formatTime, readPTime, rfc822DateFormat) import Data.Time.Clock.POSIX (getPOSIXTime) @@ -57,8 +61,8 @@ import Network.HTTP.Req defaultHttpConfig, ignoreResponse, req, runReq, useURI) import Network.HTTP.Types.Header (hRetryAfter) import Network.HTTP.Types.Status (Status, statusCode, statusMessage) -import System.FilePath.Posix - (equalFilePath, joinPath, makeRelative, normalise, splitDirectories, takeDirectory, ()) +import System.FilePath (isPathSeparator) +import System.FilePath.Posix (()) import Text.Interpolation.Nyan import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift) import Text.Regex.TDFA.Text (Regex, regexec) @@ -66,10 +70,6 @@ import Text.URI (Authority (..), ParseExceptionBs, URI (..), mkURIBs) import Time (RatioNat, Second, Time (..), ms, sec, threadDelay, timeout, (+:+), (-:-)) import URI.ByteString qualified as URIBS -import Control.Exception.Safe (handleAsync, handleJust) -import Data.Bits (toIntegralSized) -import Data.List (lookup) -import Data.Text (toCaseFold) import Xrefcheck.Config import Xrefcheck.Core import Xrefcheck.Orphans () @@ -265,7 +265,6 @@ reportVerifyErrs errs = fmt Invalid references dumped, #{length errs} in total. |] - data RetryAfter = Date UTCTime | Seconds (Time Second) deriving stock (Show, Eq) @@ -353,19 +352,17 @@ verifyRepo => Rewrite -> Config -> VerifyMode - -> FilePath -> RepoInfo -> IO (VerifyResult $ WithReferenceLoc VerifyError) verifyRepo rw config@Config{..} mode - root - repoInfo'@(RepoInfo files _) + repoInfo@RepoInfo{..} = do let toScan = do - (file, fileInfo) <- M.toList files - guard . not $ matchesGlobPatterns root (ecIgnoreRefsFrom cExclusions) file + (file, fileInfo) <- toPairs riFiles + guard . not $ matchesGlobPatterns riRoot (ecIgnoreRefsFrom cExclusions) file case fileInfo of Scanned fi -> do ref <- _fiReferences fi @@ -379,7 +376,7 @@ verifyRepo accumulated <- loopAsyncUntil (printer progressRef) do forConcurrentlyCaching toScan ifExternalThenCache $ \(file, ref) -> - verifyReference config mode progressRef repoInfo' root file ref + verifyReference config mode progressRef repoInfo file ref case accumulated of Right res -> return $ fold res Left (exception, partialRes) -> do @@ -408,11 +405,12 @@ verifyRepo threadDelay (ms 100) ifExternalThenCache :: (a, Reference) -> NeedsCaching Text - ifExternalThenCache (_, Reference{..}) = case locationType rLink of - ExternalLoc -> CacheUnderKey rLink - _ -> NoCaching + ifExternalThenCache (_, Reference{..}) = + if isExternal rInfo + then CacheUnderKey rLink + else NoCaching -shouldCheckLocType :: VerifyMode -> LocationType -> Bool +shouldCheckLocType :: VerifyMode -> ReferenceInfo -> Bool shouldCheckLocType mode locType | isExternal locType = shouldCheckExternal mode | isLocal locType = shouldCheckLocal mode @@ -423,29 +421,31 @@ verifyReference -> VerifyMode -> IORef VerifyProgress -> RepoInfo - -> FilePath - -> FilePath + -> CanonicalPath -> Reference -> IO (VerifyResult $ WithReferenceLoc VerifyError) verifyReference config@Config{..} mode progressRef - (RepoInfo files dirs) - root - fileWithReference + repoInfo@RepoInfo{..} + file ref@Reference{..} - = retryVerification 0 $ do - let locType = locationType rLink - if shouldCheckLocType mode locType - then case locType of - FileLocalLoc -> checkRef rAnchor fileWithReference - RelativeLoc -> checkRef rAnchor - (normalise $ takeDirectory fileWithReference - toString (canonizeLocalRef rLink)) - AbsoluteLoc -> checkRef rAnchor (root <> toString rLink) - ExternalLoc -> checkExternalResource config rLink - OtherLoc -> verifying pass + = retryVerification 0 $ + if shouldCheckLocType mode rInfo + then case rInfo of + RIFileLocal -> checkRef rAnchor riRoot file "" + RIFileRelative -> do + let shownFilepath = getPosixRelativeOrAbsoluteChild riRoot (takeDirectory file) + toString rLink + canonicalPath <- takeDirectory file do + let shownFilepath = dropWhile isPathSeparator (toString rLink) + canonicalPath <- riRoot checkExternalResource config rLink + RIOtherProtocol -> verifying pass else return mempty where retryVerification @@ -472,7 +472,7 @@ verifyReference . alterProgressErrors res numberOfRetries atomicModifyIORef' progressRef $ \VerifyProgress{..} -> - ( if isExternal $ locationType rLink + ( if isExternal rInfo then VerifyProgress{ vrExternal = let vrExternalAdvanced = moveProgress vrExternal in if toRetry @@ -488,7 +488,8 @@ verifyReference then do threadDelay currentRetryAfter retryVerification (numberOfRetries + 1) resIO - else return $ fmap (WithReferenceLoc fileWithReference ref) res + else return . (<$> res) $ + WithReferenceLoc (getPosixRelativeOrAbsoluteChild riRoot file) ref alterOverallProgress :: (Num a) @@ -530,76 +531,51 @@ verifyReference VerifyResult [ExternalHttpTooManyRequests retryAfter] -> retryAfter _ -> Nothing - isVirtual = matchesGlobPatterns root (ecIgnoreLocalRefsTo cExclusions) + isVirtual canonicalRoot = matchesGlobPatterns canonicalRoot (ecIgnoreLocalRefsTo cExclusions) - checkRef mAnchor referredFile = verifying $ - unless (isVirtual referredFile) do - checkReferredFileIsInsideRepo referredFile - mFileStatus <- tryGetFileStatus referredFile + -- Checks a local file reference. + -- + -- The `shownFilepath` argument is intended to be shown in the error + -- report when the `referredFile` path is not a child of `canonicalRoot`, + -- so it allows indirections and should be suitable for being shown to + -- the user. Also, it will be considered as outside the repository if + -- it is relative and its idirections pass through the repository root. + checkRef mAnchor canonicalRoot referredFile shownFilepath = verifying $ + unless (isVirtual canonicalRoot referredFile) do + when (hasIndirectionThroughParent shownFilepath) $ + throwError $ LocalFileOutsideRepo shownFilepath + + referredFileRelative <- + case getPosixRelativeChild canonicalRoot referredFile of + Just ps -> pure ps + Nothing -> throwError (LocalFileOutsideRepo shownFilepath) + + mFileStatus <- tryGetFileStatus referredFileRelative referredFile case mFileStatus of Right (Scanned referredFileInfo) -> whenJust mAnchor $ - checkAnchor referredFile (_fiAnchors referredFileInfo) - Right NotScannable -> pass -- no support for such file, can do nothing - Right NotAddedToGit -> throwError (LinkTargetNotAddedToGit referredFile) - Left UntrackedDirectory -> throwError (LinkTargetNotAddedToGit referredFile) + checkAnchor referredFileRelative (_fiAnchors referredFileInfo) + Right NotAddedToGit -> throwError (LinkTargetNotAddedToGit referredFileRelative) + Left UntrackedDirectory -> throwError (LinkTargetNotAddedToGit referredFileRelative) + Right NotScannable -> pass -- no support for such file, can do nothing Left TrackedDirectory -> pass -- path leads to directory, currently -- if such link contain anchor, we ignore it - -- expands ".." and "." - -- expandIndirections "a/b/../c" = "a/c" - -- expandIndirections "a/b/c/../../d" = "a/d" - -- expandIndirections "../../a" = "../../a" - -- expandIndirections "a/./b" = "a/b" - -- expandIndirections "a/b/./../c" = "a/c" - expandIndirections :: FilePath -> FilePath - expandIndirections = joinPath . reverse . expand 0 . reverse . splitDirectories - where - expand :: Int -> [FilePath] -> [FilePath] - expand acc ("..":xs) = expand (acc+1) xs - expand acc (".":xs) = expand acc xs - expand 0 (x:xs) = x : expand 0 xs - expand acc (_:xs) = expand (acc-1) xs - expand acc [] = replicate acc ".." - - checkReferredFileIsInsideRepo file = unless - (noNegativeNesting $ makeRelative root file) $ - throwError (LocalFileOutsideRepo file) - where - -- checks that relative filepath fully belongs to the root directory - -- noNegativeNesting "a/../b" = True - -- noNegativeNesting "a/../../b" = False - noNegativeNesting path = all (>= 0) $ scanl - (\n dir -> n + nestingChange dir) - (0 :: Integer) - $ splitDirectories path - - nestingChange ".." = -1 - nestingChange "." = 0 - nestingChange _ = 1 + caseInsensitive = caseInsensitiveAnchors . mcFlavor . scMarkdown $ cScanners -- Returns `Nothing` when path corresponds to an existing (and tracked) directory - tryGetFileStatus :: FilePath -> ExceptT VerifyError IO (Either DirectoryStatus FileStatus) - tryGetFileStatus file - | Just f <- mFile = return $ Right f - | Just d <- mDir = return $ Left d - | otherwise = throwError (LocalFileDoesNotExist file) - where - matchesFilePath :: FilePath -> Bool - matchesFilePath = equalFilePath $ expandIndirections file + tryGetFileStatus :: FilePath -> CanonicalPath -> ExceptT VerifyError IO (Either DirectoryStatus FileStatus) + tryGetFileStatus filePath canonicalPath + | Just f <- lookupFile canonicalPath repoInfo = return (Right f) + | Just d <- lookupDirectory canonicalPath repoInfo = return (Left d) + | otherwise = throwError (LocalFileDoesNotExist filePath) - mFile :: Maybe FileStatus - mFile = (files M.!) <$> find matchesFilePath (M.keys files) - - mDir :: Maybe DirectoryStatus - mDir = (dirs M.!) <$> find matchesFilePath (M.keys dirs) - - checkAnchor file fileAnchors anchor = do - checkAnchorReferenceAmbiguity file fileAnchors anchor - checkDeduplicatedAnchorReference file fileAnchors anchor + checkAnchor filePath fileAnchors anchor = do + checkAnchorReferenceAmbiguity filePath fileAnchors anchor + checkDeduplicatedAnchorReference filePath fileAnchors anchor checkAnchorExists fileAnchors anchor anchorNameEq = - if caseInsensitiveAnchors . mcFlavor . scMarkdown $ cScanners + if caseInsensitive then (==) `on` toCaseFold else (==) @@ -607,16 +583,16 @@ verifyReference -- has added a suffix to the duplicate, and now the original is referrenced - -- such links are pretty fragile and we discourage their use despite -- they are in fact unambiguous. - checkAnchorReferenceAmbiguity file fileAnchors anchor = do + checkAnchorReferenceAmbiguity filePath fileAnchors anchor = do let similarAnchors = filter (anchorNameEq anchor . aName) fileAnchors when (length similarAnchors > 1) $ - throwError $ AmbiguousAnchorRef file anchor (Exts.fromList similarAnchors) + throwError $ AmbiguousAnchorRef filePath anchor (Exts.fromList similarAnchors) -- Similar to the previous one, but for the case when we reference the -- renamed duplicate. - checkDeduplicatedAnchorReference file fileAnchors anchor = + checkDeduplicatedAnchorReference filePath fileAnchors anchor = whenJust (stripAnchorDupNo anchor) $ \origAnchor -> - checkAnchorReferenceAmbiguity file fileAnchors origAnchor + checkAnchorReferenceAmbiguity filePath fileAnchors origAnchor checkAnchorExists givenAnchors anchor = case find (anchorNameEq anchor . aName) givenAnchors of diff --git a/tests/Test/Xrefcheck/CanonicalPathSpec.hs b/tests/Test/Xrefcheck/CanonicalPathSpec.hs new file mode 100644 index 0000000..8c4c328 --- /dev/null +++ b/tests/Test/Xrefcheck/CanonicalPathSpec.hs @@ -0,0 +1,63 @@ +{- SPDX-FileCopyrightText: 2022 Serokell + - + - SPDX-License-Identifier: MPL-2.0 + -} + +module Test.Xrefcheck.CanonicalPathSpec where + +import Universum + +import System.Directory (getCurrentDirectory) +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.HUnit (testCase, (@?=)) + +import Xrefcheck.System + +test_canonicalPath :: IO TestTree +test_canonicalPath = do + current <- getCurrentDirectory >>= canonicalizePath + return $ testGroup "Canonical paths" + [ testGroup "Canonicalization" + [ testCase "Trailing separator" $ do + path <- canonicalizePath "./example/dir/" + getPosixRelativeOrAbsoluteChild current path @?= "example/dir" + , testCase "Parent directory indirection" $ do + path <- canonicalizePath "dir1/../dir2" + getPosixRelativeOrAbsoluteChild current path @?= "dir2" + , testCase "Through parent directory indirection" $ do + path <- canonicalizePath "dir1/../../../dir2" + root <- current getDirsBetweenRootAndFile current path @?= + [ "." + , "example" + , "example/dir" + ] + ] + ] diff --git a/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs b/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs index 0eeebd3..6f381c5 100644 --- a/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs +++ b/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs @@ -22,19 +22,19 @@ test_ignoreAnnotations = [ testCase "Check if broken link annotation produce error" do let file = "tests/markdowns/with-annotations/no_link.md" errs <- getErrs file - errs @?= makeError (Just $ PosInfo 7 1 7 31) file LinkErr + errs @?= makeError (Just $ PosInfo 7 1 7 31) LinkErr , testCase "Check if broken paragraph annotation produce error" do let file = "tests/markdowns/with-annotations/no_paragraph.md" errs <- getErrs file - errs @?= makeError (Just $ PosInfo 7 1 7 35) file (ParagraphErr "HEADING") + errs @?= makeError (Just $ PosInfo 7 1 7 35) (ParagraphErr "HEADING") , testCase "Check if broken ignore all annotation produce error" do let file = "tests/markdowns/with-annotations/unexpected_ignore_file.md" errs <- getErrs file - errs @?= makeError (Just $ PosInfo 9 1 9 29) file FileErr + errs @?= makeError (Just $ PosInfo 9 1 9 29) FileErr , testCase "Check if broken unrecognised annotation produce error" do let file = "tests/markdowns/with-annotations/unrecognised_option.md" errs <- getErrs file - errs @?= makeError (Just $ PosInfo 7 1 7 46) file (UnrecognisedErr "unrecognised-option") + errs @?= makeError (Just $ PosInfo 7 1 7 46) (UnrecognisedErr "unrecognised-option") ] , testGroup "\"ignore link\" mode" [ testCase "Check \"ignore link\" performance" $ do @@ -42,7 +42,7 @@ test_ignoreAnnotations = (fi, errs) <- parse GitHub file getRefs fi @?= ["team", "team", "team", "hire-us", "how-we-work", "privacy", "link2", "link2", "link3"] - errs @?= makeError (Just $ PosInfo 42 1 42 31) file LinkErr + errs @?= makeError (Just $ PosInfo 42 1 42 31) LinkErr ] , testGroup "\"ignore paragraph\" mode" [ testCase "Check \"ignore paragraph\" performance" $ do @@ -61,5 +61,5 @@ test_ignoreAnnotations = getRefs :: FileInfo -> [Text] getRefs fi = map rName $ fi ^. fiReferences - getErrs :: FilePath -> IO [ScanError] + getErrs :: FilePath -> IO [ScanError 'Parse] getErrs path = snd <$> parse GitHub path diff --git a/tests/Test/Xrefcheck/IgnoreRegexSpec.hs b/tests/Test/Xrefcheck/IgnoreRegexSpec.hs index c314308..2af13a2 100644 --- a/tests/Test/Xrefcheck/IgnoreRegexSpec.hs +++ b/tests/Test/Xrefcheck/IgnoreRegexSpec.hs @@ -42,7 +42,7 @@ test_ignoreRegex = give WithoutColors $ scanRepo OnlyTracked rw formats (config ^. cExclusionsL) root verifyRes <- allowRewrite showProgressBar $ \rw -> - verifyRepo rw config verifyMode root $ srRepoInfo scanResult + verifyRepo rw config verifyMode $ srRepoInfo scanResult let brokenLinks = pickBrokenLinks verifyRes diff --git a/tests/Test/Xrefcheck/LocalSpec.hs b/tests/Test/Xrefcheck/LocalSpec.hs deleted file mode 100644 index 6a33699..0000000 --- a/tests/Test/Xrefcheck/LocalSpec.hs +++ /dev/null @@ -1,25 +0,0 @@ -{- SPDX-FileCopyrightText: 2019 Serokell - - - - SPDX-License-Identifier: MPL-2.0 - -} - -module Test.Xrefcheck.LocalSpec where - -import Universum - -import Test.Tasty (TestTree, testGroup) -import Test.Tasty.HUnit (testCase, (@?=)) - -import Xrefcheck.Core (canonizeLocalRef) - -test_local_refs_canonizing :: TestTree -test_local_refs_canonizing = testGroup "Local refs canonizing" $ - [ testCase "Strips ./" $ - canonizeLocalRef "./AnchorsSpec.hs" @?= "AnchorsSpec.hs" - - , testCase "Strips ././" $ - canonizeLocalRef "././AnchorsSpec.hs" @?= "AnchorsSpec.hs" - - , testCase "Leaves plain other intact" $ - canonizeLocalRef "../AnchorsSpec.hs" @?= "../AnchorsSpec.hs" - ] diff --git a/tests/Test/Xrefcheck/RedirectRequestsSpec.hs b/tests/Test/Xrefcheck/RedirectRequestsSpec.hs index fa6f27d..c9dfbc0 100644 --- a/tests/Test/Xrefcheck/RedirectRequestsSpec.hs +++ b/tests/Test/Xrefcheck/RedirectRequestsSpec.hs @@ -1,4 +1,4 @@ -{- SPDX-FileCopyrightText: 2021 Serokell +{- SPDX-FileCopyrightText: 2022 Serokell - - SPDX-License-Identifier: MPL-2.0 -} diff --git a/tests/Test/Xrefcheck/TooManyRequestsSpec.hs b/tests/Test/Xrefcheck/TooManyRequestsSpec.hs index 8b07d49..787f383 100644 --- a/tests/Test/Xrefcheck/TooManyRequestsSpec.hs +++ b/tests/Test/Xrefcheck/TooManyRequestsSpec.hs @@ -63,7 +63,7 @@ test_tooManyRequests = testGroup "429 response tests" } } _ <- verifyReferenceWithProgress - (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing)) + (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) RIExternal) progressRef Progress{..} <- vrExternal <$> readIORef progressRef let ttc = ttTimeToCompletion <$> pTaskTimestamp @@ -88,7 +88,7 @@ test_tooManyRequests = testGroup "429 response tests" } } _ <- verifyReferenceWithProgress - (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing)) + (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) RIExternal) progressRef Progress{..} <- vrExternal <$> readIORef progressRef let ttc = fromMaybe (sec 0) $ ttTimeToCompletion <$> pTaskTimestamp @@ -114,7 +114,7 @@ test_tooManyRequests = testGroup "429 response tests" } } _ <- verifyReferenceWithProgress - (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing)) + (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) RIExternal) progressRef Progress{..} <- vrExternal <$> readIORef progressRef let ttc = ttTimeToCompletion <$> pTaskTimestamp diff --git a/tests/Test/Xrefcheck/TrailingSlashSpec.hs b/tests/Test/Xrefcheck/TrailingSlashSpec.hs index 8f0f0a1..3e5b310 100644 --- a/tests/Test/Xrefcheck/TrailingSlashSpec.hs +++ b/tests/Test/Xrefcheck/TrailingSlashSpec.hs @@ -17,6 +17,7 @@ import Xrefcheck.Core import Xrefcheck.Progress import Xrefcheck.Scan import Xrefcheck.Scanners.Markdown +import Xrefcheck.System import Xrefcheck.Util test_slash :: TestTree @@ -27,13 +28,13 @@ test_slash = testGroup "Trailing forward slash detection" $ testCase ("All the files within the root \"" <> root <> "\" should exist") $ do - (ScanResult _ (RepoInfo repoInfo _)) <- allowRewrite False $ \rw -> + (ScanResult _ RepoInfo{..}) <- allowRewrite False $ \rw -> scanRepo OnlyTracked rw format (cExclusions config & ecIgnoreL .~ []) root - nonExistentFiles <- lefts <$> forM (keys repoInfo) (\filePath -> do - predicate <- doesFileExist filePath + nonExistentFiles <- lefts <$> forM (fst <$> toPairs riFiles) (\filePath -> do + predicate <- doesFileExist . unCanonicalPath $ filePath return $ if predicate then Right () - else Left filePath) + else Left . unCanonicalPath $ filePath) whenJust (nonEmpty nonExistentFiles) $ \files -> assertFailure [int|| diff --git a/tests/Test/Xrefcheck/Util.hs b/tests/Test/Xrefcheck/Util.hs index 9503127..2826d35 100644 --- a/tests/Test/Xrefcheck/Util.hs +++ b/tests/Test/Xrefcheck/Util.hs @@ -11,11 +11,14 @@ import Network.HTTP.Types (forbidden403, unauthorized401) import Web.Firefly (ToResponse (..), route, run) import Xrefcheck.Core (FileInfo, Flavor) -import Xrefcheck.Scan (ScanError) +import Xrefcheck.Scan (ScanError, ScanStage (..)) import Xrefcheck.Scanners.Markdown (MarkdownConfig (MarkdownConfig, mcFlavor), markdownScanner) +import Xrefcheck.System (canonicalizePath) -parse :: Flavor -> FilePath -> IO (FileInfo, [ScanError]) -parse fl path = markdownScanner MarkdownConfig { mcFlavor = fl } path +parse :: Flavor -> FilePath -> IO (FileInfo, [ScanError 'Parse]) +parse fl path = do + canonicalPath <- canonicalizePath path + markdownScanner MarkdownConfig { mcFlavor = fl } canonicalPath mockServer :: IO () mockServer = run 3000 $ do diff --git a/tests/Test/Xrefcheck/UtilRequests.hs b/tests/Test/Xrefcheck/UtilRequests.hs index 8582cd8..30efc16 100644 --- a/tests/Test/Xrefcheck/UtilRequests.hs +++ b/tests/Test/Xrefcheck/UtilRequests.hs @@ -12,7 +12,6 @@ module Test.Xrefcheck.UtilRequests import Universum import Control.Exception qualified as E -import Data.Map qualified as M import Text.Interpolation.Nyan import Control.Concurrent (forkIO, killThread) @@ -21,6 +20,7 @@ import Xrefcheck.Config import Xrefcheck.Core import Xrefcheck.Progress import Xrefcheck.Scan +import Xrefcheck.System (canonicalizePath) import Xrefcheck.Util import Xrefcheck.Verify @@ -62,7 +62,7 @@ checkLinkAndProgressWithServer mock link progress vrExpectation = verifyLink :: Text -> IO (VerifyResult VerifyError, Progress Int) verifyLink link = do - let reference = Reference "" link Nothing (Position Nothing) + let reference = Reference "" link Nothing (Position Nothing) RIExternal progRef <- newIORef $ initVerifyProgress [reference] result <- verifyReferenceWithProgress reference progRef p <- readIORef progRef @@ -70,6 +70,8 @@ verifyLink link = do verifyReferenceWithProgress :: Reference -> IORef VerifyProgress -> IO (VerifyResult VerifyError) verifyReferenceWithProgress reference progRef = do + canonicalRoot <- canonicalizePath "." + file <- canonicalizePath "" fmap wrlItem <$> verifyReference (defConfig GitHub & cExclusionsL . ecIgnoreExternalRefsToL .~ []) FullMode - progRef (RepoInfo M.empty mempty) "." "" reference + progRef (RepoInfo mempty mempty canonicalRoot) file reference diff --git a/tests/golden/check-anchors/check-anchors.bats b/tests/golden/check-anchors/check-anchors.bats index c111f03..205dfd0 100644 --- a/tests/golden/check-anchors/check-anchors.bats +++ b/tests/golden/check-anchors/check-anchors.bats @@ -14,14 +14,14 @@ load '../helpers' assert_diff - <