[#197] Dependent RepoInfo keys example

Problem: During the implementation of issue #197, we though about how we
could customize the way filepaths are searched within the repository data
in an efficient way, depending on if filepaths are case-insensitive or not.

Solution: We implemented a GADT with its polymorphic parameter, corresponding
to Map keys, hidden under an existentially quantified type. Although we end up
not requiring this, we separate the essence of the idea to a commit in a
separate branch, so it can be reviewed in the future.
This commit is contained in:
Adrián Enríquez 2022-12-15 18:09:38 +01:00
parent 50e4e3bd62
commit 90abffeb4a
No known key found for this signature in database
GPG Key ID: 1D2A049F5866F977
8 changed files with 141 additions and 73 deletions

View File

@ -25,7 +25,7 @@ import Xrefcheck.Progress (allowRewrite)
import Xrefcheck.Scan
(FormatsSupport, ScanError (..), ScanResult (..), reportScanErrs, scanRepo,
specificFormatsSupport)
import Xrefcheck.Scanners.Markdown (markdownSupport)
import Xrefcheck.Scanners.Markdown (MarkdownConfig (mcFlavor), markdownSupport)
import Xrefcheck.System (askWithinCI)
import Xrefcheck.Util
import Xrefcheck.Verify (reportVerifyErrs, verifyErrors, verifyRepo)
@ -70,7 +70,9 @@ defaultAction Options{..} = do
(ScanResult scanErrs repoInfo) <- allowRewrite showProgressBar $ \rw -> do
let fullConfig = addExclusionOptions (cExclusions config) oExclusionOptions
scanRepo oScanPolicy rw (formats $ cScanners config) fullConfig oRoot
formatsSupport = formats $ cScanners config
flavor = mcFlavor $ scMarkdown $ cScanners config
scanRepo oScanPolicy rw formatsSupport fullConfig flavor oRoot
when oVerbose $
fmt [int||

View File

@ -21,7 +21,7 @@ import Data.DList qualified as DList
import Data.List qualified as L
import Data.Reflection (Given)
import Data.Text qualified as T
import Fmt (Buildable (..), Builder)
import Fmt (Buildable (..))
import System.FilePath.Posix (isPathSeparator)
import Text.Interpolation.Nyan
import Time (Second, Time)
@ -146,14 +146,6 @@ data DirectoryStatus
| UntrackedDirectory
deriving stock (Show)
-- | All tracked files and directories.
data RepoInfo = RepoInfo
{ riFiles :: Map FilePath FileStatus
-- ^ Files from the repo with `FileInfo` attached to files that we've scanned.
, riDirectories :: Map FilePath DirectoryStatus
-- ^ Directories containing those files.
} deriving stock (Show)
-----------------------------------------------------------
-- Instances
-----------------------------------------------------------
@ -203,19 +195,6 @@ instance Given ColorMode => Buildable FileInfo where
#{ interpolateIndentF 4 $ maybe "none" interpolateBlockListF (nonEmpty _fiAnchors) }
|]
instance Given ColorMode => Buildable RepoInfo where
build (RepoInfo m _)
| Just scanned <- nonEmpty [(name, info) | (name, Scanned info) <- toPairs m]
= interpolateUnlinesF $ buildFileReport <$> scanned
where
buildFileReport :: ([Char], FileInfo) -> Builder
buildFileReport (name, info) =
[int||
#{ colorIfNeeded Cyan $ name }:
#{ interpolateIndentF 2 $ build info }
|]
build _ = "No scannable files found."
-----------------------------------------------------------
-- Analysing
-----------------------------------------------------------

113
src/Xrefcheck/RepoInfo.hs Normal file
View File

@ -0,0 +1,113 @@
{- SPDX-FileCopyrightText: 2022 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-}
{-# LANGUAGE GADTs #-}
module Xrefcheck.RepoInfo
( RepoInfo
, mkRepoInfo
, riFiles
, lookupFile
, lookupDirectory
) where
import Universum
import Data.Char qualified as C
import Data.Map qualified as M
import Data.Reflection (Given)
import Fmt (Buildable (build), Builder)
import Text.Interpolation.Nyan
import Xrefcheck.Core
import Xrefcheck.Util
-- | Supose that we already have a type, `CanonicalPath`
-- that corresponds to a canonicalized `FilePath` (#197).
-- This is an example with an alias, and that is why
-- Golden tests are failing.
type CanonicalPath = FilePath
-- | The repository info: files and directories.
data RepoInfo = forall a. RepoInfo (RepoInfo' a)
-- | Generate a 'RepoInfo' with efficient path lookup depending
-- on the case-sensitivity of a given Markdown flavor.
mkRepoInfo
:: Flavor
-> [(CanonicalPath, FileStatus)]
-> [(CanonicalPath, DirectoryStatus)] -> RepoInfo
mkRepoInfo flavor files directories =
if caseInsensitiveAnchors flavor
then RepoInfo $ RICaseInsensitive $ RepoInfoData
{ ridFiles = M.fromList $ fmap (first CaseInsensitivePath) $ files
, ridDirectories = M.fromList $ fmap (first CaseInsensitivePath) $ directories
}
else RepoInfo $ RICaseSensitive $ RepoInfoData
{ ridFiles = M.fromList $ fmap (first CaseSensitivePath) $ files
, ridDirectories = M.fromList $ fmap (first CaseSensitivePath) $ directories
}
-- | All tracked files and directories.
data RepoInfoData a = RepoInfoData
{ ridFiles :: Map a FileStatus
-- ^ Files from the repo with `FileInfo` attached to files that we've scanned.
, ridDirectories :: Map a DirectoryStatus
-- ^ Directories containing those files.
}
data RepoInfo' a where
RICaseInsensitive :: RepoInfoData CaseInsensitivePath -> RepoInfo' CaseInsensitivePath
RICaseSensitive :: RepoInfoData CaseSensitivePath -> RepoInfo' CaseSensitivePath
-- Files from the repo with `FileInfo` attached to files that we've scanned.
riFiles :: RepoInfo -> [(CanonicalPath, FileStatus)]
riFiles (RepoInfo (RICaseInsensitive (RepoInfoData{..}))) =
first unCaseInsensitivePath <$> toPairs ridFiles
riFiles (RepoInfo (RICaseSensitive (RepoInfoData{..}))) =
first unCaseSensitivePath <$> toPairs ridFiles
-- Search for a file in the repository.
lookupFile :: CanonicalPath -> RepoInfo -> Maybe FileStatus
lookupFile path (RepoInfo (RICaseInsensitive (RepoInfoData{..}))) =
M.lookup (CaseInsensitivePath path) ridFiles
lookupFile path (RepoInfo (RICaseSensitive (RepoInfoData{..}))) =
M.lookup (CaseSensitivePath path) ridFiles
-- Search for a directory in the repository.
lookupDirectory :: CanonicalPath -> RepoInfo -> Maybe DirectoryStatus
lookupDirectory path (RepoInfo (RICaseInsensitive (RepoInfoData{..}))) =
M.lookup (CaseInsensitivePath path) ridDirectories
lookupDirectory path (RepoInfo (RICaseSensitive (RepoInfoData{..}))) =
M.lookup (CaseSensitivePath path) ridDirectories
data CaseSensitivePath = CaseSensitivePath
{ unCaseSensitivePath :: CanonicalPath
} deriving stock (Show, Eq, Ord)
data CaseInsensitivePath = CaseInsensitivePath
{ unCaseInsensitivePath :: CanonicalPath
} deriving stock (Show)
instance Eq CaseInsensitivePath where
(CaseInsensitivePath p1) == (CaseInsensitivePath p2) =
on (==) (fmap C.toLower) p1 p2
instance Ord CaseInsensitivePath where
compare (CaseInsensitivePath p1) (CaseInsensitivePath p2) =
on compare (fmap C.toLower) p1 p2
instance Given ColorMode => Buildable RepoInfo where
build repoInfo
| Just scanned <- nonEmpty [(name, info) | (name, Scanned info) <- riFiles repoInfo]
= interpolateUnlinesF $ buildFileReport <$> scanned
where
buildFileReport :: (CanonicalPath, FileInfo) -> Builder
buildFileReport (name, info) =
[int||
#{ colorIfNeeded Cyan $ name }:
#{ interpolateIndentF 2 $ build info }
|]
build _ = "No scannable files found."

View File

@ -13,7 +13,6 @@ module Xrefcheck.Scan
, Extension
, ScanAction
, FormatsSupport
, RepoInfo (..)
, ReadDirectoryMode(..)
, ScanError (..)
, ScanErrorDescription (..)
@ -47,6 +46,7 @@ import Text.Regex.TDFA.Text qualified as R
import Xrefcheck.Core
import Xrefcheck.Progress
import Xrefcheck.RepoInfo
import Xrefcheck.System (RelGlobPattern, matchesGlobPatterns, normaliseGlobPattern, readingSystem)
import Xrefcheck.Util
@ -87,7 +87,7 @@ type FormatsSupport = Extension -> Maybe ScanAction
data ScanResult = ScanResult
{ srScanErrors :: [ScanError]
, srRepoInfo :: RepoInfo
} deriving stock (Show)
}
data ScanError = ScanError
{ sePosition :: Position
@ -189,8 +189,8 @@ readDirectoryWith mode config scanner root =
scanRepo
:: MonadIO m
=> ScanPolicy -> Rewrite -> FormatsSupport -> ExclusionConfig -> FilePath -> m ScanResult
scanRepo scanMode rw formatsSupport config root = do
=> ScanPolicy -> Rewrite -> FormatsSupport -> ExclusionConfig -> Flavor -> FilePath -> m ScanResult
scanRepo scanMode rw formatsSupport config flavor root = do
putTextRewrite rw "Scanning repository..."
when (not $ isDirectory root) $
@ -221,12 +221,10 @@ scanRepo scanMode rw formatsSupport config root = do
let trackedDirs = foldMap (getDirs . fst) processedFiles
untrackedDirs = foldMap (getDirs . fst) notProcessedFiles
return . ScanResult errs $ RepoInfo
{ riFiles = M.fromList $ processedFiles <> notProcessedFiles
, riDirectories = M.fromList
$ map (, TrackedDirectory) trackedDirs
<> map (, UntrackedDirectory) untrackedDirs
}
return . ScanResult errs $ mkRepoInfo
flavor
(processedFiles <> notProcessedFiles)
(map (, TrackedDirectory) trackedDirs <> map (, UntrackedDirectory) untrackedDirs)
where
mscanner :: FilePath -> Maybe ScanAction
mscanner = formatsSupport . takeExtension

View File

@ -57,8 +57,7 @@ import Network.HTTP.Req
defaultHttpConfig, ignoreResponse, req, runReq, useURI)
import Network.HTTP.Types.Header (hRetryAfter)
import Network.HTTP.Types.Status (Status, statusCode, statusMessage)
import System.FilePath.Posix
(equalFilePath, joinPath, makeRelative, normalise, splitDirectories, takeDirectory, (</>))
import System.FilePath.Posix (makeRelative, normalise, splitDirectories, takeDirectory, (</>))
import Text.Interpolation.Nyan
import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift)
import Text.Regex.TDFA.Text (Regex, regexec)
@ -74,6 +73,7 @@ import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Orphans ()
import Xrefcheck.Progress
import Xrefcheck.RepoInfo
import Xrefcheck.Scan
import Xrefcheck.Scanners.Markdown (MarkdownConfig (mcFlavor))
import Xrefcheck.System
@ -361,10 +361,10 @@ verifyRepo
config@Config{..}
mode
root
repoInfo'@(RepoInfo files _)
repoInfo
= do
let toScan = do
(file, fileInfo) <- M.toList files
(file, fileInfo) <- riFiles repoInfo
guard . not $ matchesGlobPatterns root (ecIgnoreRefsFrom cExclusions) file
case fileInfo of
Scanned fi -> do
@ -379,7 +379,7 @@ verifyRepo
accumulated <- loopAsyncUntil (printer progressRef) do
forConcurrentlyCaching toScan ifExternalThenCache $ \(file, ref) ->
verifyReference config mode progressRef repoInfo' root file ref
verifyReference config mode progressRef repoInfo root file ref
case accumulated of
Right res -> return $ fold res
Left (exception, partialRes) -> do
@ -431,7 +431,7 @@ verifyReference
config@Config{..}
mode
progressRef
(RepoInfo files dirs)
repoInfo
root
fileWithReference
ref@Reference{..}
@ -545,22 +545,6 @@ verifyReference
Left TrackedDirectory -> pass -- path leads to directory, currently
-- if such link contain anchor, we ignore it
-- expands ".." and "."
-- expandIndirections "a/b/../c" = "a/c"
-- expandIndirections "a/b/c/../../d" = "a/d"
-- expandIndirections "../../a" = "../../a"
-- expandIndirections "a/./b" = "a/b"
-- expandIndirections "a/b/./../c" = "a/c"
expandIndirections :: FilePath -> FilePath
expandIndirections = joinPath . reverse . expand 0 . reverse . splitDirectories
where
expand :: Int -> [FilePath] -> [FilePath]
expand acc ("..":xs) = expand (acc+1) xs
expand acc (".":xs) = expand acc xs
expand 0 (x:xs) = x : expand 0 xs
expand acc (_:xs) = expand (acc-1) xs
expand acc [] = replicate acc ".."
checkReferredFileIsInsideRepo file = unless
(noNegativeNesting $ makeRelative root file) $
throwError (LocalFileOutsideRepo file)
@ -580,18 +564,9 @@ verifyReference
-- Returns `Nothing` when path corresponds to an existing (and tracked) directory
tryGetFileStatus :: FilePath -> ExceptT VerifyError IO (Either DirectoryStatus FileStatus)
tryGetFileStatus file
| Just f <- mFile = return $ Right f
| Just d <- mDir = return $ Left d
| Just f <- lookupFile file repoInfo = return $ Right f
| Just d <- lookupDirectory file repoInfo = return $ Left d
| otherwise = throwError (LocalFileDoesNotExist file)
where
matchesFilePath :: FilePath -> Bool
matchesFilePath = equalFilePath $ expandIndirections file
mFile :: Maybe FileStatus
mFile = (files M.!) <$> find matchesFilePath (M.keys files)
mDir :: Maybe DirectoryStatus
mDir = (dirs M.!) <$> find matchesFilePath (M.keys dirs)
checkAnchor file fileAnchors anchor = do
checkAnchorReferenceAmbiguity file fileAnchors anchor

View File

@ -39,7 +39,7 @@ test_ignoreRegex = give WithoutColors $
in testGroup "Regular expressions performance"
[ testCase "Check that only not matched links are verified" $ do
scanResult <- allowRewrite showProgressBar $ \rw ->
scanRepo OnlyTracked rw formats (config ^. cExclusionsL) root
scanRepo OnlyTracked rw formats (config ^. cExclusionsL) GitHub root
verifyRes <- allowRewrite showProgressBar $ \rw ->
verifyRepo rw config verifyMode root $ srRepoInfo scanResult

View File

@ -15,6 +15,7 @@ import Text.Interpolation.Nyan
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Progress
import Xrefcheck.RepoInfo
import Xrefcheck.Scan
import Xrefcheck.Scanners.Markdown
import Xrefcheck.Util
@ -27,9 +28,9 @@ test_slash = testGroup "Trailing forward slash detection" $
testCase ("All the files within the root \"" <>
root <>
"\" should exist") $ do
(ScanResult _ (RepoInfo repoInfo _)) <- allowRewrite False $ \rw ->
scanRepo OnlyTracked rw format (cExclusions config & ecIgnoreL .~ []) root
nonExistentFiles <- lefts <$> forM (keys repoInfo) (\filePath -> do
(ScanResult _ repoInfo) <- allowRewrite False $ \rw ->
scanRepo OnlyTracked rw format (cExclusions config & ecIgnoreL .~ []) GitHub root
nonExistentFiles <- lefts <$> forM (fst <$> riFiles repoInfo) (\filePath -> do
predicate <- doesFileExist filePath
return $ if predicate
then Right ()

View File

@ -12,7 +12,6 @@ module Test.Xrefcheck.UtilRequests
import Universum
import Control.Exception qualified as E
import Data.Map qualified as M
import Text.Interpolation.Nyan
import Control.Concurrent (forkIO, killThread)
@ -20,6 +19,7 @@ import Test.Tasty.HUnit (assertBool)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Progress
import Xrefcheck.RepoInfo
import Xrefcheck.Scan
import Xrefcheck.Util
import Xrefcheck.Verify
@ -72,4 +72,4 @@ verifyReferenceWithProgress :: Reference -> IORef VerifyProgress -> IO (VerifyRe
verifyReferenceWithProgress reference progRef = do
fmap wrlItem <$> verifyReference
(defConfig GitHub & cExclusionsL . ecIgnoreExternalRefsToL .~ []) FullMode
progRef (RepoInfo M.empty mempty) "." "" reference
progRef (mkRepoInfo GitHub mempty mempty) "." "" reference