[#135] Add tests and markdowns to check ignoring regex performance

Problem: There are no tests checking ignoring
regex performance.

Solution: Add test checking that broken links
matched by regexs are not verified and
test checking that not matched broken links
are verified as links with error.
This commit is contained in:
Alyona Antonova 2020-12-28 15:44:41 +03:00
parent e22bb18c55
commit bc9e497efb
6 changed files with 121 additions and 2 deletions

View File

@ -74,6 +74,7 @@ dependencies:
- Glob
- http-client
- http-types
- HUnit
- lens
- pretty-terminal
- modern-uri

View File

@ -7,6 +7,7 @@
module Xrefcheck.Config where
import Control.Lens (makeLensesWith)
import Data.Aeson.Options (defaultOptions)
import Data.Aeson.TH (deriveFromJSON)
import Data.Yaml (FromJSON (..), decodeEither', prettyPrintParseException, withText)
@ -21,6 +22,7 @@ import Data.FileEmbed (embedFile)
import Time (KnownRatName, Second, Time, unitsP)
import Xrefcheck.System (RelGlobPattern)
import Xrefcheck.Util (postfixFields)
-- | Overall config.
data Config = Config
@ -46,6 +48,9 @@ data VerifyConfig = VerifyConfig
-- ^ Regular expressions that match external references we should not verify.
}
makeLensesWith postfixFields ''Config
makeLensesWith postfixFields ''VerifyConfig
-----------------------------------------------------------
-- Default config
-----------------------------------------------------------
@ -93,7 +98,6 @@ defaultCompOption =
, lastStarGreedy = False
}
-- Default boolean value according to
-- https://hackage.haskell.org/package/regex-tdfa-1.3.1.0/docs/Text-Regex-TDFA.html#t:ExecOption
-- ExecOption value to improve speed
defaultExecOption :: ExecOption
defaultExecOption = ExecOption {captureGroups = False}

View File

@ -8,8 +8,10 @@
module Xrefcheck.Util
( nameF'
, paren
, postfixFields
) where
import Control.Lens (LensRules, lensField, lensRules, mappingNamer)
import Fmt (Builder, build, fmt, nameF)
import System.Console.Pretty (Pretty (..), Style (Faint))
@ -24,3 +26,6 @@ paren :: Builder -> Builder
paren a
| a == "" = ""
| otherwise = "(" <> a <> ")"
postfixFields :: LensRules
postfixFields = lensRules & lensField .~ mappingNamer (\n -> [n ++ "L"])

View File

@ -0,0 +1,84 @@
{- SPDX-FileCopyrightText: 2019 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-}
module Test.Xrefcheck.IgnoreRegexSpec where
import Data.Yaml (decodeEither')
import Test.Hspec (Spec, describe, it)
import Test.HUnit (assertFailure)
import Text.Regex.TDFA (Regex)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Progress (allowRewrite)
import Xrefcheck.Scan (gatherRepoInfo, specificFormatsSupport)
import Xrefcheck.Scanners.Markdown
import Xrefcheck.Verify
(VerifyError, VerifyResult, WithReferenceLoc(..), verifyErrors, verifyRepo)
spec :: Spec
spec = do
describe "Regular expressions performance" $ do
let root = "tests/markdowns/without-annotations"
let showProgressBar = False
let formats = specificFormatsSupport [markdownSupport]
let verifyMode = ExternalOnlyMode
let linksTxt =
[ "https://bad.((external.)?)reference(/?)"
, "https://bad.reference.(org|com)"
]
let regexs = linksToRegexs linksTxt
let config = setIgnoreRefs regexs defConfig
it "Check that only not matched links are verified" $ do
repoInfo <- allowRewrite showProgressBar $ \rw ->
gatherRepoInfo rw formats (config ^. cTraversalL) root
verifyRes <- allowRewrite showProgressBar $ \rw ->
verifyRepo rw (config ^. cVerificationL) verifyMode root repoInfo
let brokenLinks = pickBrokenLinks verifyRes
let matchedLinks =
[ "https://bad.referenc/"
, "https://bad.reference"
, "https://bad.external.reference/"
, "https://bad.external.reference"
, "https://bad.reference.org"
, "https://bad.reference.com"
]
let notMatchedLinks =
[ "https://non-existent.reference/"
, "https://bad.externall.reference"
, "https://bad.reference.io"
]
forM_ matchedLinks $ \link -> do
when (link `elem` brokenLinks) $
assertFailure $ "Link \"" <> show link <>
"\" is considered as broken but it should be ignored"
forM_ notMatchedLinks $ \link -> do
when (link `notElem` brokenLinks) $
assertFailure $ "Link \"" <> show link <>
"\" is not considered as broken but it is (and shouldn't be ignored)"
where
pickBrokenLinks :: VerifyResult (WithReferenceLoc VerifyError) -> [Text]
pickBrokenLinks verifyRes =
case verifyErrors verifyRes of
Just neWithRefLoc -> map (rLink . wrlReference) $ toList neWithRefLoc
Nothing -> []
linksToRegexs :: [Text] -> Maybe [Regex]
linksToRegexs links =
let errOrRegexs = map (decodeEither' . encodeUtf8) links
maybeRegexs = map (either (error . show) Just) errOrRegexs
in sequence maybeRegexs
setIgnoreRefs :: Maybe [Regex] -> Config -> Config
setIgnoreRefs regexs = (cVerificationL . vcIgnoreRefsL) .~ regexs

View File

@ -0,0 +1,11 @@
<!--
- SPDX-FileCopyrightText: 2018-2019 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-->
[Non-existent reference](https://non-existent.reference/)
[Bad externall reference](https://bad.externall.reference)
[Bad 'io' reference](https://bad.reference.io)

View File

@ -0,0 +1,14 @@
<!--
- SPDX-FileCopyrightText: 2018-2019 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-->
[Bad reference with '/'](https://bad.reference/)
[Bad reference without '/'](https://bad.reference)
[Bad external reference with '/'](https://bad.external.reference/)
[Bad external reference without '/'](https://bad.external.reference)
[Bad 'org' reference](https://bad.reference.org)
[Bad 'com' reference](https://bad.reference.com)