[#135] Add support of right in-place annotations

Problem: It can be convenient not only specifying
exclusions in the config, but also annotating
the excluded thing right in-place. It is about
adding comment before the link or the paragraph
or even the whole file to ignore this item.

Solution: Support annotation as html comment in
a "<!-- xrefcheck: ignore mode -->" format, where
mode is "link" or "paragraph" or "file". Modify
`nodeExtractInfo` from `Xrefcheck.Scanners.Markdown`
just to skip a node in AST while parsing. Take into
account that "ignore file" can only be at the top
of the file or right after the license. In markdown
terms: either the first node must be HTML_BLOCK
with "<!-- xrefcheck: ignore file -->" content
or the first is HTML_BLOCK comment (smth
between "<!--" and "-->") and the second is
HTML_BLOCK with "ignore file" content.
Also take into account that "ignore link" must be
followed by a link. Strictly speaking, there is
either LINK after "ignore link" or TEXT and LINK
(if there is some text before the link).
This commit is contained in:
Alyona Antonova 2020-11-10 23:38:28 +03:00
parent 40640124db
commit ecff1dc342
2 changed files with 191 additions and 46 deletions

View File

@ -52,6 +52,7 @@ default-extensions:
ghc-options:
- -Wall
- -Wincomplete-record-updates
- -Wincomplete-uni-patterns
dependencies:
- aeson
@ -87,6 +88,7 @@ dependencies:
- text-metrics
- th-lift-instances
- th-utilities
- transformers
- name: universum
mixin: [(Universum as Prelude), (Universum.Unsafe as Unsafe)]
- yaml

View File

@ -14,12 +14,13 @@ module Xrefcheck.Scanners.Markdown
import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode)
import Control.Lens ((%=))
import Control.Monad.Trans.Except (Except, runExcept, throwE)
import qualified Data.ByteString.Lazy as BSL
import Data.Char (isSpace)
import Data.Default (Default (..))
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import Fmt (Buildable (..), blockListF, nameF, (+|), (|+))
import GHC.Conc (par)
import Xrefcheck.Core
import Xrefcheck.Scan
@ -49,54 +50,196 @@ nodeExtractText = mconcat . map extractText . nodeFlatten
CODE t -> t
_ -> ""
nodeExtractInfo :: Node -> ExceptT Text Identity FileInfo
nodeExtractInfo docNode = fmap finaliseFileInfo $ execStateT (loop docNode) def
where
loop node@(Node pos ty subs) = case ty of
DOCUMENT ->
mapM_ loop subs
PARAGRAPH ->
mapM_ loop subs
HEADING lvl ->
let text = nodeExtractText node
aType = HeaderAnchor lvl
aName = headerToAnchor text
aPos = toPosition pos
in fiAnchors %= (Anchor{..} :)
LIST _ ->
mapM_ loop subs
ITEM ->
mapM_ loop subs
HTML_INLINE htmlText -> do
let mName = T.stripSuffix "\">" =<< T.stripPrefix "<a name=\"" htmlText
whenJust mName $ \aName -> do
let aType = HandAnchor
aPos = toPosition pos
fiAnchors %= (Anchor{..} :)
LINK url _ -> do
let rName = nodeExtractText node
rPos = toPosition pos
link = if null url then rName else url
let (rLink, rAnchor) = case T.splitOn "#" link of
[t] -> (t, Nothing)
t : ts -> (t, Just $ T.intercalate "#" ts)
[] -> error "impossible"
fiReferences %= (Reference{..} :)
_ -> pass
data IgnoreMode
= Link
| Paragraph
| File
deriving Eq
parseFileInfo :: FilePath -> LT.Text -> FileInfo
parseFileInfo path input =
let outcome = runIdentity . runExceptT $
nodeExtractInfo $ commonmarkToNode [] [] $ toStrict input
in case outcome of
Left err -> error $ "Failed to parse file " <> show path <>
": " <> show err
Right res -> res
nodeExtractInfo :: Node -> Except Text FileInfo
nodeExtractInfo (Node _ _ docNodes) =
if checkIgnoreFile docNodes
then return def
else finaliseFileInfo <$> extractionResult
where
extractionResult :: Except Text FileInfo
extractionResult =
execStateT (loop docNodes Nothing) def
loop :: [Node] -> Maybe IgnoreMode -> StateT FileInfo (Except Text) ()
loop [] _ = pass
loop (node@(Node pos ty subs) : nodes) toIgnore
| toIgnore == Just File = returnError toIgnore "" pos
| toIgnore == Just Link = do
let (Node startPos _ _) = maybe defNode id $ safeHead subs
let mNext = case ty of
PARAGRAPH -> afterIgnoredLink subs <> Just nodes
TEXT txt | null (dropWhile isSpace $ T.unpack txt) -> afterIgnoredLink nodes
SOFTBREAK -> afterIgnoredLink nodes
_ -> afterIgnoredLink (node : nodes)
case mNext of
Just next -> loop next Nothing
Nothing -> returnError toIgnore "" startPos
| toIgnore == Just Paragraph =
case ty of
PARAGRAPH -> loop nodes Nothing
_ -> returnError toIgnore (prettyType ty) pos
| otherwise =
case ty of
HTML_BLOCK _ -> processHtmlNode node pos nodes toIgnore
HEADING lvl -> do
let aType = HeaderAnchor lvl
let aName = headerToAnchor $ nodeExtractText node
let aPos = toPosition pos
fiAnchors %= (Anchor{..} :)
loop nodes toIgnore
HTML_INLINE htmlText -> do
let mName = T.stripSuffix "\">" =<< T.stripPrefix "<a name=\"" htmlText
whenJust mName $ \aName -> do
let aType = HandAnchor
aPos = toPosition pos
fiAnchors %= (Anchor{..} :)
processHtmlNode node pos nodes toIgnore
LINK url _ -> do
let rName = nodeExtractText node
rPos = toPosition pos
link = if null url then rName else url
let (rLink, rAnchor) = case T.splitOn "#" link of
[t] -> (t, Nothing)
t : ts -> (t, Just $ T.intercalate "#" ts)
[] -> error "impossible"
fiReferences %= (Reference{..} :)
loop nodes toIgnore
_ -> loop (subs ++ nodes) toIgnore
defNode :: Node
defNode = Node Nothing DOCUMENT [] -- hard-coded default Node
getCommentContent :: Node -> Maybe Text
getCommentContent node = do
txt <- getHTMLText node
T.stripSuffix "-->" =<< T.stripPrefix "<!--" (T.strip txt)
where
getHTMLText :: Node -> Maybe Text
getHTMLText (Node _ (HTML_BLOCK txt) _) = Just txt
getHTMLText (Node _ (HTML_INLINE txt) _) = Just txt
getHTMLText _ = Nothing
getXrefcheckContent :: Node -> Maybe Text
getXrefcheckContent node =
let notStripped = T.stripPrefix "xrefcheck:" . T.strip =<<
getCommentContent node
in T.strip <$> notStripped
getIgnoreMode :: Node -> Maybe IgnoreMode
getIgnoreMode node =
let mContent = getXrefcheckContent node
textToMode :: [Text] -> Maybe IgnoreMode
textToMode ("ignore" : [x])
| x == "link" = return Link
| x == "paragraph" = return Paragraph
| x == "file" = return File
| otherwise = Nothing
textToMode _ = Nothing
in textToMode . words =<< mContent
isComment :: Node -> Bool
isComment = isJust . getCommentContent
isIgnoreFile :: Node -> Bool
isIgnoreFile = (Just File ==) . getIgnoreMode
checkIgnoreFile :: [Node] -> Bool
checkIgnoreFile nodes =
let isSimpleComment :: Node -> Bool
isSimpleComment node = isComment node && not (isIgnoreFile node)
mIgnoreFile = safeHead $ dropWhile isSimpleComment nodes
in maybe False isIgnoreFile mIgnoreFile
isLink :: Node -> Bool
isLink (Node _ (LINK _ _) _) = True
isLink _ = False
isText :: Node -> Bool
isText (Node _ (TEXT _) _) = True
isText _ = False
afterIgnoredLink :: [Node] -> Maybe [Node]
afterIgnoredLink (fNode : nodes)
| isLink fNode = return nodes
| sNode : nodes' <- nodes =
if isText fNode && isLink sNode
then return nodes'
else Nothing
| otherwise = Nothing
afterIgnoredLink _ = Nothing
prettyPos :: Maybe PosInfo -> Text
prettyPos pos =
let posToText :: Position -> Text
posToText (Position mPos) = fromMaybe "" mPos
in "(" <> (posToText $ toPosition pos) <> ")"
prettyType :: NodeType -> Text
prettyType ty =
let mType = safeHead $ words $ show ty
in maybe "" id mType
fileMsg :: Text
fileMsg =
"\"ignore file\" must be at the top of \
\markdown or right after comments at the top"
linkMsg :: Text
linkMsg = "expected a LINK after \"ignore link\" "
paragraphMsg :: Text -> Text
paragraphMsg txt = unwords
[ "expected a PARAGRAPH after \
\\"ignore paragraph\", but found"
, txt
, ""
]
unrecognisedMsg :: Text -> Text
unrecognisedMsg txt = unwords
[ "unrecognised option"
, "\"" <> txt <> "\""
, "perhaps you meant \
\<\"ignore link\"|\"ignore paragraph\"|\"ignore file\"> "
]
returnError :: Maybe IgnoreMode -> Text -> Maybe PosInfo -> StateT FileInfo (Except Text) ()
returnError mode txt pos =
let errMsg = case mode of
Just Link -> linkMsg
Just Paragraph -> paragraphMsg txt
Just File -> fileMsg
Nothing -> unrecognisedMsg txt
posInfo = prettyPos pos
in lift $ throwE $ errMsg <> posInfo
processHtmlNode :: Node -> Maybe PosInfo -> [Node] -> Maybe IgnoreMode -> StateT FileInfo (Except Text) ()
processHtmlNode node pos nodes toIgnore = do
let xrefcheckContent = getXrefcheckContent node
case xrefcheckContent of
Just content -> maybe (returnError Nothing content pos)
(loop nodes . pure) $ getIgnoreMode node
Nothing -> loop nodes toIgnore
parseFileInfo :: LT.Text -> Either Text FileInfo
parseFileInfo input = runExcept $ nodeExtractInfo $
commonmarkToNode [] [] $ toStrict input
markdownScanner :: ScanAction
markdownScanner path = liftIO $ do
res <- parseFileInfo path . decodeUtf8 <$> BSL.readFile path
force res `par` return res
markdownScanner path = do
errOrInfo <- parseFileInfo . decodeUtf8 <$> BSL.readFile path
case errOrInfo of
Left errTxt -> do
die $ "Error when scanning " <> path <> ": " <> T.unpack errTxt
Right fileInfo -> return fileInfo
markdownSupport :: ([Extension], ScanAction)
markdownSupport = ([".md"], markdownScanner)