mirror of
https://github.com/serokell/xrefcheck.git
synced 2024-08-16 09:00:45 +03:00
[#135] Add support of right in-place annotations
Problem: It can be convenient not only specifying exclusions in the config, but also annotating the excluded thing right in-place. It is about adding comment before the link or the paragraph or even the whole file to ignore this item. Solution: Support annotation as html comment in a "<!-- xrefcheck: ignore mode -->" format, where mode is "link" or "paragraph" or "file". Modify `nodeExtractInfo` from `Xrefcheck.Scanners.Markdown` just to skip a node in AST while parsing. Take into account that "ignore file" can only be at the top of the file or right after the license. In markdown terms: either the first node must be HTML_BLOCK with "<!-- xrefcheck: ignore file -->" content or the first is HTML_BLOCK comment (smth between "<!--" and "-->") and the second is HTML_BLOCK with "ignore file" content. Also take into account that "ignore link" must be followed by a link. Strictly speaking, there is either LINK after "ignore link" or TEXT and LINK (if there is some text before the link).
This commit is contained in:
parent
40640124db
commit
ecff1dc342
@ -52,6 +52,7 @@ default-extensions:
|
||||
ghc-options:
|
||||
- -Wall
|
||||
- -Wincomplete-record-updates
|
||||
- -Wincomplete-uni-patterns
|
||||
|
||||
dependencies:
|
||||
- aeson
|
||||
@ -87,6 +88,7 @@ dependencies:
|
||||
- text-metrics
|
||||
- th-lift-instances
|
||||
- th-utilities
|
||||
- transformers
|
||||
- name: universum
|
||||
mixin: [(Universum as Prelude), (Universum.Unsafe as Unsafe)]
|
||||
- yaml
|
||||
|
@ -14,12 +14,13 @@ module Xrefcheck.Scanners.Markdown
|
||||
|
||||
import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode)
|
||||
import Control.Lens ((%=))
|
||||
import Control.Monad.Trans.Except (Except, runExcept, throwE)
|
||||
import qualified Data.ByteString.Lazy as BSL
|
||||
import Data.Char (isSpace)
|
||||
import Data.Default (Default (..))
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Lazy as LT
|
||||
import Fmt (Buildable (..), blockListF, nameF, (+|), (|+))
|
||||
import GHC.Conc (par)
|
||||
|
||||
import Xrefcheck.Core
|
||||
import Xrefcheck.Scan
|
||||
@ -49,54 +50,196 @@ nodeExtractText = mconcat . map extractText . nodeFlatten
|
||||
CODE t -> t
|
||||
_ -> ""
|
||||
|
||||
nodeExtractInfo :: Node -> ExceptT Text Identity FileInfo
|
||||
nodeExtractInfo docNode = fmap finaliseFileInfo $ execStateT (loop docNode) def
|
||||
where
|
||||
loop node@(Node pos ty subs) = case ty of
|
||||
DOCUMENT ->
|
||||
mapM_ loop subs
|
||||
PARAGRAPH ->
|
||||
mapM_ loop subs
|
||||
HEADING lvl ->
|
||||
let text = nodeExtractText node
|
||||
aType = HeaderAnchor lvl
|
||||
aName = headerToAnchor text
|
||||
aPos = toPosition pos
|
||||
in fiAnchors %= (Anchor{..} :)
|
||||
LIST _ ->
|
||||
mapM_ loop subs
|
||||
ITEM ->
|
||||
mapM_ loop subs
|
||||
HTML_INLINE htmlText -> do
|
||||
let mName = T.stripSuffix "\">" =<< T.stripPrefix "<a name=\"" htmlText
|
||||
whenJust mName $ \aName -> do
|
||||
let aType = HandAnchor
|
||||
aPos = toPosition pos
|
||||
fiAnchors %= (Anchor{..} :)
|
||||
LINK url _ -> do
|
||||
let rName = nodeExtractText node
|
||||
rPos = toPosition pos
|
||||
link = if null url then rName else url
|
||||
let (rLink, rAnchor) = case T.splitOn "#" link of
|
||||
[t] -> (t, Nothing)
|
||||
t : ts -> (t, Just $ T.intercalate "#" ts)
|
||||
[] -> error "impossible"
|
||||
fiReferences %= (Reference{..} :)
|
||||
_ -> pass
|
||||
data IgnoreMode
|
||||
= Link
|
||||
| Paragraph
|
||||
| File
|
||||
deriving Eq
|
||||
|
||||
parseFileInfo :: FilePath -> LT.Text -> FileInfo
|
||||
parseFileInfo path input =
|
||||
let outcome = runIdentity . runExceptT $
|
||||
nodeExtractInfo $ commonmarkToNode [] [] $ toStrict input
|
||||
in case outcome of
|
||||
Left err -> error $ "Failed to parse file " <> show path <>
|
||||
": " <> show err
|
||||
Right res -> res
|
||||
nodeExtractInfo :: Node -> Except Text FileInfo
|
||||
nodeExtractInfo (Node _ _ docNodes) =
|
||||
if checkIgnoreFile docNodes
|
||||
then return def
|
||||
else finaliseFileInfo <$> extractionResult
|
||||
where
|
||||
extractionResult :: Except Text FileInfo
|
||||
extractionResult =
|
||||
execStateT (loop docNodes Nothing) def
|
||||
|
||||
loop :: [Node] -> Maybe IgnoreMode -> StateT FileInfo (Except Text) ()
|
||||
loop [] _ = pass
|
||||
loop (node@(Node pos ty subs) : nodes) toIgnore
|
||||
| toIgnore == Just File = returnError toIgnore "" pos
|
||||
| toIgnore == Just Link = do
|
||||
let (Node startPos _ _) = maybe defNode id $ safeHead subs
|
||||
let mNext = case ty of
|
||||
PARAGRAPH -> afterIgnoredLink subs <> Just nodes
|
||||
TEXT txt | null (dropWhile isSpace $ T.unpack txt) -> afterIgnoredLink nodes
|
||||
SOFTBREAK -> afterIgnoredLink nodes
|
||||
_ -> afterIgnoredLink (node : nodes)
|
||||
case mNext of
|
||||
Just next -> loop next Nothing
|
||||
Nothing -> returnError toIgnore "" startPos
|
||||
| toIgnore == Just Paragraph =
|
||||
case ty of
|
||||
PARAGRAPH -> loop nodes Nothing
|
||||
_ -> returnError toIgnore (prettyType ty) pos
|
||||
| otherwise =
|
||||
case ty of
|
||||
HTML_BLOCK _ -> processHtmlNode node pos nodes toIgnore
|
||||
HEADING lvl -> do
|
||||
let aType = HeaderAnchor lvl
|
||||
let aName = headerToAnchor $ nodeExtractText node
|
||||
let aPos = toPosition pos
|
||||
fiAnchors %= (Anchor{..} :)
|
||||
loop nodes toIgnore
|
||||
HTML_INLINE htmlText -> do
|
||||
let mName = T.stripSuffix "\">" =<< T.stripPrefix "<a name=\"" htmlText
|
||||
whenJust mName $ \aName -> do
|
||||
let aType = HandAnchor
|
||||
aPos = toPosition pos
|
||||
fiAnchors %= (Anchor{..} :)
|
||||
processHtmlNode node pos nodes toIgnore
|
||||
LINK url _ -> do
|
||||
let rName = nodeExtractText node
|
||||
rPos = toPosition pos
|
||||
link = if null url then rName else url
|
||||
let (rLink, rAnchor) = case T.splitOn "#" link of
|
||||
[t] -> (t, Nothing)
|
||||
t : ts -> (t, Just $ T.intercalate "#" ts)
|
||||
[] -> error "impossible"
|
||||
fiReferences %= (Reference{..} :)
|
||||
loop nodes toIgnore
|
||||
_ -> loop (subs ++ nodes) toIgnore
|
||||
|
||||
defNode :: Node
|
||||
defNode = Node Nothing DOCUMENT [] -- hard-coded default Node
|
||||
|
||||
getCommentContent :: Node -> Maybe Text
|
||||
getCommentContent node = do
|
||||
txt <- getHTMLText node
|
||||
T.stripSuffix "-->" =<< T.stripPrefix "<!--" (T.strip txt)
|
||||
where
|
||||
getHTMLText :: Node -> Maybe Text
|
||||
getHTMLText (Node _ (HTML_BLOCK txt) _) = Just txt
|
||||
getHTMLText (Node _ (HTML_INLINE txt) _) = Just txt
|
||||
getHTMLText _ = Nothing
|
||||
|
||||
getXrefcheckContent :: Node -> Maybe Text
|
||||
getXrefcheckContent node =
|
||||
let notStripped = T.stripPrefix "xrefcheck:" . T.strip =<<
|
||||
getCommentContent node
|
||||
in T.strip <$> notStripped
|
||||
|
||||
getIgnoreMode :: Node -> Maybe IgnoreMode
|
||||
getIgnoreMode node =
|
||||
let mContent = getXrefcheckContent node
|
||||
|
||||
textToMode :: [Text] -> Maybe IgnoreMode
|
||||
textToMode ("ignore" : [x])
|
||||
| x == "link" = return Link
|
||||
| x == "paragraph" = return Paragraph
|
||||
| x == "file" = return File
|
||||
| otherwise = Nothing
|
||||
textToMode _ = Nothing
|
||||
in textToMode . words =<< mContent
|
||||
|
||||
isComment :: Node -> Bool
|
||||
isComment = isJust . getCommentContent
|
||||
|
||||
isIgnoreFile :: Node -> Bool
|
||||
isIgnoreFile = (Just File ==) . getIgnoreMode
|
||||
|
||||
checkIgnoreFile :: [Node] -> Bool
|
||||
checkIgnoreFile nodes =
|
||||
let isSimpleComment :: Node -> Bool
|
||||
isSimpleComment node = isComment node && not (isIgnoreFile node)
|
||||
|
||||
mIgnoreFile = safeHead $ dropWhile isSimpleComment nodes
|
||||
in maybe False isIgnoreFile mIgnoreFile
|
||||
|
||||
isLink :: Node -> Bool
|
||||
isLink (Node _ (LINK _ _) _) = True
|
||||
isLink _ = False
|
||||
|
||||
isText :: Node -> Bool
|
||||
isText (Node _ (TEXT _) _) = True
|
||||
isText _ = False
|
||||
|
||||
afterIgnoredLink :: [Node] -> Maybe [Node]
|
||||
afterIgnoredLink (fNode : nodes)
|
||||
| isLink fNode = return nodes
|
||||
| sNode : nodes' <- nodes =
|
||||
if isText fNode && isLink sNode
|
||||
then return nodes'
|
||||
else Nothing
|
||||
| otherwise = Nothing
|
||||
afterIgnoredLink _ = Nothing
|
||||
|
||||
prettyPos :: Maybe PosInfo -> Text
|
||||
prettyPos pos =
|
||||
let posToText :: Position -> Text
|
||||
posToText (Position mPos) = fromMaybe "" mPos
|
||||
in "(" <> (posToText $ toPosition pos) <> ")"
|
||||
|
||||
prettyType :: NodeType -> Text
|
||||
prettyType ty =
|
||||
let mType = safeHead $ words $ show ty
|
||||
in maybe "" id mType
|
||||
|
||||
fileMsg :: Text
|
||||
fileMsg =
|
||||
"\"ignore file\" must be at the top of \
|
||||
\markdown or right after comments at the top"
|
||||
|
||||
linkMsg :: Text
|
||||
linkMsg = "expected a LINK after \"ignore link\" "
|
||||
|
||||
paragraphMsg :: Text -> Text
|
||||
paragraphMsg txt = unwords
|
||||
[ "expected a PARAGRAPH after \
|
||||
\\"ignore paragraph\", but found"
|
||||
, txt
|
||||
, ""
|
||||
]
|
||||
|
||||
unrecognisedMsg :: Text -> Text
|
||||
unrecognisedMsg txt = unwords
|
||||
[ "unrecognised option"
|
||||
, "\"" <> txt <> "\""
|
||||
, "perhaps you meant \
|
||||
\<\"ignore link\"|\"ignore paragraph\"|\"ignore file\"> "
|
||||
]
|
||||
|
||||
returnError :: Maybe IgnoreMode -> Text -> Maybe PosInfo -> StateT FileInfo (Except Text) ()
|
||||
returnError mode txt pos =
|
||||
let errMsg = case mode of
|
||||
Just Link -> linkMsg
|
||||
Just Paragraph -> paragraphMsg txt
|
||||
Just File -> fileMsg
|
||||
Nothing -> unrecognisedMsg txt
|
||||
posInfo = prettyPos pos
|
||||
in lift $ throwE $ errMsg <> posInfo
|
||||
|
||||
processHtmlNode :: Node -> Maybe PosInfo -> [Node] -> Maybe IgnoreMode -> StateT FileInfo (Except Text) ()
|
||||
processHtmlNode node pos nodes toIgnore = do
|
||||
let xrefcheckContent = getXrefcheckContent node
|
||||
case xrefcheckContent of
|
||||
Just content -> maybe (returnError Nothing content pos)
|
||||
(loop nodes . pure) $ getIgnoreMode node
|
||||
Nothing -> loop nodes toIgnore
|
||||
|
||||
parseFileInfo :: LT.Text -> Either Text FileInfo
|
||||
parseFileInfo input = runExcept $ nodeExtractInfo $
|
||||
commonmarkToNode [] [] $ toStrict input
|
||||
|
||||
markdownScanner :: ScanAction
|
||||
markdownScanner path = liftIO $ do
|
||||
res <- parseFileInfo path . decodeUtf8 <$> BSL.readFile path
|
||||
force res `par` return res
|
||||
markdownScanner path = do
|
||||
errOrInfo <- parseFileInfo . decodeUtf8 <$> BSL.readFile path
|
||||
case errOrInfo of
|
||||
Left errTxt -> do
|
||||
die $ "Error when scanning " <> path <> ": " <> T.unpack errTxt
|
||||
Right fileInfo -> return fileInfo
|
||||
|
||||
markdownSupport :: ([Extension], ScanAction)
|
||||
markdownSupport = ([".md"], markdownScanner)
|
||||
|
Loading…
Reference in New Issue
Block a user