mirror of
https://github.com/serokell/xrefcheck.git
synced 2024-08-16 09:00:45 +03:00
[#71] Separate concerns in Node traversal.
Problem: The tree traversal uses explicit recursion and does not-closely-unrelated stuff at once. Solution: Separate different actions.
This commit is contained in:
parent
92c3de5587
commit
d644a95734
@ -27,6 +27,7 @@ default-extensions:
|
||||
- DefaultSignatures
|
||||
- DeriveDataTypeable
|
||||
- DeriveGeneric
|
||||
- DerivingStrategies
|
||||
- FlexibleContexts
|
||||
- FlexibleInstances
|
||||
- FunctionalDependencies
|
||||
@ -67,6 +68,7 @@ dependencies:
|
||||
- deepseq
|
||||
- directory-tree
|
||||
- directory
|
||||
- dlist
|
||||
- filepath
|
||||
- file-embed
|
||||
- fmt
|
||||
|
@ -26,6 +26,8 @@ import Text.Numeral.Roman (toRoman)
|
||||
|
||||
import Xrefcheck.Progress
|
||||
import Xrefcheck.Util
|
||||
import Data.DList (DList)
|
||||
import qualified Data.DList as DList
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- Types
|
||||
@ -94,6 +96,21 @@ data Anchor = Anchor
|
||||
, aPos :: Position
|
||||
} deriving (Show, Eq, Generic)
|
||||
|
||||
data FileInfoDiff = FileInfoDiff
|
||||
{ _fidReferences :: DList Reference
|
||||
, _fidAnchors :: DList Anchor
|
||||
}
|
||||
makeLenses ''FileInfoDiff
|
||||
|
||||
diffToFileInfo :: FileInfoDiff -> FileInfo
|
||||
diffToFileInfo (FileInfoDiff refs anchors) = FileInfo (DList.toList refs) (DList.toList anchors)
|
||||
|
||||
instance Semigroup FileInfoDiff where
|
||||
FileInfoDiff a b <> FileInfoDiff c d = FileInfoDiff (a <> c) (b <> d)
|
||||
|
||||
instance Monoid FileInfoDiff where
|
||||
mempty = FileInfoDiff mempty mempty
|
||||
|
||||
-- | All information regarding a single file we care about.
|
||||
data FileInfo = FileInfo
|
||||
{ _fiReferences :: [Reference]
|
||||
@ -102,7 +119,7 @@ data FileInfo = FileInfo
|
||||
makeLenses ''FileInfo
|
||||
|
||||
instance Default FileInfo where
|
||||
def = FileInfo [] []
|
||||
def = diffToFileInfo mempty
|
||||
|
||||
newtype RepoInfo = RepoInfo (Map FilePath FileInfo)
|
||||
deriving (Show)
|
||||
|
@ -18,12 +18,9 @@ module Xrefcheck.Scanners.Markdown
|
||||
import Universum
|
||||
|
||||
import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode)
|
||||
import Control.Lens ((%=))
|
||||
import Control.Monad.Trans.Except (Except, runExcept, throwE)
|
||||
import Control.Monad.Except (MonadError, throwError)
|
||||
import Data.Aeson.TH (deriveFromJSON)
|
||||
import Data.ByteString.Lazy qualified as BSL
|
||||
import Data.Char (isSpace)
|
||||
import Data.Default (Default (..))
|
||||
import Data.Text qualified as T
|
||||
import Data.Text.Lazy qualified as LT
|
||||
import Fmt (Buildable (..), blockListF, nameF, (+|), (|+))
|
||||
@ -31,6 +28,7 @@ import Fmt (Buildable (..), blockListF, nameF, (+|), (|+))
|
||||
import Xrefcheck.Core
|
||||
import Xrefcheck.Scan
|
||||
import Xrefcheck.Util
|
||||
import Data.Default (def)
|
||||
|
||||
data MarkdownConfig = MarkdownConfig
|
||||
{ mcFlavor :: Flavor
|
||||
@ -57,9 +55,8 @@ toPosition = Position . \case
|
||||
startLine |+ ":" +| startColumn |+ " - " +|
|
||||
endLine |+ ":" +| endColumn |+ ""
|
||||
|
||||
nodeFlatten :: Node -> [NodeType]
|
||||
nodeFlatten (Node _pos ty subs) = ty : concatMap nodeFlatten subs
|
||||
|
||||
{- | Extract text from the topmost node.
|
||||
-}
|
||||
nodeExtractText :: Node -> Text
|
||||
nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten
|
||||
where
|
||||
@ -68,143 +65,153 @@ nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten
|
||||
CODE t -> t
|
||||
_ -> ""
|
||||
|
||||
nodeFlatten :: Node -> [NodeType]
|
||||
nodeFlatten (Node _pos ty subs) = ty : concatMap nodeFlatten subs
|
||||
|
||||
data IgnoreMode
|
||||
= Link
|
||||
| Paragraph
|
||||
| File
|
||||
| None
|
||||
deriving Eq
|
||||
|
||||
nodeExtractInfo :: MarkdownConfig -> Node -> Except Text FileInfo
|
||||
nodeExtractInfo config (Node _ _ docNodes) =
|
||||
if checkIgnoreFile docNodes
|
||||
then return def
|
||||
else finaliseFileInfo <$> extractionResult
|
||||
{- | A fold over a `Node`.
|
||||
-}
|
||||
cataNode :: (Maybe PosInfo -> NodeType -> [c] -> c) -> Node -> c
|
||||
cataNode f (Node pos ty subs) = f pos ty (cataNode f <$> subs)
|
||||
|
||||
{- | Remove nodes with accordance with global `MarkdownConfig` and local
|
||||
overrides.
|
||||
-}
|
||||
removeIgnored :: Node -> Either Text Node
|
||||
removeIgnored = runIdentity . runExceptT . flip evalStateT None . cataNode remove
|
||||
where
|
||||
extractionResult :: Except Text FileInfo
|
||||
extractionResult = execStateT (loop docNodes Nothing) def
|
||||
remove
|
||||
:: (MonadError Text m, MonadState IgnoreMode m)
|
||||
=> Maybe PosInfo
|
||||
-> NodeType
|
||||
-> [m Node]
|
||||
-> m Node
|
||||
remove pos ty subs = do
|
||||
mode <- get
|
||||
case (mode, ty) of
|
||||
(Paragraph, PARAGRAPH) -> put None $> defNode
|
||||
(Paragraph, x) -> throwError (makeError mode (prettyType x) pos)
|
||||
(File, _) -> throwError (makeError mode "" pos)
|
||||
(Link, LINK {}) -> put None $> defNode
|
||||
(Link, _) -> Node pos ty <$> sequence subs
|
||||
(None, _) -> do
|
||||
case getIgnoreMode (Node pos ty []) of
|
||||
Just mode' -> put mode' $> defNode
|
||||
Nothing -> Node pos ty <$> sequence subs
|
||||
|
||||
loop :: [Node] -> Maybe IgnoreMode -> StateT FileInfo (Except Text) ()
|
||||
loop [] _ = pass
|
||||
loop (node@(Node pos ty subs) : nodes) toIgnore
|
||||
| toIgnore == Just File = returnError toIgnore "" pos
|
||||
| toIgnore == Just Link = do
|
||||
let (Node startPos _ _) = maybe defNode id $ safeHead subs
|
||||
let mNext = case ty of
|
||||
PARAGRAPH -> afterIgnoredLink subs <> Just nodes
|
||||
TEXT txt | null (dropWhile isSpace $ T.unpack txt) -> afterIgnoredLink nodes
|
||||
SOFTBREAK -> afterIgnoredLink nodes
|
||||
_ -> afterIgnoredLink (node : nodes)
|
||||
case mNext of
|
||||
Just next -> loop next Nothing
|
||||
Nothing -> returnError toIgnore "" startPos
|
||||
| toIgnore == Just Paragraph =
|
||||
case ty of
|
||||
PARAGRAPH -> loop nodes Nothing
|
||||
_ -> returnError toIgnore (prettyType ty) pos
|
||||
| otherwise =
|
||||
case ty of
|
||||
HTML_BLOCK _ -> processHtmlNode node pos nodes toIgnore
|
||||
HEADING lvl -> do
|
||||
let aType = HeaderAnchor lvl
|
||||
let aName = headerToAnchor (mcFlavor config) $
|
||||
nodeExtractText node
|
||||
let aPos = toPosition pos
|
||||
fiAnchors %= (Anchor{..} :)
|
||||
loop (subs ++ nodes) toIgnore
|
||||
HTML_INLINE htmlText -> do
|
||||
let mName = T.stripSuffix "\">" =<< T.stripPrefix "<a name=\"" htmlText
|
||||
whenJust mName $ \aName -> do
|
||||
let aType = HandAnchor
|
||||
aPos = toPosition pos
|
||||
fiAnchors %= (Anchor{..} :)
|
||||
processHtmlNode node pos nodes toIgnore
|
||||
LINK url _ -> do
|
||||
let rName = nodeExtractText node
|
||||
rPos = toPosition pos
|
||||
link = if null url then rName else url
|
||||
let (rLink, rAnchor) = case T.splitOn "#" link of
|
||||
[t] -> (t, Nothing)
|
||||
t : ts -> (t, Just $ T.intercalate "#" ts)
|
||||
[] -> error "impossible"
|
||||
fiReferences %= (Reference{..} :)
|
||||
loop nodes toIgnore
|
||||
_ -> loop (subs ++ nodes) toIgnore
|
||||
prettyType :: NodeType -> Text
|
||||
prettyType ty =
|
||||
let mType = safeHead $ words $ show ty
|
||||
in fromMaybe "" mType
|
||||
|
||||
defNode :: Node
|
||||
defNode = Node Nothing DOCUMENT [] -- hard-coded default Node
|
||||
{- | Custom `foldMap` for source tree.
|
||||
-}
|
||||
foldNode :: (Monoid a, Monad m) => (Node -> m a) -> Node -> m a
|
||||
foldNode action node@(Node _ _ subs) = do
|
||||
a <- action node
|
||||
b <- foldM (fmap . (<>)) mempty (foldNode action <$> subs)
|
||||
return (a <> b)
|
||||
|
||||
getCommentContent :: Node -> Maybe Text
|
||||
getCommentContent node = do
|
||||
txt <- getHTMLText node
|
||||
T.stripSuffix "-->" =<< T.stripPrefix "<!--" (T.strip txt)
|
||||
where
|
||||
getHTMLText :: Node -> Maybe Text
|
||||
getHTMLText (Node _ (HTML_BLOCK txt) _) = Just txt
|
||||
getHTMLText (Node _ (HTML_INLINE txt) _) = Just txt
|
||||
getHTMLText _ = Nothing
|
||||
{- | Extract information from source tree.
|
||||
-}
|
||||
nodeExtractInfo
|
||||
:: ( MonadError Text m
|
||||
, MonadState IgnoreMode m
|
||||
, MonadReader MarkdownConfig m
|
||||
)
|
||||
=> Node
|
||||
-> m FileInfo
|
||||
nodeExtractInfo input@(Node _ _ nSubs) = do
|
||||
if checkIgnoreFile nSubs
|
||||
then return def
|
||||
else case removeIgnored input of
|
||||
Left err -> throwError err
|
||||
Right relevant ->
|
||||
diffToFileInfo <$> foldNode extractor relevant
|
||||
|
||||
getXrefcheckContent :: Node -> Maybe Text
|
||||
getXrefcheckContent node =
|
||||
let notStripped = T.stripPrefix "xrefcheck:" . T.strip =<<
|
||||
getCommentContent node
|
||||
in T.strip <$> notStripped
|
||||
where
|
||||
extractor node@(Node pos ty _) =
|
||||
case ty of
|
||||
HTML_BLOCK _ -> do
|
||||
return mempty
|
||||
|
||||
getIgnoreMode :: Node -> Maybe IgnoreMode
|
||||
getIgnoreMode node =
|
||||
let mContent = getXrefcheckContent node
|
||||
HEADING lvl -> do
|
||||
flavor <- asks mcFlavor
|
||||
let aType = HeaderAnchor lvl
|
||||
let aName = headerToAnchor flavor $ nodeExtractText node
|
||||
let aPos = toPosition pos
|
||||
return $ FileInfoDiff mempty $ pure $ Anchor {aType, aName, aPos}
|
||||
|
||||
textToMode :: [Text] -> Maybe IgnoreMode
|
||||
textToMode ("ignore" : [x])
|
||||
| x == "link" = return Link
|
||||
| x == "paragraph" = return Paragraph
|
||||
| x == "file" = return File
|
||||
| otherwise = Nothing
|
||||
textToMode _ = Nothing
|
||||
in textToMode . words =<< mContent
|
||||
HTML_INLINE text -> do
|
||||
let mName = T.stripSuffix "\">" =<< T.stripPrefix "<a name=\"" text
|
||||
case mName of
|
||||
Just aName -> do
|
||||
let aType = HandAnchor
|
||||
aPos = toPosition pos
|
||||
return $ FileInfoDiff
|
||||
mempty
|
||||
(pure $ Anchor {aType, aName, aPos})
|
||||
|
||||
Nothing -> do
|
||||
return mempty
|
||||
|
||||
LINK url _ -> do
|
||||
let rName = nodeExtractText node
|
||||
rPos = toPosition pos
|
||||
link = if null url then rName else url
|
||||
let (rLink, rAnchor) = case T.splitOn "#" link of
|
||||
[t] -> (t, Nothing)
|
||||
t : ts -> (t, Just $ T.intercalate "#" ts)
|
||||
[] -> error "impossible"
|
||||
return $ FileInfoDiff
|
||||
(pure $ Reference {rName, rPos, rLink, rAnchor})
|
||||
mempty
|
||||
|
||||
_ -> return mempty
|
||||
|
||||
checkIgnoreFile :: [Node] -> Bool
|
||||
checkIgnoreFile nodes =
|
||||
let isSimpleComment :: Node -> Bool
|
||||
isSimpleComment node = isComment node && not (isIgnoreFile node)
|
||||
|
||||
mIgnoreFile = safeHead $ dropWhile isSimpleComment nodes
|
||||
in maybe False isIgnoreFile mIgnoreFile
|
||||
where
|
||||
isComment :: Node -> Bool
|
||||
isComment = isJust . getCommentContent
|
||||
|
||||
isIgnoreFile :: Node -> Bool
|
||||
isIgnoreFile = (Just File ==) . getIgnoreMode
|
||||
|
||||
checkIgnoreFile :: [Node] -> Bool
|
||||
checkIgnoreFile nodes =
|
||||
let isSimpleComment :: Node -> Bool
|
||||
isSimpleComment node = isComment node && not (isIgnoreFile node)
|
||||
|
||||
mIgnoreFile = safeHead $ dropWhile isSimpleComment nodes
|
||||
in maybe False isIgnoreFile mIgnoreFile
|
||||
defNode :: Node
|
||||
defNode = Node Nothing DOCUMENT [] -- hard-coded default Node
|
||||
|
||||
isLink :: Node -> Bool
|
||||
isLink (Node _ (LINK _ _) _) = True
|
||||
isLink _ = False
|
||||
|
||||
isText :: Node -> Bool
|
||||
isText (Node _ (TEXT _) _) = True
|
||||
isText _ = False
|
||||
|
||||
afterIgnoredLink :: [Node] -> Maybe [Node]
|
||||
afterIgnoredLink (fNode : nodes)
|
||||
| isLink fNode = return nodes
|
||||
| sNode : nodes' <- nodes =
|
||||
if isText fNode && isLink sNode
|
||||
then return nodes'
|
||||
else Nothing
|
||||
| otherwise = Nothing
|
||||
afterIgnoredLink _ = Nothing
|
||||
|
||||
prettyPos :: Maybe PosInfo -> Text
|
||||
prettyPos pos =
|
||||
makeError
|
||||
:: IgnoreMode
|
||||
-> Text
|
||||
-> Maybe PosInfo
|
||||
-> Text
|
||||
makeError mode txt pos =
|
||||
let errMsg = case mode of
|
||||
Link -> linkMsg
|
||||
Paragraph -> paragraphMsg
|
||||
File -> fileMsg
|
||||
None -> unrecognisedMsg
|
||||
in errMsg <> posInfo
|
||||
where
|
||||
posInfo :: Text
|
||||
posInfo =
|
||||
let posToText :: Position -> Text
|
||||
posToText (Position mPos) = fromMaybe "" mPos
|
||||
in "(" <> posToText (toPosition pos) <> ")"
|
||||
|
||||
prettyType :: NodeType -> Text
|
||||
prettyType ty =
|
||||
let mType = safeHead $ words $ show ty
|
||||
in maybe "" id mType
|
||||
|
||||
fileMsg :: Text
|
||||
fileMsg =
|
||||
"\"ignore file\" must be at the top of \
|
||||
@ -213,52 +220,61 @@ nodeExtractInfo config (Node _ _ docNodes) =
|
||||
linkMsg :: Text
|
||||
linkMsg = "expected a LINK after \"ignore link\" "
|
||||
|
||||
paragraphMsg :: Text -> Text
|
||||
paragraphMsg txt = unwords
|
||||
paragraphMsg :: Text
|
||||
paragraphMsg = unwords
|
||||
[ "expected a PARAGRAPH after \
|
||||
\\"ignore paragraph\", but found"
|
||||
\\"ignore paragraph\", but found"
|
||||
, txt
|
||||
, ""
|
||||
]
|
||||
|
||||
unrecognisedMsg :: Text -> Text
|
||||
unrecognisedMsg txt = unwords
|
||||
unrecognisedMsg :: Text
|
||||
unrecognisedMsg = unwords
|
||||
[ "unrecognised option"
|
||||
, "\"" <> txt <> "\""
|
||||
, "perhaps you meant \
|
||||
\<\"ignore link\"|\"ignore paragraph\"|\"ignore file\"> "
|
||||
\<\"ignore link\"|\"ignore paragraph\"|\"ignore file\"> "
|
||||
]
|
||||
|
||||
returnError
|
||||
:: Maybe IgnoreMode
|
||||
-> Text
|
||||
-> Maybe PosInfo
|
||||
-> StateT FileInfo (Except Text) ()
|
||||
returnError mode txt pos =
|
||||
let errMsg = case mode of
|
||||
Just Link -> linkMsg
|
||||
Just Paragraph -> paragraphMsg txt
|
||||
Just File -> fileMsg
|
||||
Nothing -> unrecognisedMsg txt
|
||||
posInfo = prettyPos pos
|
||||
in lift $ throwE $ errMsg <> posInfo
|
||||
getCommentContent :: Node -> Maybe Text
|
||||
getCommentContent node = do
|
||||
txt <- getHTMLText node
|
||||
T.stripSuffix "-->" =<< T.stripPrefix "<!--" (T.strip txt)
|
||||
where
|
||||
getHTMLText :: Node -> Maybe Text
|
||||
getHTMLText (Node _ (HTML_BLOCK txt) _) = Just txt
|
||||
getHTMLText (Node _ (HTML_INLINE txt) _) = Just txt
|
||||
getHTMLText _ = Nothing
|
||||
|
||||
processHtmlNode
|
||||
:: Node
|
||||
-> Maybe PosInfo
|
||||
-> [Node]
|
||||
-> Maybe IgnoreMode
|
||||
-> StateT FileInfo (Except Text) ()
|
||||
processHtmlNode node pos nodes toIgnore = do
|
||||
let xrefcheckContent = getXrefcheckContent node
|
||||
case xrefcheckContent of
|
||||
Just content -> maybe (returnError Nothing content pos)
|
||||
(loop nodes . pure) $ getIgnoreMode node
|
||||
Nothing -> loop nodes toIgnore
|
||||
getXrefcheckContent :: Node -> Maybe Text
|
||||
getXrefcheckContent node =
|
||||
let notStripped = T.stripPrefix "xrefcheck:" . T.strip =<<
|
||||
getCommentContent node
|
||||
in T.strip <$> notStripped
|
||||
|
||||
getIgnoreMode :: Node -> Maybe IgnoreMode
|
||||
getIgnoreMode node =
|
||||
let mContent = getXrefcheckContent node
|
||||
|
||||
in textToMode . words =<< mContent
|
||||
|
||||
textToMode :: [Text] -> Maybe IgnoreMode
|
||||
textToMode ("ignore" : [x])
|
||||
| x == "link" = return Link
|
||||
| x == "paragraph" = return Paragraph
|
||||
| x == "file" = return File
|
||||
| otherwise = return None
|
||||
textToMode _ = Nothing
|
||||
|
||||
parseFileInfo :: MarkdownConfig -> LT.Text -> Either Text FileInfo
|
||||
parseFileInfo config input = runExcept $ nodeExtractInfo config $
|
||||
commonmarkToNode [] [] $ toStrict input
|
||||
parseFileInfo config input
|
||||
= runIdentity
|
||||
$ runExceptT
|
||||
$ flip runReaderT config
|
||||
$ flip evalStateT None
|
||||
$ nodeExtractInfo
|
||||
$ commonmarkToNode [] []
|
||||
$ toStrict input
|
||||
|
||||
markdownScanner :: MarkdownConfig -> ScanAction
|
||||
markdownScanner config path = do
|
||||
|
@ -18,7 +18,7 @@ parse :: Flavor -> FilePath -> IO (Either Text FileInfo)
|
||||
parse fl path =
|
||||
parseFileInfo MarkdownConfig { mcFlavor = fl } . decodeUtf8 <$> BSL.readFile path
|
||||
|
||||
getFI :: Flavor -> FilePath -> IO FileInfo
|
||||
getFI :: HasCallStack => Flavor -> FilePath -> IO FileInfo
|
||||
getFI fl path =
|
||||
let errOrFI = parse fl path
|
||||
in either error id <$> errOrFI
|
||||
|
Loading…
Reference in New Issue
Block a user