diff --git a/package.yaml b/package.yaml index 18745b6..85942fe 100644 --- a/package.yaml +++ b/package.yaml @@ -27,6 +27,7 @@ default-extensions: - DefaultSignatures - DeriveDataTypeable - DeriveGeneric + - DerivingStrategies - FlexibleContexts - FlexibleInstances - FunctionalDependencies @@ -67,6 +68,7 @@ dependencies: - deepseq - directory-tree - directory + - dlist - filepath - file-embed - fmt diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index 08f3d6a..19cbabc 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -26,6 +26,8 @@ import Text.Numeral.Roman (toRoman) import Xrefcheck.Progress import Xrefcheck.Util +import Data.DList (DList) +import qualified Data.DList as DList ----------------------------------------------------------- -- Types @@ -94,6 +96,21 @@ data Anchor = Anchor , aPos :: Position } deriving (Show, Eq, Generic) +data FileInfoDiff = FileInfoDiff + { _fidReferences :: DList Reference + , _fidAnchors :: DList Anchor + } +makeLenses ''FileInfoDiff + +diffToFileInfo :: FileInfoDiff -> FileInfo +diffToFileInfo (FileInfoDiff refs anchors) = FileInfo (DList.toList refs) (DList.toList anchors) + +instance Semigroup FileInfoDiff where + FileInfoDiff a b <> FileInfoDiff c d = FileInfoDiff (a <> c) (b <> d) + +instance Monoid FileInfoDiff where + mempty = FileInfoDiff mempty mempty + -- | All information regarding a single file we care about. data FileInfo = FileInfo { _fiReferences :: [Reference] @@ -102,7 +119,7 @@ data FileInfo = FileInfo makeLenses ''FileInfo instance Default FileInfo where - def = FileInfo [] [] + def = diffToFileInfo mempty newtype RepoInfo = RepoInfo (Map FilePath FileInfo) deriving (Show) diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs index e06b3d4..94d0e64 100644 --- a/src/Xrefcheck/Scanners/Markdown.hs +++ b/src/Xrefcheck/Scanners/Markdown.hs @@ -18,12 +18,9 @@ module Xrefcheck.Scanners.Markdown import Universum import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode) -import Control.Lens ((%=)) -import Control.Monad.Trans.Except (Except, runExcept, throwE) +import Control.Monad.Except (MonadError, throwError) import Data.Aeson.TH (deriveFromJSON) import Data.ByteString.Lazy qualified as BSL -import Data.Char (isSpace) -import Data.Default (Default (..)) import Data.Text qualified as T import Data.Text.Lazy qualified as LT import Fmt (Buildable (..), blockListF, nameF, (+|), (|+)) @@ -31,6 +28,7 @@ import Fmt (Buildable (..), blockListF, nameF, (+|), (|+)) import Xrefcheck.Core import Xrefcheck.Scan import Xrefcheck.Util +import Data.Default (def) data MarkdownConfig = MarkdownConfig { mcFlavor :: Flavor @@ -57,9 +55,8 @@ toPosition = Position . \case startLine |+ ":" +| startColumn |+ " - " +| endLine |+ ":" +| endColumn |+ "" -nodeFlatten :: Node -> [NodeType] -nodeFlatten (Node _pos ty subs) = ty : concatMap nodeFlatten subs - +{- | Extract text from the topmost node. +-} nodeExtractText :: Node -> Text nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten where @@ -68,143 +65,153 @@ nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten CODE t -> t _ -> "" + nodeFlatten :: Node -> [NodeType] + nodeFlatten (Node _pos ty subs) = ty : concatMap nodeFlatten subs + data IgnoreMode = Link | Paragraph | File + | None deriving Eq -nodeExtractInfo :: MarkdownConfig -> Node -> Except Text FileInfo -nodeExtractInfo config (Node _ _ docNodes) = - if checkIgnoreFile docNodes - then return def - else finaliseFileInfo <$> extractionResult +{- | A fold over a `Node`. +-} +cataNode :: (Maybe PosInfo -> NodeType -> [c] -> c) -> Node -> c +cataNode f (Node pos ty subs) = f pos ty (cataNode f <$> subs) + +{- | Remove nodes with accordance with global `MarkdownConfig` and local + overrides. +-} +removeIgnored :: Node -> Either Text Node +removeIgnored = runIdentity . runExceptT . flip evalStateT None . cataNode remove where - extractionResult :: Except Text FileInfo - extractionResult = execStateT (loop docNodes Nothing) def + remove + :: (MonadError Text m, MonadState IgnoreMode m) + => Maybe PosInfo + -> NodeType + -> [m Node] + -> m Node + remove pos ty subs = do + mode <- get + case (mode, ty) of + (Paragraph, PARAGRAPH) -> put None $> defNode + (Paragraph, x) -> throwError (makeError mode (prettyType x) pos) + (File, _) -> throwError (makeError mode "" pos) + (Link, LINK {}) -> put None $> defNode + (Link, _) -> Node pos ty <$> sequence subs + (None, _) -> do + case getIgnoreMode (Node pos ty []) of + Just mode' -> put mode' $> defNode + Nothing -> Node pos ty <$> sequence subs - loop :: [Node] -> Maybe IgnoreMode -> StateT FileInfo (Except Text) () - loop [] _ = pass - loop (node@(Node pos ty subs) : nodes) toIgnore - | toIgnore == Just File = returnError toIgnore "" pos - | toIgnore == Just Link = do - let (Node startPos _ _) = maybe defNode id $ safeHead subs - let mNext = case ty of - PARAGRAPH -> afterIgnoredLink subs <> Just nodes - TEXT txt | null (dropWhile isSpace $ T.unpack txt) -> afterIgnoredLink nodes - SOFTBREAK -> afterIgnoredLink nodes - _ -> afterIgnoredLink (node : nodes) - case mNext of - Just next -> loop next Nothing - Nothing -> returnError toIgnore "" startPos - | toIgnore == Just Paragraph = - case ty of - PARAGRAPH -> loop nodes Nothing - _ -> returnError toIgnore (prettyType ty) pos - | otherwise = - case ty of - HTML_BLOCK _ -> processHtmlNode node pos nodes toIgnore - HEADING lvl -> do - let aType = HeaderAnchor lvl - let aName = headerToAnchor (mcFlavor config) $ - nodeExtractText node - let aPos = toPosition pos - fiAnchors %= (Anchor{..} :) - loop (subs ++ nodes) toIgnore - HTML_INLINE htmlText -> do - let mName = T.stripSuffix "\">" =<< T.stripPrefix " do - let aType = HandAnchor - aPos = toPosition pos - fiAnchors %= (Anchor{..} :) - processHtmlNode node pos nodes toIgnore - LINK url _ -> do - let rName = nodeExtractText node - rPos = toPosition pos - link = if null url then rName else url - let (rLink, rAnchor) = case T.splitOn "#" link of - [t] -> (t, Nothing) - t : ts -> (t, Just $ T.intercalate "#" ts) - [] -> error "impossible" - fiReferences %= (Reference{..} :) - loop nodes toIgnore - _ -> loop (subs ++ nodes) toIgnore + prettyType :: NodeType -> Text + prettyType ty = + let mType = safeHead $ words $ show ty + in fromMaybe "" mType - defNode :: Node - defNode = Node Nothing DOCUMENT [] -- hard-coded default Node +{- | Custom `foldMap` for source tree. +-} +foldNode :: (Monoid a, Monad m) => (Node -> m a) -> Node -> m a +foldNode action node@(Node _ _ subs) = do + a <- action node + b <- foldM (fmap . (<>)) mempty (foldNode action <$> subs) + return (a <> b) - getCommentContent :: Node -> Maybe Text - getCommentContent node = do - txt <- getHTMLText node - T.stripSuffix "-->" =<< T.stripPrefix "" =<< T.stripPrefix "