1
1
mirror of https://github.com/aelve/guide.git synced 2024-11-27 10:10:50 +03:00
guide/lib/Markdown.hs

350 lines
10 KiB
Haskell
Raw Normal View History

2016-03-11 14:58:11 +03:00
{-# LANGUAGE
OverloadedStrings,
2016-04-16 02:02:43 +03:00
FlexibleInstances,
FlexibleContexts,
2016-03-11 14:58:11 +03:00
NoImplicitPrelude
#-}
module Markdown
(
2016-04-16 02:02:43 +03:00
-- * Types
MarkdownInline(..),
MarkdownBlock(..),
MarkdownBlockWithTOC(..),
2016-04-16 02:02:43 +03:00
-- * Lenses
mdHtml,
mdText,
mdMarkdown,
mdIdPrefix,
2016-07-24 13:12:17 +03:00
mdTree,
mdTOC,
2016-04-16 02:02:43 +03:00
2016-07-24 13:12:17 +03:00
-- * Converting text to Markdown
toMarkdownInline,
toMarkdownBlock,
toMarkdownBlockWithTOC,
2016-04-16 02:02:43 +03:00
-- * Misc
2016-07-24 13:12:17 +03:00
renderMD,
markdownNull,
2016-03-11 14:58:11 +03:00
)
where
import BasePrelude hiding (Space)
2016-04-16 02:02:43 +03:00
-- Lenses
import Lens.Micro.Platform hiding ((&))
2016-03-11 14:58:11 +03:00
-- Monad transformers and monads
import Control.Monad.State
2016-03-11 14:58:11 +03:00
-- Text
2016-06-12 22:35:13 +03:00
import qualified Data.Text.All as T
import Data.Text.All (Text)
-- ByteString
import qualified Data.ByteString.Lazy as BSL
2016-07-24 13:12:17 +03:00
import qualified Data.ByteString as BS
import Data.ByteString (ByteString)
2016-03-11 14:58:11 +03:00
-- Parsing
import Text.Megaparsec hiding (State)
2016-07-27 16:31:55 +03:00
import Text.Megaparsec.Text
-- JSON
import qualified Data.Aeson as A
2016-03-11 14:58:11 +03:00
-- HTML
import Lucid
2016-07-24 13:12:17 +03:00
import Text.HTML.SanitizeXSS
-- Containers
2016-04-16 02:02:43 +03:00
import Data.Tree
import qualified Data.Set as S
import Data.Set (Set)
2016-03-11 14:58:11 +03:00
-- Markdown
2016-07-24 13:12:17 +03:00
import CMark hiding (Node)
import qualified CMark as MD
import CMark.Highlight
import CMark.Sections
2016-03-11 14:58:11 +03:00
import ShortcutLinks
import ShortcutLinks.All (hackage)
-- acid-state
import Data.SafeCopy
2016-03-11 14:58:11 +03:00
-- Local
import Utils
2016-03-11 14:58:11 +03:00
2016-04-16 02:02:43 +03:00
data MarkdownInline = MarkdownInline {
markdownInlineMdText :: Text,
2016-07-24 13:12:17 +03:00
markdownInlineMdHtml :: ByteString,
markdownInlineMdMarkdown :: ![MD.Node] }
2016-04-16 02:02:43 +03:00
data MarkdownBlock = MarkdownBlock {
markdownBlockMdText :: Text,
2016-07-24 13:12:17 +03:00
markdownBlockMdHtml :: ByteString,
markdownBlockMdMarkdown :: ![MD.Node] }
2016-04-16 02:02:43 +03:00
data MarkdownBlockWithTOC = MarkdownBlockWithTOC {
markdownBlockWithTOCMdText :: Text,
2016-07-24 13:12:17 +03:00
markdownBlockWithTOCMdTree :: !(Document Text ByteString),
markdownBlockWithTOCMdIdPrefix :: Text,
2016-07-24 13:12:17 +03:00
markdownBlockWithTOCMdTOC :: Forest ([MD.Node], Text) }
2016-04-16 02:02:43 +03:00
makeFields ''MarkdownInline
makeFields ''MarkdownBlock
makeFields ''MarkdownBlockWithTOC
2016-07-24 13:12:17 +03:00
parseMD :: Text -> [MD.Node]
parseMD s =
let MD.Node _ DOCUMENT ns =
highlightNode . shortcutLinks . commonmarkToNode [optSafe] $ s
in ns
renderMD :: [MD.Node] -> ByteString
renderMD ns
-- See https://github.com/jgm/cmark/issues/147
| any isInlineNode ns =
T.encodeUtf8 . sanitize . T.concat . map (nodeToHtml []) $ ns
| otherwise =
T.encodeUtf8 . sanitize . nodeToHtml [] $ MD.Node Nothing DOCUMENT ns
isInlineNode :: MD.Node -> Bool
isInlineNode (MD.Node _ tp _) = case tp of
EMPH -> True
STRONG -> True
LINK _ _ -> True
CUSTOM_INLINE _ _ -> True
SOFTBREAK -> True
LINEBREAK -> True
TEXT _ -> True
CODE _ -> True
HTML_INLINE _ -> True
_other -> False
2016-04-16 02:02:43 +03:00
2016-03-11 14:58:11 +03:00
-- | Convert a Markdown structure to a string with formatting removed.
2016-07-24 13:12:17 +03:00
stringify :: [MD.Node] -> Text
stringify = T.concat . map go
2016-03-11 14:58:11 +03:00
where
2016-07-24 13:12:17 +03:00
go (MD.Node _ tp ns) = case tp of
DOCUMENT -> stringify ns
THEMATIC_BREAK -> stringify ns
PARAGRAPH -> stringify ns
BLOCK_QUOTE -> stringify ns
CUSTOM_BLOCK _ _ -> stringify ns
HEADING _ -> stringify ns
LIST _ -> stringify ns
ITEM -> stringify ns
EMPH -> stringify ns
STRONG -> stringify ns
LINK _ _ -> stringify ns
IMAGE _ _ -> stringify ns
CUSTOM_INLINE _ _ -> stringify ns
CODE xs -> xs
CODE_BLOCK _ xs -> xs
TEXT xs -> xs
SOFTBREAK -> " "
LINEBREAK -> " "
HTML_BLOCK _ -> ""
HTML_INLINE _ -> ""
-- | Flatten Markdown by concatenating all block elements.
extractInlines :: [MD.Node] -> [MD.Node]
extractInlines = concatMap go
where
go node@(MD.Node _ tp ns) = case tp of
-- Block containers
DOCUMENT -> extractInlines ns
BLOCK_QUOTE -> extractInlines ns
CUSTOM_BLOCK _ _ -> extractInlines ns
LIST _ -> extractInlines ns
ITEM -> extractInlines ns
-- Inline containers
PARAGRAPH -> ns
HEADING _ -> ns
IMAGE _ _ -> ns
-- Inlines
EMPH -> [node]
STRONG -> [node]
LINK _ _ -> [node]
CUSTOM_INLINE _ _ -> [node]
SOFTBREAK -> [node]
LINEBREAK -> [node]
TEXT _ -> [node]
CODE _ -> [node]
-- Other stuff
THEMATIC_BREAK -> []
HTML_BLOCK xs -> [MD.Node Nothing (CODE xs) []]
HTML_INLINE xs -> [MD.Node Nothing (CODE xs) []]
CODE_BLOCK _ xs -> [MD.Node Nothing (CODE xs) []]
shortcutLinks :: MD.Node -> MD.Node
shortcutLinks node@(MD.Node pos (LINK url title) ns) | '@' <- T.head url =
-- %20s are possibly introduced by cmark (Pandoc definitely adds them,
-- no idea about cmark but better safe than sorry) and so they need to
2016-03-11 14:58:11 +03:00
-- be converted back to spaces
case parseLink (T.replace "%20" " " url) of
2016-07-24 13:12:17 +03:00
Left _err -> MD.Node pos (LINK url title) (map shortcutLinks ns)
2016-03-11 14:58:11 +03:00
Right (shortcut, opt, text) -> do
2016-07-24 13:12:17 +03:00
let text' = fromMaybe (stringify [node]) text
let shortcuts = (["hk"], hackage) : allShortcuts
case useShortcutFrom shortcuts shortcut opt text' of
2016-03-11 14:58:11 +03:00
Success link ->
2016-07-24 13:12:17 +03:00
MD.Node pos (LINK link title) (map shortcutLinks ns)
2016-03-11 14:58:11 +03:00
Warning warnings link ->
2016-07-24 13:12:17 +03:00
let warningText = "[warnings when processing shortcut link: " <>
T.pack (intercalate ", " warnings) <> "]"
warningNode = MD.Node Nothing (TEXT warningText) []
in MD.Node pos (LINK link title)
(warningNode : map shortcutLinks ns)
2016-03-11 14:58:11 +03:00
Failure err ->
2016-07-24 13:12:17 +03:00
let errorText = "[error when processing shortcut link: " <>
T.pack err <> "]"
in MD.Node Nothing (TEXT errorText) []
shortcutLinks (MD.Node pos tp ns) =
MD.Node pos tp (map shortcutLinks ns)
2016-03-11 14:58:11 +03:00
-- TODO: this should be in the shortcut-links package itself
-- | Parse a shortcut link. Allowed formats:
--
-- @
-- \@name
-- \@name:text
-- \@name(option)
-- \@name(option):text
-- @
parseLink :: Text -> Either String (Text, Maybe Text, Maybe Text)
parseLink = either (Left . show) Right . parse p ""
where
shortcut = some (alphaNumChar <|> char '-')
2016-07-27 16:31:55 +03:00
opt = char '(' *> some (noneOf [')']) <* char ')'
2016-03-11 14:58:11 +03:00
text = char ':' *> some anyChar
2016-07-27 16:31:55 +03:00
p :: Parser (Text, Maybe Text, Maybe Text)
2016-03-11 14:58:11 +03:00
p = do
char '@'
(,,) <$> T.pack <$> shortcut
<*> optional (T.pack <$> opt)
<*> optional (T.pack <$> text)
2016-07-24 13:12:17 +03:00
toMarkdownInline :: Text -> MarkdownInline
toMarkdownInline s = MarkdownInline {
markdownInlineMdText = s,
markdownInlineMdHtml = html,
markdownInlineMdMarkdown = inlines }
2016-03-11 14:58:11 +03:00
where
2016-07-24 13:12:17 +03:00
inlines = extractInlines (parseMD s)
html = renderMD inlines
toMarkdownBlock :: Text -> MarkdownBlock
toMarkdownBlock s = MarkdownBlock {
markdownBlockMdText = s,
2016-07-24 13:12:17 +03:00
markdownBlockMdHtml = html,
markdownBlockMdMarkdown = doc }
where
2016-07-24 13:12:17 +03:00
doc = parseMD s
html = renderMD doc
2016-07-24 13:12:17 +03:00
toMarkdownBlockWithTOC :: Text -> Text -> MarkdownBlockWithTOC
toMarkdownBlockWithTOC idPrefix s = MarkdownBlockWithTOC {
markdownBlockWithTOCMdText = s,
markdownBlockWithTOCMdIdPrefix = idPrefix,
2016-07-24 13:12:17 +03:00
markdownBlockWithTOCMdTree = tree,
markdownBlockWithTOCMdTOC = toc }
where
2016-07-24 13:12:17 +03:00
blocks :: [MD.Node]
blocks = parseMD s
--
slugify :: Text -> Text
slugify x = idPrefix <> makeSlug x
--
tree :: Document Text ByteString
tree = renderContents . slugifyDocument slugify $
nodesToDocument (Ann s blocks)
--
toc :: Forest ([MD.Node], Text) -- (heading, slug)
toc = sections tree
& each.each %~ (\Section{..} -> (annValue heading, headingAnn))
renderContents :: Document a b -> Document a ByteString
renderContents doc = doc {
prefaceAnn = renderMD (annValue (preface doc)),
sections = over (each.each) renderSection (sections doc) }
where
renderSection sec = sec {
contentAnn = renderMD (annValue (content sec)) }
slugifyDocument :: (Text -> Text) -> Document a b -> Document Text b
slugifyDocument slugify doc = doc {
sections = evalState ((each.each) process (sections doc)) mempty }
where
process :: Section a b -> State (Set Text) (Section Text b)
process sec = do
previousIds <- get
let slug = until (`S.notMember` previousIds) (<> "_")
(slugify (stringify (annValue (heading sec))))
modify (S.insert slug)
return sec{headingAnn = slug}
2016-04-09 23:34:24 +03:00
instance Show MarkdownInline where
2016-04-16 02:02:43 +03:00
show = show . view mdText
2016-04-09 23:34:24 +03:00
instance Show MarkdownBlock where
2016-04-16 02:02:43 +03:00
show = show . view mdText
instance Show MarkdownBlockWithTOC where
show = show . view mdText
2016-04-09 23:34:24 +03:00
instance A.ToJSON MarkdownInline where
toJSON md = A.object [
"text" A..= (md^.mdText),
"html" A..= T.decodeUtf8 (md^.mdHtml) ]
instance A.ToJSON MarkdownBlock where
toJSON md = A.object [
"text" A..= (md^.mdText),
"html" A..= T.decodeUtf8 (md^.mdHtml) ]
instance A.ToJSON MarkdownBlockWithTOC where
toJSON md = A.object [
"text" A..= (md^.mdText) ]
instance ToHtml MarkdownInline where
2016-07-24 13:12:17 +03:00
toHtmlRaw = toHtml
toHtml = toHtmlRaw . view mdHtml
instance ToHtml MarkdownBlock where
2016-07-24 13:12:17 +03:00
toHtmlRaw = toHtml
toHtml = toHtmlRaw . view mdHtml
instance ToHtml MarkdownBlockWithTOC where
2016-07-24 13:12:17 +03:00
toHtmlRaw = toHtml
toHtml = toHtmlRaw . renderDoc . view mdTree
where
renderDoc Document{..} = BS.concat $
prefaceAnn :
map renderSection (concatMap flatten sections)
renderSection Section{..} = BSL.toStrict . renderBS $ do
mkH $ do
span_ [id_ headingAnn] ""
toHtmlRaw (renderMD (annValue heading))
toHtmlRaw contentAnn
where
mkH = case level of
1 -> h1_; 2 -> h2_; 3 -> h3_;
4 -> h4_; 5 -> h5_; 6 -> h6_;
_other -> error "Markdown.toHtml: level > 6"
instance SafeCopy MarkdownInline where
version = 0
kind = base
2016-04-16 02:02:43 +03:00
putCopy = contain . safePut . view mdText
2016-07-24 13:12:17 +03:00
getCopy = contain $ toMarkdownInline <$> safeGet
instance SafeCopy MarkdownBlock where
version = 0
kind = base
2016-04-16 02:02:43 +03:00
putCopy = contain . safePut . view mdText
2016-07-24 13:12:17 +03:00
getCopy = contain $ toMarkdownBlock <$> safeGet
instance SafeCopy MarkdownBlockWithTOC where
version = 0
kind = base
putCopy md = contain $ do
safePut (md ^. mdIdPrefix)
safePut (md ^. mdText)
getCopy = contain $
2016-07-24 13:12:17 +03:00
toMarkdownBlockWithTOC <$> safeGet <*> safeGet
2016-07-24 13:12:17 +03:00
-- | Is a piece of Markdown empty?
markdownNull :: HasMdText a Text => a -> Bool
markdownNull = T.null . view mdText