From 1c2d30351d7d2a99f7239f79f2ce4f11ed5db84d Mon Sep 17 00:00:00 2001 From: Sridhar Ratnakumar <3998+srid@users.noreply.github.com> Date: Thu, 6 May 2021 15:44:53 -0400 Subject: [PATCH] Add markdown parsing helper (#24) add helpers to parse markdown; `parseMarkdownWithFrontMatter` and `parseMarkdown` --- CHANGELOG.md | 2 + ema.cabal | 13 +++- src/Ema/Example/Ex02_Basic.hs | 2 +- src/Ema/Helper/FileSystem.hs | 3 +- src/Ema/Helper/Markdown.hs | 137 ++++++++++++++++++++++++++++++++++ 5 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 src/Ema/Helper/Markdown.hs diff --git a/CHANGELOG.md b/CHANGELOG.md index 0453883..e1ef416 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ - Helpers.Tailwind - add overflow-y-scroll to body - Add twind shim *before* application's head +- Helpers.Markdown + - add helpers to parse markdown; `parseMarkdownWithFrontMatter` and `parseMarkdown` - Add Ex03_Basic.hs example - Add default implementation based on Enum for `staticRoute` diff --git a/ema.cabal b/ema.cabal index 362c36d..764ec15 100644 --- a/ema.cabal +++ b/ema.cabal @@ -42,6 +42,7 @@ library , data-default , directory , filepath + , filepattern , http-types , lvar , monad-logger @@ -63,12 +64,17 @@ library build-depends: , blaze-html , blaze-markup - , filepattern + , commonmark + , commonmark-extensions + , commonmark-pandoc , fsnotify + , HsYAML + , megaparsec + , pandoc-types + , parser-combinators if flag(with-examples) - build-depends: - , time + build-depends: time mixins: base hiding (Prelude), @@ -97,6 +103,7 @@ library if (flag(with-helpers) || flag(with-examples)) exposed-modules: Ema.Helper.FileSystem + Ema.Helper.Markdown Ema.Helper.Tailwind other-modules: diff --git a/src/Ema/Example/Ex02_Basic.hs b/src/Ema/Example/Ex02_Basic.hs index f102aa4..f850611 100644 --- a/src/Ema/Example/Ex02_Basic.hs +++ b/src/Ema/Example/Ex02_Basic.hs @@ -57,4 +57,4 @@ render emaAction (Model s) r = routeElem r' w = H.a ! A.class_ "text-red-500 hover:underline" ! routeHref r' $ w routeHref r' = - A.href (fromString . toString $ Ema.routeUrl r') \ No newline at end of file + A.href (fromString . toString $ Ema.routeUrl r') diff --git a/src/Ema/Helper/FileSystem.hs b/src/Ema/Helper/FileSystem.hs index 4953b29..d9ac996 100644 --- a/src/Ema/Helper/FileSystem.hs +++ b/src/Ema/Helper/FileSystem.hs @@ -3,7 +3,8 @@ -- | Helper to read a directory of files, and observe it for changes. -- --- Use @new@ in conjunction with @observe@ in your @runEma@ function call. +-- TODO: Publish this to hackage as an addon library for `lvar` (after renaming +-- lvar package)? module Ema.Helper.FileSystem ( -- | This is typically what you want. mountOnLVar, diff --git a/src/Ema/Helper/Markdown.hs b/src/Ema/Helper/Markdown.hs new file mode 100644 index 0000000..177cc6b --- /dev/null +++ b/src/Ema/Helper/Markdown.hs @@ -0,0 +1,137 @@ +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} + +-- | Helper to deal with Markdown files +-- +-- TODO: Publish this to hackage as `Text.Markdown.Simple`? +module Ema.Helper.Markdown + ( -- Parsing + parseMarkdownWithFrontMatter, + parseMarkdown, + -- Utilities + plainify, + ) +where + +import qualified Commonmark as CM +import qualified Commonmark.Extensions as CE +import qualified Commonmark.Pandoc as CP +import Control.Monad.Combinators (manyTill) +import qualified Data.YAML as Y +import qualified Text.Megaparsec as M +import qualified Text.Megaparsec.Char as M +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Definition (Pandoc (..)) +import qualified Text.Pandoc.Walk as W + +-- | Parse a Markdown file using commonmark-hs with all extensions enabled +parseMarkdownWithFrontMatter :: + forall meta. + Y.FromYAML meta => + -- | Path to file associated with this Markdown + FilePath -> + -- | Markdown text to parse + Text -> + Either Text (Maybe meta, Pandoc) +parseMarkdownWithFrontMatter fn s = do + (mMeta, markdown) <- partitionMarkdown fn s + mMetaVal <- parseYaml fn `traverse` mMeta + blocks <- first show $ join $ CM.commonmarkWith @(Either CM.ParseError) fullMarkdownSpec fn markdown + let doc = Pandoc mempty $ B.toList . CP.unCm @() @B.Blocks $ blocks + pure (mMetaVal, doc) + +-- | Like `parseMarkdownWithFrontMatter` but assumes that no YAML frontmatter is present. +parseMarkdown :: FilePath -> Text -> Either Text Pandoc +parseMarkdown fn s = do + cmBlocks <- first show $ join $ CM.commonmarkWith @(Either CM.ParseError) fullMarkdownSpec fn s + let blocks = B.toList . CP.unCm @() @B.Blocks $ cmBlocks + pure $ Pandoc mempty blocks + +type SyntaxSpec' m il bl = + ( Monad m, + CM.IsBlock il bl, + CM.IsInline il, + Typeable m, + Typeable il, + Typeable bl, + CE.HasEmoji il, + CE.HasStrikethrough il, + CE.HasPipeTable il bl, + CE.HasTaskList il bl, + CM.ToPlainText il, + CE.HasFootnote il bl, + CE.HasMath il, + CE.HasDefinitionList il bl, + CE.HasDiv bl, + CE.HasQuoted il, + CE.HasSpan il + ) + +fullMarkdownSpec :: + SyntaxSpec' m il bl => + CM.SyntaxSpec m il bl +fullMarkdownSpec = + mconcat + [ CE.gfmExtensions, + CE.fancyListSpec, + CE.footnoteSpec, + CE.mathSpec, + CE.smartPunctuationSpec, + CE.definitionListSpec, + CE.attributesSpec, + CE.rawAttributeSpec, + CE.fencedDivSpec, + CE.bracketedSpanSpec, + CE.autolinkSpec, + CM.defaultSyntaxSpec, + -- as the commonmark documentation states, pipeTableSpec should be placed after + -- fancyListSpec and defaultSyntaxSpec to avoid bad results when parsing + -- non-table lines + CE.pipeTableSpec + ] + +-- | Identify metadata block at the top, and split it from markdown body. +-- +-- FIXME: https://github.com/srid/neuron/issues/175 +partitionMarkdown :: FilePath -> Text -> Either Text (Maybe Text, Text) +partitionMarkdown = + parse (M.try splitP <|> fmap (Nothing,) M.takeRest) + where + separatorP :: M.Parsec Void Text () + separatorP = + void $ M.string "---" <* M.eol + splitP :: M.Parsec Void Text (Maybe Text, Text) + splitP = do + separatorP + a <- toText <$> manyTill M.anySingle (M.try $ M.eol *> separatorP) + b <- M.takeRest + pure (Just a, b) + parse :: M.Parsec Void Text a -> String -> Text -> Either Text a + parse p fn s = + first (toText . M.errorBundlePretty) $ + M.parse (p <* M.eof) fn s + +-- NOTE: HsYAML parsing is rather slow due to its use of DList. +-- See https://github.com/haskell-hvr/HsYAML/issues/40 +parseYaml :: Y.FromYAML a => FilePath -> Text -> Either Text a +parseYaml n (encodeUtf8 -> v) = do + let mkError (loc, emsg) = + toText $ n <> ":" <> Y.prettyPosWithSource loc v " error" <> emsg + first mkError $ Y.decode1 v + +-- | Convert Pandoc AST inlines to raw text. +plainify :: [B.Inline] -> Text +plainify = W.query $ \case + B.Str x -> x + B.Code _attr x -> x + B.Space -> " " + B.SoftBreak -> " " + B.LineBreak -> " " + B.RawInline _fmt s -> s + B.Math _mathTyp s -> s + -- Ignore the rest of AST nodes, as they are recursively defined in terms of + -- `Inline` which `W.query` will traverse again. + _ -> ""