Merge pull request #5187 from sellout/doc-lexer

This commit is contained in:
Arya Irani 2024-08-03 23:45:30 +00:00 committed by GitHub
commit c049c65ad7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 2184 additions and 1676 deletions

View File

@ -54,7 +54,7 @@ import Unison.Result qualified as Result
import Unison.Settings qualified as Settings
import Unison.Symbol (Symbol)
import Unison.Syntax.HashQualified qualified as HQ (toText)
import Unison.Syntax.Lexer qualified as L
import Unison.Syntax.Lexer.Unison qualified as L
import Unison.Syntax.Name qualified as Name (toText)
import Unison.Syntax.NamePrinter (prettyHashQualified0)
import Unison.Syntax.Parser (Annotated, ann)
@ -1336,7 +1336,7 @@ prettyParseError s e =
lexerOutput :: Pretty (AnnotatedText a)
lexerOutput =
if showLexerOutput
then "\nLexer output:\n" <> fromString (L.debugLex' s)
then "\nLexer output:\n" <> fromString (L.debugPreParse' s)
else mempty
renderParseErrors ::
@ -1861,6 +1861,14 @@ renderParseErrors s = \case
<> structuralVsUniqueDocsLink
]
in (msg, rangeForToken <$> [void keyword, void name])
go (Parser.TypeNotAllowed tok) =
let msg =
Pr.lines
[ Pr.wrap "I expected to see a term here, but instead it's a type:",
"",
tokenAsErrorSite s $ HQ.toText <$> tok
]
in (msg, [rangeForToken tok])
unknownConstructor ::
String -> L.Token (HashQualified Name) -> Pretty ColorText

View File

@ -12,6 +12,7 @@ module Unison.Syntax.TermParser
)
where
import Control.Comonad.Cofree (Cofree ((:<)))
import Control.Monad.Reader (asks, local)
import Data.Char qualified as Char
import Data.Foldable (foldrM)
@ -24,7 +25,9 @@ import Data.Sequence qualified as Sequence
import Data.Set qualified as Set
import Data.Text qualified as Text
import Data.Tuple.Extra qualified as TupleE
import Data.Void (absurd, vacuous)
import Text.Megaparsec qualified as P
import U.Codebase.Reference (ReferenceType (..))
import U.Core.ABT qualified as ABT
import Unison.ABT qualified as ABT
import Unison.Builtin.Decls qualified as DD
@ -38,18 +41,19 @@ import Unison.NameSegment qualified as NameSegment
import Unison.Names (Names)
import Unison.Names qualified as Names
import Unison.NamesWithHistory qualified as Names
import Unison.Parser.Ann (Ann)
import Unison.Parser.Ann (Ann (Ann))
import Unison.Parser.Ann qualified as Ann
import Unison.Pattern (Pattern)
import Unison.Pattern qualified as Pattern
import Unison.Prelude
import Unison.Reference (Reference)
import Unison.Referent (Referent)
import Unison.Syntax.Lexer qualified as L
import Unison.Syntax.Lexer.Unison qualified as L
import Unison.Syntax.Name qualified as Name (toText, toVar, unsafeParseVar)
import Unison.Syntax.NameSegment qualified as NameSegment
import Unison.Syntax.Parser hiding (seq)
import Unison.Syntax.Parser qualified as Parser (seq, uniqueName)
import Unison.Syntax.Parser.Doc.Data qualified as Doc
import Unison.Syntax.TypeParser qualified as TypeParser
import Unison.Term (IsTop, Term)
import Unison.Term qualified as Term
@ -101,7 +105,7 @@ rewriteBlock = do
rewriteTermlike kw mk = do
kw <- quasikeyword kw
lhs <- term
(_spanAnn, rhs) <- block "==>"
(_spanAnn, rhs) <- layoutBlock "==>"
pure (mk (ann kw <> ann rhs) lhs rhs)
rewriteTerm = rewriteTermlike "term" DD.rewriteTerm
rewriteCase = rewriteTermlike "case" DD.rewriteCase
@ -113,8 +117,10 @@ rewriteBlock = do
pure (DD.rewriteType (ann kw <> ann rhs) (L.payload <$> vs) lhs rhs)
typeLink' :: (Monad m, Var v) => P v m (L.Token Reference)
typeLink' = do
id <- hqPrefixId
typeLink' = findUniqueType =<< hqPrefixId
findUniqueType :: (Monad m, Var v) => L.Token (HQ.HashQualified Name) -> P v m (L.Token Reference)
findUniqueType id = do
ns <- asks names
case Names.lookupHQType Names.IncludeSuffixes (L.payload id) ns of
s
@ -160,13 +166,13 @@ match :: (Monad m, Var v) => TermP v m
match = do
start <- openBlockWith "match"
scrutinee <- term
_ <- closeBlock
_ <- optionalCloseBlock
_ <-
P.try (openBlockWith "with") <|> do
t <- anyToken
P.customFailure (ExpectedBlockOpen "with" t)
(_arities, cases) <- NonEmpty.unzip <$> matchCases1 start
_ <- closeBlock
_ <- optionalCloseBlock
pure $
Term.match
(ann start <> ann (NonEmpty.last cases))
@ -208,10 +214,10 @@ matchCase = do
[ Nothing <$ P.try (quasikeyword "otherwise"),
Just <$> infixAppOrBooleanOp
]
(_spanAnn, t) <- block "->"
(_spanAnn, t) <- layoutBlock "->"
pure (guard, t)
let unguardedBlock = label "case match" do
(_spanAnn, t) <- block "->"
(_spanAnn, t) <- layoutBlock "->"
pure (Nothing, t)
-- a pattern's RHS is either one or more guards, or a single unguarded block.
guardsAndBlocks <- guardedBlocks <|> (pure @[] <$> unguardedBlock)
@ -354,10 +360,10 @@ lam p = label "lambda" $ mkLam <$> P.try (some prefixDefinitionName <* reserved
in Term.lam' (ann (head vs) <> ann b) annotatedArgs b
letBlock, handle, ifthen :: (Monad m, Var v) => TermP v m
letBlock = label "let" $ (snd <$> block "let")
letBlock = label "let" $ (snd <$> layoutBlock "let")
handle = label "handle" do
(handleSpan, b) <- block "handle"
(_withSpan, handler) <- block "with"
(_withSpan, handler) <- layoutBlock "with"
-- We don't use the annotation span from 'with' here because it will
-- include a dedent if it's at the end of block.
-- Meaning the newline gets overwritten when pretty-printing and it messes things up.
@ -374,7 +380,7 @@ lamCase = do
start <- openBlockWith "cases"
cases <- matchCases1 start
(arity, cases) <- checkCasesArities cases
_ <- closeBlock
_ <- optionalCloseBlock
lamvars <- replicateM arity (Parser.uniqueName 10)
let vars =
Var.named <$> [tweak v i | (v, i) <- lamvars `zip` [(1 :: Int) ..]]
@ -393,7 +399,7 @@ ifthen = label "if" do
start <- peekAny
(_spanAnn, c) <- block "if"
(_spanAnn, t) <- block "then"
(_spanAnn, f) <- block "else"
(_spanAnn, f) <- layoutBlock "else"
pure $ Term.iff (ann start <> ann f) c t f
text :: (Var v) => TermP v m
@ -435,7 +441,7 @@ resolveHashQualified tok = do
names <- asks names
case L.payload tok of
HQ.NameOnly n -> pure $ Term.var (ann tok) (Name.toVar n)
_ -> case Names.lookupHQTerm Names.IncludeSuffixes (L.payload tok) names of
hqn -> case Names.lookupHQTerm Names.IncludeSuffixes hqn names of
s
| Set.null s -> failCommitted $ UnknownTerm tok s
| Set.size s > 1 -> failCommitted $ UnknownTerm tok s
@ -462,160 +468,173 @@ termLeaf =
doc2Block <&> \(spanAnn, trm) -> trm {ABT.annotation = ABT.annotation trm <> spanAnn}
]
-- Syntax for documentation v2 blocks, which are surrounded by {{ }}.
-- | Gives a parser an explicit stream to parse, so that it consumes nothing from the original stream when it runs.
--
-- This is used inside the `Doc` -> `Term` conversion, where we have chunks of Unison code embedded that need to be
-- parsed. It's a consequence of parsing Doc in the midst of the Unison lexer.
subParse :: (Ord v, Monad m) => P v m a -> [L.Token L.Lexeme] -> P v m a
subParse p toks = do
orig <- P.getInput
P.setInput $ Input toks
result <- p <* P.eof
P.setInput orig
pure result
-- | Syntax for documentation v2 blocks, which are surrounded by @{{@ @}}@.
-- The lexer does most of the heavy lifting so there's not a lot for
-- the parser to do. For instance, in
--
-- {{
-- Hi there!
--
-- goodbye.
-- }}
-- > {{
-- > Hi there!
-- >
-- > goodbye.
-- > }}
--
-- the lexer will produce:
--
-- [Open "syntax.docUntitledSection",
-- Open "syntax.docParagraph",
-- Open "syntax.docWord", Textual "Hi", Close,
-- Open "syntax.docWord", Textual "there!", Close,
-- Close
-- Open "syntax.docParagraph",
-- Open "syntax.docWord", Textual "goodbye", Close,
-- Close
-- Close]
-- > [ Doc
-- > ( DocUntitledSection
-- > (DocParagraph (DocWord "Hi" :| [DocWord "there!"]))
-- > (DocParagraph (DocWord "goodbye" :| []))
-- > )
-- > ]
--
-- The parser will parse this into the Unison expression:
--
-- syntax.docUntitledSection [
-- syntax.docParagraph [syntax.docWord "Hi", syntax.docWord "there!"],
-- syntax.docParagraph [syntax.docWord "goodbye"]
-- ]
-- > syntax.docUntitledSection [
-- > syntax.docParagraph [syntax.docWord "Hi", syntax.docWord "there!"],
-- > syntax.docParagraph [syntax.docWord "goodbye"]
-- > ]
--
-- Where `syntax.doc{Paragraph, UntitledSection,...}` are all ordinary term
-- Where @syntax.doc{Paragraph, UntitledSection,...}@ are all ordinary term
-- variables that will be looked up in the environment like anything else. This
-- means that the documentation syntax can have its meaning changed by
-- overriding what functions the names `syntax.doc*` correspond to.
-- overriding what functions the names @syntax.doc*@ correspond to.
doc2Block :: forall m v. (Monad m, Var v) => P v m (Ann {- Annotation for the whole spanning block -}, Term v Ann)
doc2Block = do
P.lookAhead (openBlockWith "syntax.docUntitledSection") *> elem
L.Token docContents startDoc endDoc <- doc
let docAnn = Ann startDoc endDoc
(docAnn,) . docUntitledSection (gann docAnn) <$> traverse (cata $ docTop <=< sequenceA) docContents
where
-- For terms which aren't blocks the spanning annotation is the same as the
-- term annotation.
selfAnnotated :: Term v Ann -> (Ann, Term v Ann)
selfAnnotated t = (ann t, t)
elem :: P v m (Ann {- Annotation for the whole spanning block -}, Term v Ann)
elem =
(selfAnnotated <$> text) <|> do
startTok <- openBlock
let -- here, `t` will be something like `Open "syntax.docWord"`
-- so `f` will be a term var with the name "syntax.docWord".
f = f' startTok
f' t = Term.var (ann t) (Var.nameds (L.payload t))
cata :: (Functor f) => (f a -> a) -> Cofree f x -> a
cata fn (_ :< fx) = fn $ cata fn <$> fx
-- follows are some common syntactic forms used for parsing child elements
gann :: (Annotated a) => a -> Ann
gann = Ann.GeneratedFrom . ann
-- regular is parsed into `f child1 child2 child3` for however many children
regular = do
cs <- P.many (snd <$> elem)
endTok <- closeBlock
let trm = Term.apps' f cs
pure (ann startTok <> ann endTok, trm)
addDelay :: Term v Ann -> Term v Ann
addDelay tm = Term.delay (ann tm) tm
-- variadic is parsed into: `f [child1, child2, ...]`
variadic = variadic' f
variadic' f = do
cs <- P.many (snd <$> elem)
endTok <- closeBlock
let trm = Term.apps' f [Term.list (ann cs) cs]
pure (ann startTok <> ann endTok, trm)
f :: (Annotated a) => a -> String -> Term v Ann
f a = Term.var (gann a) . Var.nameds . ("syntax.doc" <>)
-- sectionLike is parsed into: `f tm [child1, child2, ...]`
sectionLike = do
arg1 <- (snd <$> elem)
cs <- P.many (snd <$> elem)
endTok <- closeBlock
let trm = Term.apps' f [arg1, Term.list (ann cs) cs]
pure (ann startTok <> ann endTok, trm)
docUntitledSection :: Ann -> Doc.UntitledSection (Term v Ann) -> Term v Ann
docUntitledSection ann (Doc.UntitledSection tops) =
Term.app ann (f ann "UntitledSection") $ Term.list (gann tops) tops
evalLike wrap = do
tm <- term
endTok <- closeBlock
let trm = Term.apps' f [wrap tm]
pure (ann startTok <> ann endTok, trm)
docTop :: Doc.Top (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m
docTop d = case d of
Doc.Section title body -> pure $ Term.apps' (f d "Section") [title, Term.list (gann body) body]
Doc.Eval code ->
Term.app (gann d) (f d "Eval") . addDelay . snd
<$> subParse (block' False False "syntax.docEval" (pure $ pure ()) $ Ann.External <$ P.eof) code
Doc.ExampleBlock code ->
Term.apps' (f d "ExampleBlock") . (Term.nat (gann d) 0 :) . pure . addDelay . snd
<$> subParse (block' False True "syntax.docExampleBlock" (pure $ pure ()) $ Ann.External <$ P.eof) code
Doc.CodeBlock label body ->
pure $
Term.apps'
(f d "CodeBlock")
[Term.text (ann label) . Text.pack $ L.payload label, Term.text (ann body) . Text.pack $ L.payload body]
Doc.BulletedList items ->
pure $ Term.app (gann d) (f d "BulletedList") . Term.list (gann items) . toList $ docColumn <$> items
Doc.NumberedList items@((n, _) :| _) ->
pure $
Term.apps'
(f d "NumberedList")
[Term.nat (ann d) $ L.payload n, Term.list (gann $ snd <$> items) . toList $ docColumn . snd <$> items]
Doc.Paragraph leaves ->
Term.app (gann d) (f d "Paragraph") . Term.list (ann leaves) . toList <$> traverse docLeaf leaves
-- converts `tm` to `'tm`
--
-- Embedded examples like ``1 + 1`` are represented as terms,
-- but are wrapped in delays so they are left unevaluated for the
-- code which renders documents. (We want the doc display to get
-- the unevaluated expression `1 + 1` and not `2`)
addDelay tm = Term.delay (ann tm) tm
case L.payload startTok of
"syntax.docJoin" -> variadic
"syntax.docUntitledSection" -> variadic
"syntax.docColumn" -> variadic
"syntax.docParagraph" -> variadic
"syntax.docSignature" -> variadic
"syntax.docSource" -> variadic
"syntax.docFoldedSource" -> variadic
"syntax.docBulletedList" -> variadic
"syntax.docSourceAnnotations" -> variadic
"syntax.docSourceElement" -> do
link <- (snd <$> elem)
anns <- P.optional $ reserved "@" *> (snd <$> elem)
endTok <- closeBlock
let trm = Term.apps' f [link, fromMaybe (Term.list (ann link) mempty) anns]
pure (ann startTok <> ann endTok, trm)
"syntax.docNumberedList" -> do
nitems@((n, _) : _) <- P.some nitem
endTok <- closeBlock
let items = snd <$> nitems
let trm = Term.apps' f [n, Term.list (ann items) items]
pure (ann startTok <> ann endTok, trm)
where
nitem = do
n <- number
t <- openBlockWith "syntax.docColumn"
let f = f' ("syntax.docColumn" <$ t)
(_spanAnn, child) <- variadic' f
pure (n, child)
"syntax.docSection" -> sectionLike
-- @source{ type Blah, foo, type Bar }
"syntax.docEmbedTermLink" -> do
tm <- addDelay <$> (hashQualifiedPrefixTerm <|> hashQualifiedInfixTerm)
endTok <- closeBlock
let trm = Term.apps' f [tm]
pure (ann startTok <> ann endTok, trm)
"syntax.docEmbedSignatureLink" -> do
tm <- addDelay <$> (hashQualifiedPrefixTerm <|> hashQualifiedInfixTerm)
endTok <- closeBlock
let trm = Term.apps' f [tm]
pure (ann startTok <> ann endTok, trm)
"syntax.docEmbedTypeLink" -> do
r <- typeLink'
endTok <- closeBlock
let trm = Term.apps' f [Term.typeLink (ann r) (L.payload r)]
pure (ann startTok <> ann endTok, trm)
"syntax.docExample" -> do
trm <- term
endTok <- closeBlock
let spanAnn = ann startTok <> ann endTok
pure . (spanAnn,) $ case trm of
tm@(Term.Apps' _ xs) ->
let fvs = List.Extra.nubOrd $ concatMap (toList . Term.freeVars) xs
n = Term.nat (ann tm) (fromIntegral (length fvs))
lam = addDelay $ Term.lam' (ann tm) ((Ann.GeneratedFrom spanAnn,) <$> fvs) tm
in Term.apps' f [n, lam]
tm -> Term.apps' f [Term.nat (ann tm) 0, addDelay tm]
"syntax.docTransclude" -> evalLike id
"syntax.docEvalInline" -> evalLike addDelay
"syntax.docExampleBlock" -> do
(spanAnn, tm) <- block'' False True "syntax.docExampleBlock" (pure (void startTok)) closeBlock
pure $ (spanAnn, Term.apps' f [Term.nat (ann tm) 0, addDelay tm])
"syntax.docEval" -> do
(spanAnn, tm) <- block' False "syntax.docEval" (pure (void startTok)) closeBlock
pure $ (spanAnn, Term.apps' f [addDelay tm])
_ -> regular
docColumn :: Doc.Column (Term v Ann) -> Term v Ann
docColumn d@(Doc.Column para sublist) =
Term.app (gann d) (f d "Column") . Term.list (gann d) $ para : toList sublist
docLeaf :: Doc.Leaf (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m
docLeaf d = case d of
Doc.Link link -> Term.app (gann d) (f d "Link") <$> docEmbedLink link
Doc.NamedLink para target -> Term.apps' (f d "NamedLink") . (para :) . pure <$> docLeaf (vacuous target)
Doc.Example code -> do
trm <- subParse term code
pure . Term.apps' (f d "Example") $ case trm of
tm@(Term.Apps' _ xs) ->
let fvs = List.Extra.nubOrd $ concatMap (toList . Term.freeVars) xs
n = Term.nat (ann tm) (fromIntegral (length fvs))
lam = addDelay $ Term.lam' (ann tm) ((mempty,) <$> fvs) tm
in [n, lam]
tm -> [Term.nat (ann tm) 0, addDelay tm]
Doc.Transclude code -> Term.app (gann d) (f d "Transclude") <$> subParse term code
Doc.Bold para -> pure $ Term.app (gann d) (f d "Bold") para
Doc.Italic para -> pure $ Term.app (gann d) (f d "Italic") para
Doc.Strikethrough para -> pure $ Term.app (gann d) (f d "Strikethrough") para
Doc.Verbatim leaf -> Term.app (gann d) (f d "Verbatim") <$> docLeaf (bimap absurd absurd leaf)
Doc.Code leaf -> Term.app (gann d) (f d "Code") <$> docLeaf (bimap absurd absurd leaf)
Doc.Source elems ->
Term.app (gann d) (f d "Source") . Term.list (ann elems) . toList <$> traverse docSourceElement elems
Doc.FoldedSource elems ->
Term.app (gann d) (f d "FoldedSource") . Term.list (ann elems) . toList <$> traverse docSourceElement elems
Doc.EvalInline code -> Term.app (gann d) (f d "EvalInline") . addDelay <$> subParse term code
Doc.Signature links ->
Term.app (gann d) (f d "Signature") . Term.list (ann links) . toList <$> traverse docEmbedSignatureLink links
Doc.SignatureInline link -> Term.app (gann d) (f d "SignatureInline") <$> docEmbedSignatureLink link
Doc.Word txt -> pure . Term.app (gann d) (f d "Word") . Term.text (ann txt) . Text.pack $ L.payload txt
Doc.Group (Doc.Join leaves) ->
Term.app (gann d) (f d "Group") . Term.app (gann d) (f d "Join") . Term.list (ann leaves) . toList
<$> traverse docLeaf leaves
docEmbedLink :: Doc.EmbedLink (ReferenceType, HQ'.HashQualified Name) -> TermP v m
docEmbedLink d@(Doc.EmbedLink (L.Token (level, ident) start end)) = case level of
RtType ->
Term.app (gann d) (f d "EmbedTypeLink") . Term.typeLink (ann d) . L.payload
<$> findUniqueType (L.Token (HQ'.toHQ ident) start end)
RtTerm ->
Term.app (gann d) (f d "EmbedTermLink") . addDelay <$> resolveHashQualified (L.Token (HQ'.toHQ ident) start end)
docSourceElement ::
Doc.SourceElement
(ReferenceType, HQ'.HashQualified Name)
(Doc.Leaf (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] Void) ->
TermP v m
docSourceElement d@(Doc.SourceElement link anns) = do
link' <- docEmbedLink link
anns' <- traverse docEmbedAnnotation anns
pure $ Term.apps' (f d "SourceElement") [link', Term.list (ann anns) anns']
docEmbedSignatureLink :: Doc.EmbedSignatureLink (ReferenceType, HQ'.HashQualified Name) -> TermP v m
docEmbedSignatureLink d@(Doc.EmbedSignatureLink (L.Token (level, ident) start end)) = case level of
RtType -> P.customFailure . TypeNotAllowed $ L.Token (HQ'.toHQ ident) start end
RtTerm ->
Term.app (gann d) (f d "EmbedSignatureLink") . addDelay
<$> resolveHashQualified (L.Token (HQ'.toHQ ident) start end)
docEmbedAnnotation ::
Doc.EmbedAnnotation
(ReferenceType, HQ'.HashQualified Name)
(Doc.Leaf (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] Void) ->
TermP v m
docEmbedAnnotation d@(Doc.EmbedAnnotation a) =
-- This is the only place I'm not sure we're doing the right thing. In the lexer, this can be an identifier or a
-- DocLeaf, but here it could be either /text/ or a Doc element. And I don't think there's any way the lexemes
-- produced for an identifier and the lexemes consumed for text line up. So, I think this is a bugfix I can't
-- avoid.
Term.app (gann d) (f d "EmbedAnnotation")
<$> either
( \(L.Token (level, ident) start end) -> case level of
RtType -> P.customFailure . TypeNotAllowed $ L.Token (HQ'.toHQ ident) start end
RtTerm -> resolveHashQualified $ L.Token (HQ'.toHQ ident) start end
)
(docLeaf . vacuous)
a
docBlock :: (Monad m, Var v) => TermP v m
docBlock = do
@ -989,7 +1008,7 @@ delayQuote = P.label "quote" do
delayBlock :: (Monad m, Var v) => P v m (Ann {- Ann spanning the whole block -}, Term v Ann)
delayBlock = P.label "do" do
(spanAnn, b) <- block "do"
(spanAnn, b) <- layoutBlock "do"
let argSpan = (ann b {- would be nice to use the annotation for 'do' here, but it's not terribly important -})
pure $ (spanAnn, DD.delayTerm (ann b) argSpan b)
@ -1076,7 +1095,7 @@ destructuringBind = do
let boundVars' = snd <$> boundVars
_ <- P.lookAhead (openBlockWith "=")
pure (p, boundVars')
(_spanAnn, scrute) <- block "=" -- Dwight K. Scrute ("The People's Scrutinee")
(_spanAnn, scrute) <- layoutBlock "=" -- Dwight K. Scrute ("The People's Scrutinee")
let guard = Nothing
let absChain vs t = foldr (\v t -> ABT.abs' (ann t) v t) t vs
thecase t = Term.MatchCase p (fmap (absChain boundVars) guard) $ absChain boundVars t
@ -1144,7 +1163,10 @@ customFailure :: (P.MonadParsec e s m) => e -> m a
customFailure = P.customFailure
block :: forall m v. (Monad m, Var v) => String -> P v m (Ann, Term v Ann)
block s = block' False s (openBlockWith s) closeBlock
block s = block' False False s (openBlockWith s) closeBlock
layoutBlock :: forall m v. (Monad m, Var v) => String -> P v m (Ann, Term v Ann)
layoutBlock s = block' False False s (openBlockWith s) optionalCloseBlock
-- example: use Foo.bar.Baz + ++ x
-- + ++ and x are called the "suffixes" of the `use` statement, and
@ -1214,24 +1236,16 @@ substImports ns imports =
]
block' ::
(Monad m, Var v) =>
IsTop ->
String ->
P v m (L.Token ()) ->
P v m (L.Token ()) ->
P v m (Ann {- ann which spans the whole block -}, Term v Ann)
block' isTop = block'' isTop False
block'' ::
forall m v end.
(Monad m, Var v, Annotated end) =>
IsTop ->
Bool -> -- `True` means insert `()` at end of block if it ends with a statement
-- | `True` means insert `()` at end of block if it ends with a statement
Bool ->
String ->
P v m (L.Token ()) ->
P v m end ->
P v m (Ann {- ann which spans the whole block -}, Term v Ann)
block'' isTop implicitUnitAtEnd s openBlock closeBlock = do
block' isTop implicitUnitAtEnd s openBlock closeBlock = do
open <- openBlock
(names, imports) <- imports
_ <- optional semi

View File

@ -51,7 +51,7 @@ import Unison.Reference qualified as Reference
import Unison.Referent (Referent)
import Unison.Referent qualified as Referent
import Unison.Syntax.HashQualified qualified as HQ (unsafeFromVar)
import Unison.Syntax.Lexer (showEscapeChar)
import Unison.Syntax.Lexer.Unison (showEscapeChar)
import Unison.Syntax.Name qualified as Name (isSymboly, parseText, parseTextEither, toText, unsafeParseText)
import Unison.Syntax.NamePrinter (styleHashQualified'')
import Unison.Syntax.NameSegment qualified as NameSegment (toEscapedText)

View File

@ -152,8 +152,7 @@ import Unison.ShortHash qualified as SH
import Unison.Sqlite qualified as Sqlite
import Unison.Symbol (Symbol)
import Unison.Syntax.HashQualified qualified as HQ (parseTextWith, toText)
import Unison.Syntax.Lexer qualified as L
import Unison.Syntax.Lexer qualified as Lexer
import Unison.Syntax.Lexer.Unison qualified as L
import Unison.Syntax.Name qualified as Name (toText, toVar, unsafeParseVar)
import Unison.Syntax.NameSegment qualified as NameSegment
import Unison.Syntax.Parser qualified as Parser
@ -1162,7 +1161,7 @@ handleFindI isVerbose fscope ws input = do
-- name query
qs -> do
let anythingBeforeHash :: Megaparsec.Parsec (Lexer.Token Text) [Char] Text
let anythingBeforeHash :: Megaparsec.Parsec (L.Token Text) [Char] Text
anythingBeforeHash = Text.pack <$> Megaparsec.takeWhileP Nothing (/= '#')
let srs =
searchBranchScored

View File

@ -57,7 +57,7 @@ import Unison.Result (Note)
import Unison.Result qualified as Result
import Unison.Symbol (Symbol)
import Unison.Syntax.HashQualifiedPrime qualified as HQ' (toText)
import Unison.Syntax.Lexer qualified as L
import Unison.Syntax.Lexer.Unison qualified as L
import Unison.Syntax.Name qualified as Name
import Unison.Syntax.Parser qualified as Parser
import Unison.Syntax.TypePrinter qualified as TypePrinter

View File

@ -41,7 +41,7 @@ import Unison.Server.Backend qualified as Backend
import Unison.Server.NameSearch (NameSearch)
import Unison.Sqlite qualified as Sqlite
import Unison.Symbol
import Unison.Syntax.Lexer qualified as Lexer
import Unison.Syntax.Lexer.Unison qualified as Lexer
import Unison.Type (Type)
import Unison.UnisonFile qualified as UF
import Unison.UnisonFile.Summary (FileSummary (..))

View File

@ -542,7 +542,7 @@ fix_4384d = {{ {{ docExampleBlock 0 '[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,
fix_4384e =
id : x -> x
id x = x
{{ {{ docExampleBlock 0 (id id id id id id id id id id id id id id id id id id id id id (x -> 0) }} }}
{{ {{ docExampleBlock 0 (id id id id id id id id id id id id id id id id id id id id id (x -> 0)) }} }}
fnApplicationSyntax =
Environment.default = do 1 + 1

View File

@ -290,6 +290,7 @@ x = match Some a with
I was surprised to find a -> here.
I was expecting one of these instead:
* end of input
* newline or semicolon
```
@ -312,6 +313,7 @@ x = match Some a with
I was surprised to find a '|' here.
I was expecting one of these instead:
* end of input
* newline or semicolon
```

View File

@ -0,0 +1,12 @@
```ucm:hide
scratch/main> builtins.mergeio
```
Nested call to code lexer wasn't terminating inline examples containing blocks properly.
```unison
x = {{
``let "me"`` live
``do "me"`` in
}}
```

View File

@ -0,0 +1,22 @@
Nested call to code lexer wasn't terminating inline examples containing blocks properly.
``` unison
x = {{
``let "me"`` live
``do "me"`` in
}}
```
``` ucm
Loading changes detected in scratch.u.
I found and typechecked these definitions in scratch.u. If you
do an `add` or `update`, here's how your codebase would
change:
⍟ These new definitions are ok to `add`:
x : Doc2
```

View File

@ -9,7 +9,9 @@ dependencies:
- bytes
- containers
- cryptonite
- deriving-compat
- extra
- free
- lens
- megaparsec
- mtl

View File

@ -4,7 +4,11 @@
module Unison.Parser.Ann where
import Control.Comonad.Cofree (Cofree ((:<)))
import Data.List.NonEmpty (NonEmpty)
import Data.Void (absurd)
import Unison.Lexer.Pos qualified as L
import Unison.Prelude
data Ann
= -- Used for things like Builtins which don't have a source position.
@ -79,3 +83,24 @@ encompasses (GeneratedFrom ann) other = encompasses ann other
encompasses ann (GeneratedFrom other) = encompasses ann other
encompasses (Ann start1 end1) (Ann start2 end2) =
Just $ start1 <= start2 && end1 >= end2
class Annotated a where
ann :: a -> Ann
instance Annotated Ann where
ann = id
instance (Annotated a) => Annotated [a] where
ann = foldMap ann
instance (Annotated a) => Annotated (NonEmpty a) where
ann = foldMap ann
instance (Annotated a) => Annotated (Maybe a) where
ann = foldMap ann
instance Annotated Void where
ann = absurd
instance (Annotated a) => Annotated (Cofree f a) where
ann (a :< _) = ann a

File diff suppressed because it is too large Load Diff

View File

@ -6,9 +6,10 @@ module Unison.Syntax.Lexer.Token
where
import Data.Text qualified as Text
import Text.Megaparsec (ParsecT, TraversableStream)
import Text.Megaparsec (MonadParsec, TraversableStream)
import Text.Megaparsec qualified as P
import Unison.Lexer.Pos (Pos (Pos))
import Unison.Parser.Ann (Ann (Ann), Annotated (..))
import Unison.Prelude
data Token a = Token
@ -18,6 +19,9 @@ data Token a = Token
}
deriving stock (Eq, Ord, Show, Functor, Foldable, Traversable)
instance Annotated (Token a) where
ann (Token _ s e) = Ann s e
instance Applicative Token where
pure a = Token a (Pos 0 0) (Pos 0 0)
Token f start _ <*> Token a _ end = Token (f a) start end
@ -39,14 +43,14 @@ instance Applicative Token where
instance P.ShowErrorComponent (Token Text) where
showErrorComponent = Text.unpack . payload
tokenP :: (Ord e, TraversableStream s) => ParsecT e s m a -> ParsecT e s m (Token a)
tokenP :: (Ord e, TraversableStream s, MonadParsec e s m) => m a -> m (Token a)
tokenP p = do
start <- posP
payload <- p
end <- posP
pure Token {payload, start, end}
posP :: (Ord e, TraversableStream s) => ParsecT e s m Pos
posP :: (Ord e, TraversableStream s, MonadParsec e s m) => m Pos
posP = do
p <- P.getSourcePos
pure (Pos (P.unPos (P.sourceLine p)) (P.unPos (P.sourceColumn p)))

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,12 @@
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -Wno-orphans #-}
module Unison.Syntax.Parser
( Annotated (..),
Err,
Error (..),
Input,
-- FIXME: Dont export the data constructor
Input (..),
P,
ParsingEnv (..),
UniqueName,
@ -15,6 +17,8 @@ module Unison.Syntax.Parser
chainr1,
character,
closeBlock,
optionalCloseBlock,
doc,
failCommitted,
failureIf,
hqInfixId,
@ -57,6 +61,7 @@ where
import Control.Monad.Reader (ReaderT (..))
import Control.Monad.Reader.Class (asks)
import Crypto.Random qualified as Random
import Data.Bool (bool)
import Data.Bytes.Put (runPutS)
import Data.Bytes.Serial (serialize)
import Data.Bytes.VarInt (VarInt (..))
@ -67,6 +72,7 @@ import Data.Set qualified as Set
import Data.Text qualified as Text
import Text.Megaparsec (runParserT)
import Text.Megaparsec qualified as P
import U.Codebase.Reference (ReferenceType (..))
import U.Util.Base32Hex qualified as Base32Hex
import Unison.ABT qualified as ABT
import Unison.ConstructorReference (ConstructorReference)
@ -77,14 +83,16 @@ import Unison.Hashable qualified as Hashable
import Unison.Name as Name
import Unison.Names (Names)
import Unison.Names.ResolutionResult qualified as Names
import Unison.Parser.Ann (Ann (..))
import Unison.Parser.Ann (Ann (..), Annotated (..))
import Unison.Pattern (Pattern)
import Unison.Pattern qualified as Pattern
import Unison.Prelude
import Unison.Reference (Reference)
import Unison.Referent (Referent)
import Unison.Syntax.Lexer qualified as L
import Unison.Syntax.Lexer.Unison qualified as L
import Unison.Syntax.Name qualified as Name (toVar, unsafeParseText)
import Unison.Syntax.Parser.Doc qualified as Doc
import Unison.Syntax.Parser.Doc.Data qualified as Doc
import Unison.Term (MatchCase (..))
import Unison.UnisonFile.Error qualified as UF
import Unison.Util.Bytes (Bytes)
@ -154,19 +162,22 @@ data Error v
| UnknownType (L.Token (HQ.HashQualified Name)) (Set Reference)
| UnknownId (L.Token (HQ.HashQualified Name)) (Set Referent) (Set Reference)
| ExpectedBlockOpen String (L.Token L.Lexeme)
| -- Indicates a cases or match/with which doesn't have any patterns
| -- | Indicates a cases or match/with which doesn't have any patterns
EmptyMatch (L.Token ())
| EmptyWatch Ann
| UseInvalidPrefixSuffix (Either (L.Token Name) (L.Token Name)) (Maybe [L.Token Name])
| UseEmpty (L.Token String) -- an empty `use` statement
| DidntExpectExpression (L.Token L.Lexeme) (Maybe (L.Token L.Lexeme))
| TypeDeclarationErrors [UF.Error v Ann]
| -- MissingTypeModifier (type|ability) name
| -- | MissingTypeModifier (type|ability) name
MissingTypeModifier (L.Token String) (L.Token v)
| -- | A type was found in a position that requires a term
TypeNotAllowed (L.Token (HQ.HashQualified Name))
| ResolutionFailures [Names.ResolutionFailure v Ann]
| DuplicateTypeNames [(v, [Ann])]
| DuplicateTermNames [(v, [Ann])]
| PatternArityMismatch Int Int Ann -- PatternArityMismatch expectedArity actualArity location
| -- | PatternArityMismatch expectedArity actualArity location
PatternArityMismatch Int Int Ann
| FloatPattern Ann
deriving (Show, Eq, Ord)
@ -177,25 +188,12 @@ newtype Input = Input {inputStream :: [L.Token L.Lexeme]}
deriving stock (Eq, Ord, Show)
deriving newtype (P.Stream, P.VisualStream)
class Annotated a where
ann :: a -> Ann
instance Annotated Ann where
ann = id
instance Annotated (L.Token a) where
ann (L.Token _ s e) = Ann s e
instance (Annotated a) => Annotated (ABT.Term f v a) where
ann = ann . ABT.annotation
instance (Annotated a) => Annotated (Pattern a) where
ann = ann . Pattern.loc
instance (Annotated a) => Annotated [a] where
ann [] = mempty
ann (h : t) = foldl' (\acc a -> acc <> ann a) (ann h) t
instance (Annotated a, Annotated b) => Annotated (MatchCase a b) where
ann (MatchCase p _ b) = ann p <> ann b
@ -207,8 +205,7 @@ label = P.label
traceRemainingTokens :: (Ord v) => String -> P v m ()
traceRemainingTokens label = do
remainingTokens <- lookAhead $ many anyToken
let _ =
trace ("REMAINDER " ++ label ++ ":\n" ++ L.debugLex'' remainingTokens) ()
let _ = trace ("REMAINDER " ++ label ++ ":\n" ++ L.debugPreParse (L.preParse remainingTokens)) ()
pure ()
mkAnn :: (Annotated a, Annotated b) => a -> b -> Ann
@ -239,23 +236,20 @@ rootFile p = p <* P.eof
run' :: (Monad m, Ord v) => P v m a -> String -> String -> ParsingEnv m -> m (Either (Err v) a)
run' p s name env =
let lex =
if debug
then L.lexer name (trace (L.debugLex''' "lexer receives" s) s)
else L.lexer name s
let lex = bool id (traceWith L.debugPreParse) debug . L.preParse $ L.lexer name s
pTraced = traceRemainingTokens "parser receives" *> p
in runReaderT (runParserT pTraced name (Input lex)) env <&> \case
in runReaderT (runParserT pTraced name . Input $ toList lex) env <&> \case
Left err -> Left (Nel.head (P.bundleErrors err))
Right x -> Right x
run :: (Monad m, Ord v) => P v m a -> String -> ParsingEnv m -> m (Either (Err v) a)
run p s = run' p s ""
-- Virtual pattern match on a lexeme.
-- | Virtual pattern match on a lexeme.
--
-- Consumes the next token when @f@ returns 'Just', re-wrapping the result in
-- the token's original source positions ('traverse' maps over the payload
-- only). Fails without consuming input otherwise; the empty 'Set' means no
-- expected-token hints are reported on failure.
queryToken :: (Ord v) => (L.Lexeme -> Maybe a) -> P v m (L.Token a)
queryToken f = P.token (traverse f) Set.empty
-- Consume a block opening and return the string that opens the block.
-- | Consume a block opening and return the string that opens the block.
openBlock :: (Ord v) => P v m (L.Token String)
openBlock = queryToken getOpen
where
@ -265,33 +259,38 @@ openBlock = queryToken getOpen
openBlockWith :: (Ord v) => String -> P v m (L.Token ())
openBlockWith s = void <$> P.satisfy ((L.Open s ==) . L.payload)
-- Match a particular lexeme exactly, and consume it.
-- | Match a particular lexeme exactly, and consume it.
matchToken :: (Ord v) => L.Lexeme -> P v m (L.Token L.Lexeme)
matchToken x = P.satisfy ((==) x . L.payload)
-- Consume a virtual semicolon
-- | Consume a virtual semicolon
--
-- Matches any 'L.Semi' token, discarding its payload (the flag the lexer
-- stores inside 'L.Semi'); failures are reported as expecting a
-- "newline or semicolon".
semi :: (Ord v) => P v m (L.Token ())
semi = label "newline or semicolon" $ queryToken go
  where
    go (L.Semi _) = Just ()
    go _ = Nothing
-- Consume the end of a block
-- | Consume the end of a block
closeBlock :: (Ord v) => P v m (L.Token ())
closeBlock = void <$> matchToken L.Close
-- | With layout, blocks might “close” without an explicit outdent (e.g., not even a newline at the end of a
-- `Doc.Transclude`). This allows those blocks to be closed by EOF.
--
-- When the block is closed by EOF rather than an 'L.Close' token, the
-- synthesized token carries 'mempty' start/end positions.
optionalCloseBlock :: (Ord v) => P v m (L.Token ())
optionalCloseBlock = closeBlock <|> (\() -> L.Token () mempty mempty) <$> P.eof
wordyPatternName :: (Var v) => P v m (L.Token v)
wordyPatternName = queryToken \case
L.WordyId (HQ'.NameOnly n) -> Just $ Name.toVar n
_ -> Nothing
-- Parse an prefix identifier e.g. Foo or (+), discarding any hash
-- | Parse a prefix identifier e.g. Foo or (+), discarding any hash
prefixDefinitionName :: (Var v) => P v m (L.Token v)
prefixDefinitionName =
wordyDefinitionName <|> parenthesize symbolyDefinitionName
-- Parse a prefix identifier e.g. Foo or (+), rejecting any hash
-- This is useful for term declarations, where type signatures and term names should not have hashes.
-- | Parse a prefix identifier e.g. Foo or (+), rejecting any hash
-- This is useful for term declarations, where type signatures and term names should not have hashes.
prefixTermName :: (Var v) => P v m (L.Token v)
prefixTermName = wordyTermName <|> parenthesize symbolyTermName
where
@ -303,34 +302,34 @@ prefixTermName = wordyTermName <|> parenthesize symbolyTermName
L.SymbolyId (HQ'.NameOnly n) -> Just $ Name.toVar n
_ -> Nothing
-- Parse a wordy identifier e.g. Foo, discarding any hash
-- | Parse a wordy identifier e.g. Foo, discarding any hash
wordyDefinitionName :: (Var v) => P v m (L.Token v)
wordyDefinitionName = queryToken $ \case
L.WordyId n -> Just $ Name.toVar (HQ'.toName n)
L.Blank s -> Just $ Var.nameds ("_" <> s)
_ -> Nothing
-- Parse a wordyId as a Name, rejecting any hash
-- | Parse a wordyId as a Name, rejecting any hash
importWordyId :: (Ord v) => P v m (L.Token Name)
importWordyId = queryToken \case
L.WordyId (HQ'.NameOnly n) -> Just n
L.Blank s | not (null s) -> Just $ Name.unsafeParseText (Text.pack ("_" <> s))
_ -> Nothing
-- The `+` in: use Foo.bar + as a Name
-- | The `+` in: use Foo.bar + as a Name
importSymbolyId :: (Ord v) => P v m (L.Token Name)
importSymbolyId = queryToken \case
L.SymbolyId (HQ'.NameOnly n) -> Just n
_ -> Nothing
-- Parse a symboly ID like >>= or &&, discarding any hash
-- | Parse a symboly ID like >>= or &&, discarding any hash
symbolyDefinitionName :: (Var v) => P v m (L.Token v)
symbolyDefinitionName = queryToken $ \case
L.SymbolyId n -> Just $ Name.toVar (HQ'.toName n)
_ -> Nothing
-- | Expect parentheses around a token, includes the parentheses within the start/end
-- annotations of the resulting token.
-- annotations of the resulting token.
parenthesize :: (Ord v) => P v m (L.Token a) -> P v m (L.Token a)
parenthesize p = do
(start, a) <- P.try do
@ -344,7 +343,7 @@ hqPrefixId, hqInfixId :: (Ord v) => P v m (L.Token (HQ.HashQualified Name))
hqPrefixId = hqWordyId_ <|> parenthesize hqSymbolyId_
hqInfixId = hqSymbolyId_
-- Parse a hash-qualified alphanumeric identifier
-- | Parse a hash-qualified alphanumeric identifier
hqWordyId_ :: (Ord v) => P v m (L.Token (HQ.HashQualified Name))
hqWordyId_ = queryToken \case
L.WordyId n -> Just $ HQ'.toHQ n
@ -352,20 +351,20 @@ hqWordyId_ = queryToken \case
L.Blank s | not (null s) -> Just $ HQ.NameOnly (Name.unsafeParseText (Text.pack ("_" <> s)))
_ -> Nothing
-- Parse a hash-qualified symboly ID like >>=#foo or &&
-- | Parse a hash-qualified symboly ID like >>=#foo or &&
hqSymbolyId_ :: (Ord v) => P v m (L.Token (HQ.HashQualified Name))
hqSymbolyId_ = queryToken \case
L.SymbolyId n -> Just (HQ'.toHQ n)
_ -> Nothing
-- Parse a reserved word
-- | Parse a reserved word
reserved :: (Ord v) => String -> P v m (L.Token String)
reserved w = label w $ queryToken getReserved
where
getReserved (L.Reserved w') | w == w' = Just w
getReserved _ = Nothing
-- Parse a placeholder or typed hole
-- | Parse a placeholder or typed hole
blank :: (Ord v) => P v m (L.Token String)
blank = label "blank" $ queryToken getBlank
where
@ -405,6 +404,12 @@ string = queryToken getString
getString (L.Textual s) = Just (Text.pack s)
getString _ = Nothing
-- | Match a lexed @Doc@ literal, returning the pre-parsed Doc tree that the
-- lexer stored in the 'L.Doc' lexeme.
doc ::
  (Ord v) => P v m (L.Token (Doc.UntitledSection (Doc.Tree (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme])))
doc = queryToken \case
  L.Doc d -> pure d
  _ -> Nothing
-- | Parses a tuple of 'a's, or a single parenthesized 'a'
--
-- returns the result of combining elements with 'pair', alongside the annotation containing
@ -435,12 +440,12 @@ chainr1 p op = go1
go1 = p >>= go2
go2 hd = do { op <- op; op hd <$> go1 } <|> pure hd
-- Parse `p` 1+ times, combining with `op`
-- | Parse `p` 1+ times, combining the results left-associatively with `op`.
chainl1 :: (Ord v) => P v m a -> P v m (a -> a -> a) -> P v m a
chainl1 p op = do
  hd <- p
  steps <- P.many ((,) <$> op <*> p)
  pure $ foldl (\acc (combine, x) -> combine acc x) hd steps
-- If `p` would succeed, this fails uncommitted.
-- Otherwise, `failIfOk` used to produce the output
-- | If `p` would succeed, this fails uncommitted.
-- Otherwise, `failIfOk` used to produce the output
failureIf :: (Ord v) => P v m (P v m b) -> P v m a -> P v m b
failureIf failIfOk p = do
dontwant <- P.try . P.lookAhead $ failIfOk
@ -448,9 +453,9 @@ failureIf failIfOk p = do
when (isJust p) $ fail "failureIf"
dontwant
-- Gives this var an id based on its position - a useful trick to
-- obtain a variable whose id won't match any other id in the file
-- `positionalVar a Var.missingResult`
-- | Gives this var an id based on its position - a useful trick to
-- obtain a variable whose id won't match any other id in the file
-- `positionalVar a Var.missingResult`
positionalVar :: (Annotated a, Var v) => a -> v -> v
positionalVar a v =
let s = start (ann a)

View File

@ -0,0 +1,624 @@
-- | The parser for Unisons @Doc@ syntax.
--
-- This is completely independent of the Unison language, and requires a couple parsers to be passed in to then
-- provide a parser for @Doc@ applied to any host language.
--
-- - an identifier parser
-- - a code parser (that accepts a termination parser)
-- - a termination parser, for this parser to know when to give up
--
-- Each of those parsers is expected to satisfy @(`Ord` e, `P.MonadParsec` e `String` m)@.
module Unison.Syntax.Parser.Doc
( Tree,
initialEnv,
doc,
untitledSection,
sectionElem,
leaf,
-- * section elements
section,
eval,
exampleBlock,
codeBlock,
list,
bulletedList,
numberedList,
paragraph,
-- * leaves
link,
namedLink,
example,
transclude,
bold,
italic,
strikethrough,
verbatim,
source,
foldedSource,
evalInline,
signatures,
signatureInline,
group,
word,
-- * other components
column',
embedLink,
embedSignatureLink,
join,
)
where
import Control.Comonad.Cofree (Cofree ((:<)))
import Control.Monad.Reader qualified as R
import Data.Char (isControl, isSpace)
import Data.List qualified as List
import Data.List.Extra qualified as List
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.List.NonEmpty qualified as NonEmpty
import Text.Megaparsec qualified as P
import Text.Megaparsec.Char (char)
import Text.Megaparsec.Char qualified as CP
import Text.Megaparsec.Char.Lexer qualified as LP
import Unison.Parser.Ann (Ann, Annotated (..))
import Unison.Prelude hiding (join)
import Unison.Syntax.Lexer (column, line, lit, sepBy1', some', someTill', (<+>))
import Unison.Syntax.Lexer.Token (Token (Token), posP, tokenP)
import Unison.Syntax.Parser.Doc.Data
type Tree ident code = Cofree (Top ident code) Ann
data ParsingEnv = ParsingEnv
{ -- | Use a stack to remember the parent section and allow docSections within docSections.
-- - 1 means we are inside a # Heading 1
parentSections :: [Int],
-- | 4 means we are inside a list starting at the fourth column
parentListColumn :: Int
}
deriving (Show)
initialEnv :: ParsingEnv
initialEnv = ParsingEnv [0] 0
doc ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m end ->
m (UntitledSection (Tree ident code))
doc ident code = flip R.runReaderT initialEnv . untitledSection . sectionElem ident code . void
-- | This is the actual `Doc` lexer. Unlike `doc2`, it doesnt do any Unison-side lexing (i.e., it doesnt know that
-- Unison wraps `Doc` literals in `}}`).
untitledSection :: (P.MonadParsec e String m) => m a -> m (UntitledSection a)
untitledSection a = UntitledSection <$> P.many (a <* CP.space)
sectionElem ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
R.ReaderT ParsingEnv m (Tree ident code)
sectionElem ident code docClose =
fmap wrap' $
section ident code docClose
<|> lift (P.label "block eval (syntax: a fenced code block)" (eval code <|> exampleBlock code <|> codeBlock))
<|> list ident code docClose
<|> lift (paragraph ident code docClose)
paragraph ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
m (Top ident code (Tree ident code))
paragraph ident code docClose = fmap Paragraph . spaced docClose $ leafy ident code docClose
-- | A single word of plain text, running up to (but not consuming) whitespace
-- or the given closing parser. Fails without consuming input when the word is
-- empty or is a reserved spelling (starts with @}}@, or is all @#@s).
word :: (Ord e, P.MonadParsec e String m) => m end -> m (Leaf ident code void)
word closing = Word <$> tokenP (P.try wordChars)
  where
    wordChars = do
      let stop = P.lookAhead $ void (P.satisfy isSpace) <|> void closing
      w <- P.manyTill (P.satisfy (not . isSpace)) stop
      guard . not $ null w || isReserved w
      pure w
    isReserved w = "}}" `List.isPrefixOf` w || all (== '#') w
leaf ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
m (Leaf ident code (Tree ident code))
leaf ident code closing =
link ident
<|> namedLink ident code closing
<|> example code
<|> transclude code
<|> bold ident code closing
<|> italic ident code closing
<|> strikethrough ident code closing
<|> verbatim
<|> source ident code
<|> foldedSource ident code
<|> evalInline code
<|> signatures ident
<|> signatureInline ident
<|> word closing
leafy ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
m (Leaf ident code (Tree ident code))
leafy ident code closing = do
p <- leaf ident code closing
after <- P.optional . P.try $ leafy ident code closing
case after of
Nothing -> pure p
Just after -> group . pure $ p :| pure after
comma :: (P.MonadParsec e String m) => m String
comma = lit "," <* CP.space
source :: (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m (Leaf ident code a)
source ident = fmap Source . (lit "@source" *>) . sourceElements ident
foldedSource :: (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m (Leaf ident code a)
foldedSource ident = fmap FoldedSource . (lit "@foldedSource" *>) . sourceElements ident
-- | The shared body of @\@source@ and @\@foldedSource@: a braced,
-- comma-separated, non-empty list of links, each optionally followed by
-- @\@@-introduced annotations.
sourceElements ::
  (Ord e, P.MonadParsec e String m) =>
  m ident ->
  (m () -> m code) ->
  m (NonEmpty (SourceElement ident (Leaf ident code Void)))
sourceElements ident code = do
  _ <- (lit " {" <|> lit "{") *> CP.space
  s <- sepBy1' srcElem comma
  _ <- lit "}"
  pure s
  where
    srcElem =
      SourceElement
        <$> embedLink ident
        <*> ( fmap (fromMaybe []) . P.optional $
                (lit "@") *> (CP.space *> annotations)
            )
      where
        -- NOTE(review): `<*` binds tighter than `<|>`, so the trailing
        -- `CP.space` is consumed only after the `transclude` branch, not after
        -- `tokenP ident` — confirm this asymmetry is intended.
        annotation = fmap Left (tokenP ident) <|> fmap Right (transclude code) <* CP.space
        annotations = P.some (EmbedAnnotation <$> annotation)
signatures :: (Ord e, P.MonadParsec e String m) => m ident -> m (Leaf ident code a)
signatures ident = fmap Signature $ do
_ <- (lit "@signatures" <|> lit "@signature") *> (lit " {" <|> lit "{") *> CP.space
s <- sepBy1' (embedSignatureLink ident) comma
_ <- lit "}"
pure s
signatureInline :: (Ord e, P.MonadParsec e String m) => m ident -> m (Leaf ident code a)
signatureInline ident = fmap SignatureInline $ do
_ <- lit "@inlineSignature" *> (lit " {" <|> lit "{") *> CP.space
s <- embedSignatureLink ident
_ <- lit "}"
pure s
evalInline :: (P.MonadParsec e String m) => (m () -> m code) -> m (Leaf ident code a)
evalInline code = fmap EvalInline $ do
_ <- lit "@eval" *> (lit " {" <|> lit "{") *> CP.space
let inlineEvalClose = void $ lit "}"
s <- code inlineEvalClose
pure s
-- | Not an actual node, but this pattern is referenced in multiple places
embedLink :: (Ord e, P.MonadParsec e s m, P.TraversableStream s) => m ident -> m (EmbedLink ident)
embedLink = fmap EmbedLink . tokenP
embedSignatureLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedSignatureLink ident)
embedSignatureLink ident = EmbedSignatureLink <$> tokenP ident <* CP.space
-- | Monospaced / verbatim text, delimited either by a single backtick or by a
-- run of two-or-more single-quotes (closed by a matching run). A single-line
-- span becomes 'Code'; a multi-line span becomes 'Verbatim' with its
-- indentation trimmed.
verbatim :: (Ord e, P.MonadParsec e String m) => m (Leaf ident code a)
verbatim =
  P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do
    Token originalText start stop <- tokenP do
      -- a single backtick followed by a non-backtick is treated as monospaced
      let tick = P.try (lit "`" <* P.lookAhead (P.satisfy (/= '`')))
      -- also two or more ' followed by that number of closing '
      quotes <- tick <|> (lit "''" <+> P.takeWhileP Nothing (== '\''))
      P.someTill P.anySingle (lit quotes)
    -- Multi-line is detected purely by the start/end line numbers of the token.
    let isMultiLine = line start /= line stop
    pure
      if isMultiLine
        then
          let trimmed = (trimAroundDelimiters originalText)
              txt = trimIndentFromVerbatimBlock (column start - 1) trimmed
           in -- If it's a multi-line verbatim block we trim any whitespace representing
              -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock'
              Verbatim . Word $ Token txt start stop
        else Code . Word $ Token originalText start stop
example :: (P.MonadParsec e String m) => (m () -> m code) -> m (Leaf ident code void)
example code =
P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $
fmap Example $ do
n <- P.try $ do
_ <- lit "`"
length <$> P.takeWhile1P (Just "backticks") (== '`')
let end = void . lit $ replicate (n + 1) '`'
CP.space *> code end
link :: (Ord e, P.MonadParsec e String m) => m ident -> m (Leaf ident code a)
link ident = P.label "link (examples: {type List}, {Nat.+})" $ Link <$> P.try (lit "{" *> embedLink ident <* lit "}")
transclude :: (P.MonadParsec e String m) => (m () -> m code) -> m (Leaf ident code a)
transclude code =
fmap Transclude . P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $
lit "{{" *> code (void $ lit "}}")
-- | Zero or more whitespace characters on the current line — any whitespace
-- except the line terminators @\n@ and @\r@.
nonNewlineSpaces :: (P.MonadParsec e String m) => m String
nonNewlineSpaces = P.takeWhileP Nothing (\ch -> isSpace ch && ch `notElem` "\n\r")
eval :: (P.MonadParsec e String m, Annotated code) => (m () -> m code) -> m (Top ident code (Tree ident code))
eval code =
Eval <$> do
-- commit after seeing that ``` is on its own line
fence <- P.try $ do
fence <- lit "```" <+> P.takeWhileP Nothing (== '`')
b <- all isSpace <$> P.lookAhead (P.takeWhileP Nothing (/= '\n'))
fence <$ guard b
CP.space *> code (void $ lit fence)
exampleBlock :: (P.MonadParsec e String m, Annotated code) => (m () -> m code) -> m (Top ident code (Tree ident code))
exampleBlock code =
ExampleBlock
<$> do
void $ lit "@typecheck" <* CP.space
fence <- lit "```" <+> P.takeWhileP Nothing (== '`')
code . void $ lit fence
-- | A fenced code block: a backtick fence, a name on the fence line, then the
-- body up to a matching fence. The fence's column of indentation is stripped
-- from each body line.
codeBlock :: (Ord e, P.MonadParsec e String m) => m (Top ident code (Tree ident code))
codeBlock = do
  -- 0-based column of the opening fence; this is the indentation budget
  -- removed from every body line.
  column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel
  let tabWidth = toInteger . P.unPos $ P.defaultTabWidth
  fence <- lit "```" <+> P.takeWhileP Nothing (== '`')
  name <-
    nonNewlineSpaces
      *> tokenP (P.takeWhile1P Nothing (not . isSpace))
      <* nonNewlineSpaces
  _ <- void CP.eol
  verbatim <-
    tokenP $
      uncolumn column tabWidth . trimAroundDelimiters
        <$> P.someTill P.anySingle ([] <$ lit fence)
  pure $ CodeBlock name verbatim
  where
    -- Strip up to @column@ columns of leading whitespace from each line,
    -- counting a tab as @tabWidth@ columns; a tab that would overshoot the
    -- remaining budget is kept rather than partially consumed.
    uncolumn column tabWidth s =
      let skip col r | col < 1 = r
          skip col s@('\t' : _) | col < tabWidth = s
          skip col ('\t' : r) = skip (col - tabWidth) r
          skip col (c : r)
            | isSpace c && (not $ isControl c) =
                skip (col - 1) r
          skip _ s = s
       in List.intercalate "\n" $ skip column <$> lines s
emphasis ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
Char ->
m ident ->
(m () -> m code) ->
m () ->
m (Tree ident code)
emphasis delimiter ident code closing = do
let start = some (P.satisfy (== delimiter))
end <- P.try $ do
end <- start
P.lookAhead (P.satisfy (not . isSpace))
pure end
wrap' . Paragraph
<$> someTill'
(leafy ident code (closing <|> (void $ lit end)) <* void whitespaceWithoutParagraphBreak)
(lit end)
where
-- Allows whitespace including up to one newline
whitespaceWithoutParagraphBreak = void do
void nonNewlineSpaces
optional newline >>= \case
Just _ -> void nonNewlineSpaces
Nothing -> pure ()
bold ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
m (Leaf ident code (Tree ident code))
bold ident code = fmap Bold . emphasis '*' ident code
italic ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
m (Leaf ident code (Tree ident code))
italic ident code = fmap Italic . emphasis '_' ident code
strikethrough ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
m (Leaf ident code (Tree ident code))
strikethrough ident code = fmap Strikethrough . emphasis '~' ident code
namedLink ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
m (Leaf ident code (Tree ident code))
namedLink ident code docClose =
P.label "hyperlink (example: [link name](https://destination.com))" do
_ <- lit "["
p <- spaced docClose . leafy ident code . void $ char ']'
_ <- lit "]"
_ <- lit "("
target <- group $ fmap pure (link ident) <|> some' (transclude code <|> word (docClose <|> void (char ')')))
_ <- lit ")"
pure $ NamedLink (wrap' $ Paragraph p) target
-- | One-or-more whitespace characters between leaves.
--
-- At most one newline is allowed (two would mean a paragraph break) — unless
-- the doc terminator follows immediately, in which case any run of whitespace
-- is accepted. Backtracks on failure.
sp :: (P.MonadParsec e String m) => m () -> m String
sp docClose = P.try $ do
  spaces <- P.takeWhile1P (Just "space") isSpace
  close <- P.optional (P.lookAhead docClose)
  case close of
    Nothing -> guard $ ok spaces
    Just _ -> pure ()
  pure spaces
  where
    -- fewer than two newlines, i.e. not a blank-line paragraph break
    ok s = length [() | '\n' <- s] < 2
spaced :: (P.MonadParsec e String m) => m () -> m a -> m (NonEmpty a)
spaced docClose p = some' $ p <* P.optional (sp docClose)
-- | Not an actual node, but this pattern is referenced in multiple places
list ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
R.ReaderT ParsingEnv m (Top ident code (Tree ident code))
list ident code docClose = bulletedList ident code docClose <|> numberedList ident code docClose
listSep :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m ()
listSep = P.try $ newline *> nonNewlineSpaces *> P.lookAhead (void bulletedStart <|> void numberedStart)
bulletedStart :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m (Int, [a])
bulletedStart = P.try $ do
r <- listItemStart $ [] <$ P.satisfy bulletChar
P.lookAhead (P.satisfy isSpace)
pure r
where
bulletChar ch = ch == '*' || ch == '-' || ch == '+'
-- | The start of a list item: optional same-line leading spaces, then the
-- given gutter parser (a bullet or a number). Fails unless the item sits
-- strictly deeper than the enclosing list's column, which is what drives list
-- nesting. Returns the item's column alongside the gutter's result.
-- Backtracks on failure.
listItemStart :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m a -> m (Int, a)
listItemStart gutter = P.try do
  nonNewlineSpaces
  col <- column <$> posP
  parentCol <- R.asks parentListColumn
  guard (col > parentCol)
  (col,) <$> gutter
numberedStart :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m (Int, Token Word64)
numberedStart = listItemStart $ P.try (tokenP $ LP.decimal <* lit ".")
-- | FIXME: This should take a @`P` a@
numberedList ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
R.ReaderT ParsingEnv m (Top ident code (Tree ident code))
numberedList ident code docClose = NumberedList <$> sepBy1' numberedItem listSep
where
numberedItem = P.label "numbered list (examples: 1. item1, 8. start numbering at '8')" do
(col, s) <- numberedStart
(s,) <$> column' ident code docClose col
-- | FIXME: This should take a @`P` a@
bulletedList ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
R.ReaderT ParsingEnv m (Top ident code (Tree ident code))
bulletedList ident code docClose = BulletedList <$> sepBy1' bullet listSep
where
bullet = P.label "bullet (examples: * item1, - item2)" do
(col, _) <- bulletedStart
column' ident code docClose col
column' ::
(Ord e, P.MonadParsec e String m, Annotated code) =>
m ident ->
(m () -> m code) ->
m () ->
Int ->
R.ReaderT ParsingEnv m (Column (Tree ident code))
column' ident code docClose col =
Column . wrap'
<$> (nonNewlineSpaces *> listItemParagraph)
<*> R.local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> fmap wrap' (list ident code docClose))
where
listItemParagraph =
Paragraph <$> do
col <- column <$> posP
some' (lift (leafy ident code docClose) <* sep col)
where
-- Trickiness here to support hard line breaks inside of
-- a bulleted list, so for instance this parses as expected:
--
-- * uno dos
-- tres quatro
-- * alice bob
-- carol dave eve
sep col = do
_ <- nonNewlineSpaces
_ <-
P.optional . P.try $
newline
*> nonNewlineSpaces
*> do
col2 <- column <$> posP
guard $ col2 >= col
(P.notFollowedBy $ void numberedStart <|> void bulletedStart)
pure ()
newline :: (P.MonadParsec e String m) => m String
newline = P.label "newline" $ lit "\n" <|> lit "\r\n"
-- |
--
-- > ## Section title
-- >
-- > A paragraph under this section.
-- > Part of the same paragraph. Blanklines separate paragraphs.
-- >
-- > ### A subsection title
-- >
-- > A paragraph under this subsection.
-- >
-- > # A section title (not a subsection)
section ::
  (Ord e, P.MonadParsec e String m, Annotated code) =>
  m ident ->
  (m () -> m code) ->
  m () ->
  R.ReaderT ParsingEnv m (Top ident code (Tree ident code))
section ident code docClose = do
  ns <- R.asks parentSections
  -- `parentSections` starts non-empty (`initialEnv` is `[0]`) and `R.local`
  -- below only replaces the head, so `head`/`tail` here cannot fail.
  -- The heading must have strictly more #s than the parent depth: we first
  -- match exactly `head ns` #s, then require at least one more.
  hashes <- lift $ P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp docClose
  title <- lift $ paragraph ident code docClose <* CP.space
  let m = length hashes + head ns
  body <-
    R.local (\env -> env {parentSections = m : tail ns}) $
      P.many (sectionElem ident code docClose <* CP.space)
  pure $ Section (wrap' title) body
-- | FIXME: This should just take a @`P` code@ and @`P` a@.
group :: (P.MonadParsec e s m) => m (NonEmpty (Leaf ident code a)) -> m (Leaf ident code a)
group = fmap Group . join
-- | FIXME: This should just take a @`P` a@
join :: (P.MonadParsec e s m) => m (NonEmpty a) -> m (Join a)
join = fmap Join
-- * utility functions
-- | Attach a 'Top' node's own annotation to it, forming one 'Tree' layer.
wrap' :: (Annotated code) => Top ident code (Tree ident code) -> Tree ident code
wrap' top = ann top :< top
-- | If it's a multi-line verbatim block we trim any whitespace representing
-- indentation from the pretty-printer.
--
-- E.g.
--
-- @@
-- {{
-- # Heading
-- '''
-- code
-- indented
-- '''
-- }}
-- @@
--
-- Should lex to the text literal "code\n indented".
--
-- If there's text in the literal that has LESS trailing whitespace than the
-- opening delimiters, we don't trim it at all. E.g.
--
-- @@
-- {{
-- # Heading
-- '''
-- code
-- '''
-- }}
-- @@
--
-- Is parsed as " code".
--
-- Trim the expected amount of whitespace from a text literal:
-- >>> trimIndentFromVerbatimBlock 2 " code\n indented"
-- "code\n indented"
--
-- If the text literal has less leading whitespace than the opening delimiters,
-- leave it as-is
-- >>> trimIndentFromVerbatimBlock 2 "code\n indented"
-- "code\n indented"
trimIndentFromVerbatimBlock :: Int -> String -> String
trimIndentFromVerbatimBlock leadingSpaces txt =
  maybe txt (List.intercalate "\n") (traverse dropIndent (lines txt))
  where
    indent = replicate leadingSpaces ' '
    -- Each line must either start with the expected indentation (which we
    -- strip) or be whitespace-only (editors commonly trim trailing whitespace,
    -- so such lines become empty instead of failing the whole block). If any
    -- line fails both, `traverse` yields Nothing and the text is left as-is.
    dropIndent line =
      case List.stripPrefix indent line of
        Just stripped -> Just stripped
        Nothing -> if all isSpace line then Just "" else Nothing
-- | Trim leading/trailing whitespace from around delimiters, e.g.
--
-- {{
-- '''___ <- whitespace here including newline
-- text block
-- 👇 or here
-- __'''
-- }}
-- >>> trimAroundDelimiters " \n text block \n "
-- " text block "
--
-- Should leave leading and trailing line untouched if it contains non-whitespace, e.g.:
--
-- ''' leading whitespace
-- text block
-- trailing whitespace: '''
-- >>> trimAroundDelimiters " leading whitespace\n text block \ntrailing whitespace: "
-- " leading whitespace\n text block \ntrailing whitespace: "
--
-- Should keep trailing newline if it's the only thing on the line, e.g.:
--
-- '''
-- newline below
--
-- '''
-- >>> trimAroundDelimiters "\nnewline below\n\n"
-- "newline below\n\n"
trimAroundDelimiters :: String -> String
trimAroundDelimiters txt = trimTrailing (trimLeading txt)
  where
    -- Drop the first line entirely when it is nothing but whitespace (the
    -- residue between the opening delimiter and the first real line);
    -- otherwise leave the text untouched.
    trimLeading s =
      let (firstLine, rest) = break (== '\n') s
       in if all isSpace firstLine then drop 1 rest else firstLine <> rest
    -- Drop a whitespace-only final line (the indentation sitting before the
    -- closing delimiter) together with the newline introducing it. A trailing
    -- newline that ends the text outright is kept as-is. Implemented by
    -- inspecting the reversed text up to the last newline.
    trimTrailing s = case break (== '\n') (reverse s) of
      ("", '\n' : _) -> s -- text ends with a newline: keep it
      (revLast, '\n' : revInit)
        | all isSpace revLast -> reverse revInit
      (revAll, "")
        | all isSpace revAll -> "" -- whitespace-only text with no newline
      _ -> s

View File

@ -0,0 +1,194 @@
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -Wno-orphans #-}
-- | Haskell parallel to @unison/base.Doc@.
--
-- These types have two significant parameters: @ident@ and @code@ that are expected to be parameterized by some
-- representation of identifiers and source code of the host language.
--
-- This is much more restricted than @unison/base.Doc@, but it covers everything we can parse from Haskell. The
-- mismatch with Unison is a problem, as someone can create a Unison Doc with explicit constructors or function calls,
-- have it rendered to a scratch file, and then we cant parse it. Changing the types here to match Unison wouldnt
-- fix the issue. We have to modify the types and parser in concert (in both Haskell and Unison) to bring them in
-- line.
module Unison.Syntax.Parser.Doc.Data where
import Data.Eq.Deriving (deriveEq1, deriveEq2)
import Data.List.NonEmpty (NonEmpty)
import Data.Ord.Deriving (deriveOrd1, deriveOrd2)
import Text.Show.Deriving (deriveShow1, deriveShow2)
import Unison.Parser.Ann (Annotated (..))
import Unison.Prelude
import Unison.Syntax.Lexer.Token (Token (..))
newtype UntitledSection a = UntitledSection [a]
deriving (Eq, Ord, Show, Foldable, Functor, Traversable)
data Top ident code a
= -- | The first argument is always a `Paragraph`
Section a [a]
| Eval code
| ExampleBlock code
| CodeBlock (Token String) (Token String)
| BulletedList (NonEmpty (Column a))
| NumberedList (NonEmpty (Token Word64, Column a))
| Paragraph (NonEmpty (Leaf ident code a))
deriving (Eq, Ord, Show, Foldable, Functor, Traversable)
data Column a
= -- | The first is always a `Paragraph`, and the second a `BulletedList` or `NumberedList`
Column a (Maybe a)
deriving (Eq, Ord, Show, Foldable, Functor, Traversable)
data Leaf ident code a
= Link (EmbedLink ident)
| -- | first is a Paragraph, second is always a Group (which contains either a single Term/Type link or list of
-- `Transclude`s & `Word`s)
NamedLink a (Leaf ident code Void)
| Example code
| Transclude code
| -- | Always a Paragraph
Bold a
| -- | Always a Paragraph
Italic a
| -- | Always a Paragraph
Strikethrough a
| -- | Always a Word
Verbatim (Leaf ident Void Void)
| -- | Always a Word
Code (Leaf ident Void Void)
| -- | Always a Transclude
Source (NonEmpty (SourceElement ident (Leaf ident code Void)))
| -- | Always a Transclude
FoldedSource (NonEmpty (SourceElement ident (Leaf ident code Void)))
| EvalInline code
| Signature (NonEmpty (EmbedSignatureLink ident))
| SignatureInline (EmbedSignatureLink ident)
| Word (Token String)
| Group (Join (Leaf ident code a))
deriving (Eq, Ord, Show, Foldable, Functor, Traversable)
-- | Maps @f@ over embedded @code@ and @g@ over child nodes; @ident@ stays
-- fixed. 'Verbatim' and 'Code' carry 'Void' in their code position, so they
-- pass through unchanged, while 'NamedLink' / 'Source' / 'FoldedSource'
-- recurse into their code-holding leaves via 'first'.
instance Bifunctor (Leaf ident) where
  bimap f g = \case
    Link x -> Link x
    NamedLink a leaf -> NamedLink (g a) $ first f leaf
    Example code -> Example $ f code
    Transclude code -> Transclude $ f code
    Bold a -> Bold $ g a
    Italic a -> Italic $ g a
    Strikethrough a -> Strikethrough $ g a
    Verbatim leaf -> Verbatim leaf
    Code leaf -> Code leaf
    Source elems -> Source $ fmap (first f) <$> elems
    FoldedSource elems -> FoldedSource $ fmap (first f) <$> elems
    EvalInline code -> EvalInline $ f code
    Signature x -> Signature x
    SignatureInline x -> SignatureInline x
    Word x -> Word x
    Group join -> Group $ bimap f g <$> join
-- | This is a deviation from the Unison Doc data model in Unison, Doc distinguishes between type and term links, but
-- here Doc knows nothing about what namespaces may exist.
data EmbedLink ident = EmbedLink (Token ident)
deriving (Eq, Ord, Show)
data SourceElement ident a = SourceElement (EmbedLink ident) [EmbedAnnotation ident a]
deriving (Eq, Ord, Show, Foldable, Functor, Traversable)
newtype EmbedSignatureLink ident = EmbedSignatureLink (Token ident)
deriving (Eq, Ord, Show)
newtype Join a = Join (NonEmpty a)
deriving (Eq, Ord, Show, Foldable, Functor, Traversable)
newtype EmbedAnnotation ident a
= EmbedAnnotation (Either (Token ident) a)
deriving (Eq, Ord, Show, Foldable, Functor, Traversable)
instance (Annotated code, Annotated a) => Annotated (Top ident code a) where
ann = \case
Section title body -> ann title <> ann body
Eval code -> ann code
ExampleBlock code -> ann code
CodeBlock label body -> ann label <> ann body
BulletedList items -> ann items
NumberedList items -> ann $ snd <$> items
Paragraph leaves -> ann leaves
instance (Annotated a) => Annotated (Column a) where
ann (Column para list) = ann para <> ann list
instance (Annotated code, Annotated a) => Annotated (Leaf ident code a) where
ann = \case
Link link -> ann link
NamedLink label target -> ann label <> ann target
Example code -> ann code
Transclude code -> ann code
Bold para -> ann para
Italic para -> ann para
Strikethrough para -> ann para
Verbatim word -> ann word
Code word -> ann word
Source elems -> ann elems
FoldedSource elems -> ann elems
EvalInline code -> ann code
Signature links -> ann links
SignatureInline link -> ann link
Word text -> ann text
Group (Join leaves) -> ann leaves
instance Annotated (EmbedLink ident) where
ann (EmbedLink name) = ann name
instance (Annotated code) => Annotated (SourceElement ident code) where
ann (SourceElement link target) = ann link <> ann target
instance Annotated (EmbedSignatureLink ident) where
ann (EmbedSignatureLink name) = ann name
instance (Annotated code) => Annotated (EmbedAnnotation ident code) where
ann (EmbedAnnotation a) = either ann ann a
$(deriveEq1 ''Column)
$(deriveOrd1 ''Column)
$(deriveShow1 ''Column)
$(deriveEq1 ''Token)
$(deriveOrd1 ''Token)
$(deriveShow1 ''Token)
$(deriveEq1 ''EmbedAnnotation)
$(deriveOrd1 ''EmbedAnnotation)
$(deriveShow1 ''EmbedAnnotation)
$(deriveEq2 ''EmbedAnnotation)
$(deriveOrd2 ''EmbedAnnotation)
$(deriveShow2 ''EmbedAnnotation)
$(deriveEq1 ''EmbedLink)
$(deriveOrd1 ''EmbedLink)
$(deriveShow1 ''EmbedLink)
$(deriveEq1 ''SourceElement)
$(deriveOrd1 ''SourceElement)
$(deriveShow1 ''SourceElement)
$(deriveEq2 ''SourceElement)
$(deriveOrd2 ''SourceElement)
$(deriveShow2 ''SourceElement)
$(deriveEq1 ''Join)
$(deriveOrd1 ''Join)
$(deriveShow1 ''Join)
$(deriveEq1 ''Leaf)
$(deriveOrd1 ''Leaf)
$(deriveShow1 ''Leaf)
$(deriveEq2 ''Leaf)
$(deriveOrd2 ''Leaf)
$(deriveShow2 ''Leaf)
$(deriveEq1 ''Top)
$(deriveOrd1 ''Top)
$(deriveShow1 ''Top)
$(deriveEq2 ''Top)
$(deriveOrd2 ''Top)
$(deriveShow2 ''Top)

View File

@ -10,7 +10,7 @@ import Unison.Prelude
import Unison.ShortHash (ShortHash)
import Unison.ShortHash qualified as ShortHash
import Unison.Syntax.HashQualifiedPrime qualified as HQ' (unsafeParseText)
import Unison.Syntax.Lexer
import Unison.Syntax.Lexer.Unison
main :: IO ()
main =
@ -221,8 +221,8 @@ test =
t :: String -> [Lexeme] -> Test ()
t s expected =
let actual0 = payload <$> lexer "ignored filename" s
actual = take (length actual0 - 2) . drop 1 $ actual0
let actual0 = payload <$> preParse (lexer "ignored filename" s)
actual = take (length actual0 - 2) . drop 1 $ toList actual0
in scope s $
if actual == expected
then ok

View File

@ -23,9 +23,12 @@ library
Unison.Syntax.HashQualifiedPrime
Unison.Syntax.Lexer
Unison.Syntax.Lexer.Token
Unison.Syntax.Lexer.Unison
Unison.Syntax.Name
Unison.Syntax.NameSegment
Unison.Syntax.Parser
Unison.Syntax.Parser.Doc
Unison.Syntax.Parser.Doc.Data
Unison.Syntax.ReservedWords
Unison.Syntax.ShortHash
Unison.Syntax.Var
@ -68,7 +71,9 @@ library
, bytes
, containers
, cryptonite
, deriving-compat
, extra
, free
, lens
, megaparsec
, mtl
@ -125,8 +130,10 @@ test-suite syntax-tests
, code-page
, containers
, cryptonite
, deriving-compat
, easytest
, extra
, free
, lens
, megaparsec
, mtl