Merge pull request #4655 from unisonweb/cp/text-codeblock-fix

Fix raw text-block indentation
2024-09-11 02:25:39 +03:00 · 2024-01-31 18:02:55 -05:00 · 2024-01-31 18:02:55 -05:00 · 9e4bc32b94
commit 9e4bc32b94
parent a162fcc73f 1f67d095f2
9 changed files with 457 additions and 32 deletions
--- a/parser-typechecker/src/Unison/Syntax/TermPrinter.hs
+++ b/parser-typechecker/src/Unison/Syntax/TermPrinter.hs
@ -1737,13 +1737,9 @@ prettyDoc2 ::
 prettyDoc2 ac tm = do
  ppe <- getPPE
  let brace p =
-        fmt S.DocDelimiter "{{"
-          <> PP.softbreak
-          <> p
-          <> PP.softbreak
-          <> fmt
-            S.DocDelimiter
-            "}}"
+        if PP.isMultiLine p
+          then fmt S.DocDelimiter "{{" <> PP.newline <> p <> PP.newline <> fmt S.DocDelimiter "}}"
+          else fmt S.DocDelimiter "{{" <> PP.softbreak <> p <> PP.softbreak <> fmt S.DocDelimiter "}}"
      bail tm = brace <$> pretty0 ac tm
      -- Finds the longest run of a character and return one bigger than that
      longestRun c s =
--- a/unison-src/transcripts-round-trip/main.output.md
+++ b/unison-src/transcripts-round-trip/main.output.md
@ -166,7 +166,8 @@ fix_2224b = cases

 fix_2271 : Doc2
 fix_2271 =
-  {{ # Full doc body indented
+  {{
+  # Full doc body indented
  
    ``` raw
    myVal1 = 42 
@ -179,7 +180,8 @@ fix_2271 =
    indented2="this is two indents"
    ```
    
-    I am two spaces over }}
+    I am two spaces over
+  }}

 Fix_2337.f : Fix_2337 -> Boolean
 Fix_2337.f = cases Fix_2337 a b -> a
@ -304,10 +306,12 @@ fix_4384b = {{ {{ docExampleBlock 0 '99 }} }}
 fix_4384c : Doc2
 fix_4384c =
  use Nat +
-  {{ {{ docExampleBlock 0 do
+  {{
+  {{ docExampleBlock 0 do
    x = 1
    y = 2
-    x + y }} }}
+    x + y }}
+  }}

 fix_4384d : Doc2
 fix_4384d =
@ -432,11 +436,13 @@ multiline_list =

 nested_fences : Doc2
 nested_fences =
-  {{ ```` raw
+  {{
+  ```` raw
  ```unison
  r = "boopydoo"
  ```
-  ```` }}
+  ````
+  }}

 raw_a : Text
 raw_a =
--- a/unison-src/transcripts-using-base/doc.output.md
+++ b/unison-src/transcripts-using-base/doc.output.md
@ -553,7 +553,8 @@ Lastly, it's common to build longer documents including subdocuments via `{{ sub

  doc.guide : Doc2
  doc.guide =
-    {{ # Unison computable documentation
+    {{
+    # Unison computable documentation
    
      {{ basicFormatting }}
      
@ -565,7 +566,8 @@ Lastly, it's common to build longer documents including subdocuments via `{{ sub
      
      {{ nonUnisonCodeBlocks }}
      
-      {{ otherElements }} }}
+      {{ otherElements }}
+    }}

 .> display doc.guide

--- a/unison-src/transcripts/doc2.md
+++ b/unison-src/transcripts/doc2.md
@ -0,0 +1,110 @@
+# Test parsing and round-trip of doc2 syntax elements
+
+```ucm:hide
+.> builtins.mergeio
+```
+
+```unison:hide
+otherDoc : a -> Doc2
+otherDoc _ = {{ yo }}
+
+otherTerm : Nat
+otherTerm = 99
+
+fulldoc : Doc2
+fulldoc =
+  use Nat +
+  {{
+Heres some text with a
+soft line break
+
+hard line break
+
+Here's a cool **BOLD** __italic__ ~~strikethrough~~ thing with an inline code block ''1 + 2''
+
+# Heading
+
+## Heading 2
+
+Term Link: {otherTerm}
+
+Type Link: {type Optional}
+
+Term source:
+
+@source{term}
+
+Term signature:
+
+@signature{term}
+
+* List item
+
+Inline code:
+
+`` 1 + 2 ``
+
+` "doesn't typecheck" + 1 `
+
+[Link](https://unison-lang.org)
+
+![Image](https://share-next.unison-lang.org/static/unison-logo-circle.png)
+
+Horizontal rule
+
+---
+
+Video
+
+{{
+Special
+    (Embed
+    (Any (Video [MediaSource "test.mp4" None] [("poster", "test.png")])))
+}}
+
+Transclusion/evaluation:
+
+{{ otherDoc (a -> Word a) }}
+
+---
+
+The following markdown features aren't supported by the Doc format yet, but maybe will someday
+
+
+> Block quote
+
+
+Table
+
+| Header 1 | Header 2 | 
+| -------- | -------- | 
+| Cell 1 | Cell 2 | 
+
+
+    Indented Code block
+
+'''
+    Exact whitespace should be preserved across multiple updates. Don't mess with the logo!
+
+     _____     _
+    |  |  |___|_|___ ___ ___
+    |  |  |   | |_ -| . |   |
+    |_____|_|_|_|___|___|_|_|
+
+    Line with no whitespace:
+
+    Should have one full trailing newline below here:
+
+'''
+
+Inline '' text literal with 1 space of padding '' in the middle of a sentence.
+
+
+}}
+```
+
+Format it to check that everything pretty-prints in a valid way.
+
+```ucm
+.> debug.format
+```
--- a/unison-src/transcripts/doc2.output.md
+++ b/unison-src/transcripts/doc2.output.md
@ -0,0 +1,201 @@
+# Test parsing and round-trip of doc2 syntax elements
+
+```unison
+otherDoc : a -> Doc2
+otherDoc _ = {{ yo }}
+
+otherTerm : Nat
+otherTerm = 99
+
+fulldoc : Doc2
+fulldoc =
+  use Nat +
+  {{
+Heres some text with a
+soft line break
+
+hard line break
+
+Here's a cool **BOLD** __italic__ ~~strikethrough~~ thing with an inline code block ''1 + 2''
+
+# Heading
+
+## Heading 2
+
+Term Link: {otherTerm}
+
+Type Link: {type Optional}
+
+Term source:
+
+@source{term}
+
+Term signature:
+
+@signature{term}
+
+* List item
+
+Inline code:
+
+`` 1 + 2 ``
+
+` "doesn't typecheck" + 1 `
+
+[Link](https://unison-lang.org)
+
+![Image](https://share-next.unison-lang.org/static/unison-logo-circle.png)
+
+Horizontal rule
+
+---
+
+Video
+
+{{
+Special
+    (Embed
+    (Any (Video [MediaSource "test.mp4" None] [("poster", "test.png")])))
+}}
+
+Transclusion/evaluation:
+
+{{ otherDoc (a -> Word a) }}
+
+---
+
+The following markdown features aren't supported by the Doc format yet, but maybe will someday
+
+
+> Block quote
+
+
+Table
+
+| Header 1 | Header 2 | 
+| -------- | -------- | 
+| Cell 1 | Cell 2 | 
+
+
+    Indented Code block
+
+'''
+    Exact whitespace should be preserved across multiple updates. Don't mess with the logo!
+
+     _____     _
+    |  |  |___|_|___ ___ ___
+    |  |  |   | |_ -| . |   |
+    |_____|_|_|_|___|___|_|_|
+
+    Line with no whitespace:
+
+    Should have one full trailing newline below here:
+
+'''
+
+Inline '' text literal with 1 space of padding '' in the middle of a sentence.
+
+
+}}
+```
+
+Format it to check that everything pretty-prints in a valid way.
+
+```ucm
+.> debug.format
+
+```
+```unison:added-by-ucm scratch.u
+otherDoc : a -> Doc2
+otherDoc _ = {{ yo }}
+
+otherTerm : Nat
+otherTerm = 99
+
+fulldoc : Doc2
+fulldoc =
+  use Nat +
+  {{
+  Heres some text with a soft line break
+  
+  hard line break
+  
+  Here's a cool **BOLD** __italic__ ~~strikethrough~~ thing with an inline code
+  block ''1 + 2''
+  
+  # Heading
+  
+    ## Heading 2
+    
+       Term Link: {otherTerm}
+       
+       Type Link: {type Optional}
+       
+       Term source:
+       
+           @source{term}
+       
+       Term signature:
+       
+           @signature{term}
+       
+       * List item
+       
+       Inline code:
+       
+       `` 1 + 2 ``
+       
+       '' "doesn't typecheck" + 1 ''
+       
+       [Link](https://unison-lang.org)
+       
+       ![Image](https://share-next.unison-lang.org/static/unison-logo-circle.png)
+       
+       Horizontal rule
+       
+       ---
+       
+       Video
+       
+       {{
+       Special
+         (Embed
+           (Any (Video [MediaSource "test.mp4" None] [("poster", "test.png")])))
+       }}
+       
+       Transclusion/evaluation:
+       
+       {{ otherDoc (a -> Word a) }}
+       
+       ---
+       
+       The following markdown features aren't supported by the Doc format yet,
+       but maybe will someday
+       
+       > Block quote
+       
+       Table
+       
+       | Header 1 | Header 2 | | -------- | -------- | | Cell 1 | Cell 2 |
+       
+       Indented Code block
+       
+       '''
+           Exact whitespace should be preserved across multiple updates. Don't mess with the logo!
+       
+            _____     _
+           |  |  |___|_|___ ___ ___
+           |  |  |   | |_ -| . |   |
+           |_____|_|_|_|___|___|_|_|
+       
+           Line with no whitespace:
+       
+           Should have one full trailing newline below here:
+       
+       '''
+       
+       Inline '' text literal with 1 space of padding '' in the middle of a
+       sentence.
+  }}
+```
+
--- a/unison-src/transcripts/formatter.output.md
+++ b/unison-src/transcripts/formatter.output.md
@ -52,11 +52,13 @@ type Two = One Nat | Two Text
 ```
 ```unison:added-by-ucm scratch.u
 x.doc =
-  {{ # Doc This is a **doc**!
+  {{
+  # Doc This is a **doc**!
  
    term link {x}
    
-    type link {type Optional} }}
+    type link {type Optional}
+  }}
 x : Nat -> Nat
 x y =
  use Nat +
@ -70,11 +72,13 @@ oneLiner = {{ one liner }}

 -- Before
 explicit.doc =
-  {{ # Here's a top-level doc
+  {{
+  # Here's a top-level doc
  
    With a paragraph
    
-    Or two }}
+    Or two
+  }}
 -- After

 Thing.doc = {{ A doc before an ability }}
--- a/unison-syntax/package.yaml
+++ b/unison-syntax/package.yaml
@ -9,6 +9,7 @@ dependencies:
  - bytes
  - containers
  - cryptonite
+  - extra
  - lens
  - megaparsec
  - mtl
--- a/unison-syntax/src/Unison/Syntax/Lexer.hs
+++ b/unison-syntax/src/Unison/Syntax/Lexer.hs
@ -38,7 +38,10 @@ import Control.Lens.TH (makePrisms)
 import Control.Monad.State qualified as S
 import Data.Char
 import Data.List
+import Data.List qualified as List
+import Data.List.Extra qualified as List
 import Data.List.NonEmpty qualified as Nel
+import Data.List.NonEmpty qualified as NonEmpty
 import Data.Map.Strict qualified as Map
 import Data.Set qualified as Set
 import Data.Text qualified as Text
@ -524,28 +527,26 @@ lexemes' eof =

        verbatim =
          P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do
-            (start, txt, stop) <- positioned $ do
+            (start, originalText, stop) <- positioned $ do
              -- a single backtick followed by a non-backtick is treated as monospaced
              let tick = P.try (lit "`" <* P.lookAhead (P.satisfy (/= '`')))
              -- also two or more ' followed by that number of closing '
              quotes <- tick <|> (lit "''" <+> many (P.satisfy (== '\'')))
              P.someTill P.anySingle (lit quotes)
-            if all isSpace $ takeWhile (/= '\n') txt
-              then
+            let isMultiLine = line start /= line stop
+            if isMultiLine
+              then do
+                let trimmed = (trimAroundDelimiters originalText)
+                let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed
+                -- If it's a multi-line verbatim block we trim any whitespace representing
+                -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock'
                wrap "syntax.docVerbatim" $
                  wrap "syntax.docWord" $
-                    pure [Token (Textual (trim txt)) start stop]
+                    pure [Token (Textual txt) start stop]
              else
                wrap "syntax.docCode" $
                  wrap "syntax.docWord" $
-                    pure [Token (Textual txt) start stop]
-
-        trim = f . f
-          where
-            f = reverse . dropThru
-            dropThru = dropNl . dropWhile (\ch -> isSpace ch && ch /= '\n')
-            dropNl ('\n' : t) = t
-            dropNl as = as
+                    pure [Token (Textual originalText) start stop]

        exampleInline =
          P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $
@ -629,7 +630,7 @@ lexemes' eof =
              _ <- void CP.eol
              verbatim <-
                tok $
-                  Textual . uncolumn column tabWidth . trim
+                  Textual . uncolumn column tabWidth . trimAroundDelimiters
                    <$> P.someTill P.anySingle ([] <$ lit fence)
              pure (name <> verbatim)

@ -1127,6 +1128,108 @@ lexemes' eof =
          where
            ok c = isDelayOrForce c || isSpace c || isAlphaNum c || Set.member c delimiters || c == '\"'

+-- | If it's a multi-line verbatim block we trim any whitespace representing
+-- indentation from the pretty-printer.
+--
+-- E.g.
+--
+-- @@
+-- {{
+--   # Heading
+--     '''
+--     code
+--       indented
+--     '''
+-- }}
+-- @@
+--
+-- Should lex to the text literal "code\n  indented".
+--
+-- If there's text in the literal that has LESS trailing whitespace than the
+-- opening delimiters, we don't trim it at all. E.g.
+--
+-- @@
+-- {{
+--   # Heading
+--     '''
+--   code
+--     '''
+-- }}
+-- @@
+--
+--  Is parsed as "  code".
+--
+--  Trim the expected amount of whitespace from a text literal:
+--  >>> trimIndentFromVerbatimBlock 2 "  code\n    indented"
+-- "code\n  indented"
+--
+-- If the text literal has less leading whitespace than the opening delimiters,
+-- leave it as-is
+-- >>> trimIndentFromVerbatimBlock 2 "code\n  indented"
+-- "code\n  indented"
+trimIndentFromVerbatimBlock :: Int -> String -> String
+trimIndentFromVerbatimBlock leadingSpaces txt = fromMaybe txt $ do
+  List.intercalate "\n" <$> for (lines txt) \line -> do
+    -- If any 'stripPrefix' fails, we fail and return the unaltered text
+    case stripPrefix (replicate leadingSpaces ' ') line of
+      Just stripped -> Just stripped
+      Nothing ->
+        -- If it was a line with all white-space, just use an empty line,
+        -- this can happen easily in editors which trim trailing whitespace.
+        if all isSpace line
+          then Just ""
+          else Nothing
+
+-- Trim leading/trailing whitespace from around delimiters, e.g.
+--
+-- {{
+--   '''___ <- whitespace here including newline
+--   text block
+-- 👇 or here
+-- __'''
+-- }}
+-- >>> trimAroundDelimiters "  \n  text block \n  "
+-- "  text block "
+--
+-- Should leave leading and trailing line untouched if it contains non-whitespace, e.g.:
+--
+-- '''  leading whitespace
+--   text block
+-- trailing whitespace:  '''
+-- >>> trimAroundDelimiters "  leading whitespace\n  text block \ntrailing whitespace:  "
+-- "  leading whitespace\n  text block \ntrailing whitespace:  "
+--
+-- Should keep trailing newline if it's the only thing on the line, e.g.:
+--
+-- '''
+-- newline below
+--
+-- '''
+-- >>> trimAroundDelimiters "\nnewline below\n\n"
+-- "newline below\n\n"
+trimAroundDelimiters :: String -> String
+trimAroundDelimiters txt =
+  txt
+    & ( \s ->
+          List.breakOn "\n" s
+            & \case
+              (prefix, suffix)
+                | all isSpace prefix -> drop 1 suffix
+                | otherwise -> prefix <> suffix
+      )
+    & ( \s ->
+          List.breakOnEnd "\n" s
+            & \case
+              (_prefix, "") -> s
+              (prefix, suffix)
+                | all isSpace suffix -> dropTrailingNewline prefix
+                | otherwise -> prefix <> suffix
+      )
+  where
+    dropTrailingNewline = \case
+      [] -> []
+      (x : xs) -> NonEmpty.init (x NonEmpty.:| xs)
+
 separated :: (Char -> Bool) -> P a -> P a
 separated ok p = P.try $ p <* P.lookAhead (void (P.satisfy ok) <|> P.eof)

--- a/unison-syntax/unison-syntax.cabal
+++ b/unison-syntax/unison-syntax.cabal
@ -63,6 +63,7 @@ library
    , bytes
    , containers
    , cryptonite
+    , extra
    , lens
    , megaparsec
    , mtl
@ -118,6 +119,7 @@ test-suite syntax-tests
    , containers
    , cryptonite
    , easytest
+    , extra
    , lens
    , megaparsec
    , mtl