Merge pull request #4655 from unisonweb/cp/text-codeblock-fix

Fix raw text-block indentation
This commit is contained in:
Arya Irani 2024-01-31 18:02:55 -05:00 committed by GitHub
commit 9e4bc32b94
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 457 additions and 32 deletions

View File

@ -1737,13 +1737,9 @@ prettyDoc2 ::
prettyDoc2 ac tm = do
ppe <- getPPE
let brace p =
fmt S.DocDelimiter "{{"
<> PP.softbreak
<> p
<> PP.softbreak
<> fmt
S.DocDelimiter
"}}"
if PP.isMultiLine p
then fmt S.DocDelimiter "{{" <> PP.newline <> p <> PP.newline <> fmt S.DocDelimiter "}}"
else fmt S.DocDelimiter "{{" <> PP.softbreak <> p <> PP.softbreak <> fmt S.DocDelimiter "}}"
bail tm = brace <$> pretty0 ac tm
-- Finds the longest run of a character and return one bigger than that
longestRun c s =

View File

@ -166,7 +166,8 @@ fix_2224b = cases
fix_2271 : Doc2
fix_2271 =
{{ # Full doc body indented
{{
# Full doc body indented
``` raw
myVal1 = 42
@ -179,7 +180,8 @@ fix_2271 =
indented2="this is two indents"
```
I am two spaces over }}
I am two spaces over
}}
Fix_2337.f : Fix_2337 -> Boolean
Fix_2337.f = cases Fix_2337 a b -> a
@ -304,10 +306,12 @@ fix_4384b = {{ {{ docExampleBlock 0 '99 }} }}
fix_4384c : Doc2
fix_4384c =
use Nat +
{{ {{ docExampleBlock 0 do
{{
{{ docExampleBlock 0 do
x = 1
y = 2
x + y }} }}
x + y }}
}}
fix_4384d : Doc2
fix_4384d =
@ -432,11 +436,13 @@ multiline_list =
nested_fences : Doc2
nested_fences =
{{ ```` raw
{{
```` raw
```unison
r = "boopydoo"
```
```` }}
````
}}
raw_a : Text
raw_a =

View File

@ -553,7 +553,8 @@ Lastly, it's common to build longer documents including subdocuments via `{{ sub
doc.guide : Doc2
doc.guide =
{{ # Unison computable documentation
{{
# Unison computable documentation
{{ basicFormatting }}
@ -565,7 +566,8 @@ Lastly, it's common to build longer documents including subdocuments via `{{ sub
{{ nonUnisonCodeBlocks }}
{{ otherElements }} }}
{{ otherElements }}
}}
.> display doc.guide

View File

@ -0,0 +1,110 @@
# Test parsing and round-trip of doc2 syntax elements
```ucm:hide
.> builtins.mergeio
```
```unison:hide
otherDoc : a -> Doc2
otherDoc _ = {{ yo }}
otherTerm : Nat
otherTerm = 99
fulldoc : Doc2
fulldoc =
use Nat +
{{
Heres some text with a
soft line break
hard line break
Here's a cool **BOLD** __italic__ ~~strikethrough~~ thing with an inline code block ''1 + 2''
# Heading
## Heading 2
Term Link: {otherTerm}
Type Link: {type Optional}
Term source:
@source{term}
Term signature:
@signature{term}
* List item
Inline code:
`` 1 + 2 ``
` "doesn't typecheck" + 1 `
[Link](https://unison-lang.org)
![Image](https://share-next.unison-lang.org/static/unison-logo-circle.png)
Horizontal rule
---
Video
{{
Special
(Embed
(Any (Video [MediaSource "test.mp4" None] [("poster", "test.png")])))
}}
Transclusion/evaluation:
{{ otherDoc (a -> Word a) }}
---
The following markdown features aren't supported by the Doc format yet, but maybe will someday
> Block quote
Table
| Header 1 | Header 2 |
| -------- | -------- |
| Cell 1 | Cell 2 |
Indented Code block
'''
Exact whitespace should be preserved across multiple updates. Don't mess with the logo!
_____ _
| | |___|_|___ ___ ___
| | | | |_ -| . | |
|_____|_|_|_|___|___|_|_|
Line with no whitespace:
Should have one full trailing newline below here:
'''
Inline '' text literal with 1 space of padding '' in the middle of a sentence.
}}
```
Format it to check that everything pretty-prints in a valid way.
```ucm
.> debug.format
```

View File

@ -0,0 +1,201 @@
# Test parsing and round-trip of doc2 syntax elements
```unison
otherDoc : a -> Doc2
otherDoc _ = {{ yo }}
otherTerm : Nat
otherTerm = 99
fulldoc : Doc2
fulldoc =
use Nat +
{{
Heres some text with a
soft line break
hard line break
Here's a cool **BOLD** __italic__ ~~strikethrough~~ thing with an inline code block ''1 + 2''
# Heading
## Heading 2
Term Link: {otherTerm}
Type Link: {type Optional}
Term source:
@source{term}
Term signature:
@signature{term}
* List item
Inline code:
`` 1 + 2 ``
` "doesn't typecheck" + 1 `
[Link](https://unison-lang.org)
![Image](https://share-next.unison-lang.org/static/unison-logo-circle.png)
Horizontal rule
---
Video
{{
Special
(Embed
(Any (Video [MediaSource "test.mp4" None] [("poster", "test.png")])))
}}
Transclusion/evaluation:
{{ otherDoc (a -> Word a) }}
---
The following markdown features aren't supported by the Doc format yet, but maybe will someday
> Block quote
Table
| Header 1 | Header 2 |
| -------- | -------- |
| Cell 1 | Cell 2 |
Indented Code block
'''
Exact whitespace should be preserved across multiple updates. Don't mess with the logo!
_____ _
| | |___|_|___ ___ ___
| | | | |_ -| . | |
|_____|_|_|_|___|___|_|_|
Line with no whitespace:
Should have one full trailing newline below here:
'''
Inline '' text literal with 1 space of padding '' in the middle of a sentence.
}}
```
Format it to check that everything pretty-prints in a valid way.
```ucm
.> debug.format
```
```unison:added-by-ucm scratch.u
otherDoc : a -> Doc2
otherDoc _ = {{ yo }}
otherTerm : Nat
otherTerm = 99
fulldoc : Doc2
fulldoc =
use Nat +
{{
Heres some text with a soft line break
hard line break
Here's a cool **BOLD** __italic__ ~~strikethrough~~ thing with an inline code
block ''1 + 2''
# Heading
## Heading 2
Term Link: {otherTerm}
Type Link: {type Optional}
Term source:
@source{term}
Term signature:
@signature{term}
* List item
Inline code:
`` 1 + 2 ``
'' "doesn't typecheck" + 1 ''
[Link](https://unison-lang.org)
![Image](https://share-next.unison-lang.org/static/unison-logo-circle.png)
Horizontal rule
---
Video
{{
Special
(Embed
(Any (Video [MediaSource "test.mp4" None] [("poster", "test.png")])))
}}
Transclusion/evaluation:
{{ otherDoc (a -> Word a) }}
---
The following markdown features aren't supported by the Doc format yet,
but maybe will someday
> Block quote
Table
| Header 1 | Header 2 | | -------- | -------- | | Cell 1 | Cell 2 |
Indented Code block
'''
Exact whitespace should be preserved across multiple updates. Don't mess with the logo!
_____ _
| | |___|_|___ ___ ___
| | | | |_ -| . | |
|_____|_|_|_|___|___|_|_|
Line with no whitespace:
Should have one full trailing newline below here:
'''
Inline '' text literal with 1 space of padding '' in the middle of a
sentence.
}}
```

View File

@ -52,11 +52,13 @@ type Two = One Nat | Two Text
```
```unison:added-by-ucm scratch.u
x.doc =
{{ # Doc This is a **doc**!
{{
# Doc This is a **doc**!
term link {x}
type link {type Optional} }}
type link {type Optional}
}}
x : Nat -> Nat
x y =
use Nat +
@ -70,11 +72,13 @@ oneLiner = {{ one liner }}
-- Before
explicit.doc =
{{ # Here's a top-level doc
{{
# Here's a top-level doc
With a paragraph
Or two }}
Or two
}}
-- After
Thing.doc = {{ A doc before an ability }}

View File

@ -9,6 +9,7 @@ dependencies:
- bytes
- containers
- cryptonite
- extra
- lens
- megaparsec
- mtl

View File

@ -38,7 +38,10 @@ import Control.Lens.TH (makePrisms)
import Control.Monad.State qualified as S
import Data.Char
import Data.List
import Data.List qualified as List
import Data.List.Extra qualified as List
import Data.List.NonEmpty qualified as Nel
import Data.List.NonEmpty qualified as NonEmpty
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as Text
@ -524,28 +527,26 @@ lexemes' eof =
verbatim =
P.label "code (examples: ''**unformatted**'', `words` or '''_words_''')" $ do
(start, txt, stop) <- positioned $ do
(start, originalText, stop) <- positioned $ do
-- a single backtick followed by a non-backtick is treated as monospaced
let tick = P.try (lit "`" <* P.lookAhead (P.satisfy (/= '`')))
-- also two or more ' followed by that number of closing '
quotes <- tick <|> (lit "''" <+> many (P.satisfy (== '\'')))
P.someTill P.anySingle (lit quotes)
if all isSpace $ takeWhile (/= '\n') txt
then
let isMultiLine = line start /= line stop
if isMultiLine
then do
let trimmed = (trimAroundDelimiters originalText)
let txt = trimIndentFromVerbatimBlock (column start - 1) trimmed
-- If it's a multi-line verbatim block we trim any whitespace representing
-- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock'
wrap "syntax.docVerbatim" $
wrap "syntax.docWord" $
pure [Token (Textual (trim txt)) start stop]
pure [Token (Textual txt) start stop]
else
wrap "syntax.docCode" $
wrap "syntax.docWord" $
pure [Token (Textual txt) start stop]
trim = f . f
where
f = reverse . dropThru
dropThru = dropNl . dropWhile (\ch -> isSpace ch && ch /= '\n')
dropNl ('\n' : t) = t
dropNl as = as
pure [Token (Textual originalText) start stop]
exampleInline =
P.label "inline code (examples: ``List.map f xs``, ``[1] :+ 2``)" $
@ -629,7 +630,7 @@ lexemes' eof =
_ <- void CP.eol
verbatim <-
tok $
Textual . uncolumn column tabWidth . trim
Textual . uncolumn column tabWidth . trimAroundDelimiters
<$> P.someTill P.anySingle ([] <$ lit fence)
pure (name <> verbatim)
@ -1127,6 +1128,108 @@ lexemes' eof =
where
ok c = isDelayOrForce c || isSpace c || isAlphaNum c || Set.member c delimiters || c == '\"'
-- | If it's a multi-line verbatim block we trim any whitespace representing
-- indentation from the pretty-printer.
--
-- E.g.
--
-- @@
-- {{
-- # Heading
-- '''
-- code
-- indented
-- '''
-- }}
-- @@
--
-- Should lex to the text literal "code\n indented".
--
-- If there's text in the literal that has LESS trailing whitespace than the
-- opening delimiters, we don't trim it at all. E.g.
--
-- @@
-- {{
-- # Heading
-- '''
-- code
-- '''
-- }}
-- @@
--
-- Is parsed as " code".
--
-- Trim the expected amount of whitespace from a text literal:
-- >>> trimIndentFromVerbatimBlock 2 " code\n indented"
-- "code\n indented"
--
-- If the text literal has less leading whitespace than the opening delimiters,
-- leave it as-is
-- >>> trimIndentFromVerbatimBlock 2 "code\n indented"
-- "code\n indented"
trimIndentFromVerbatimBlock :: Int -> String -> String
trimIndentFromVerbatimBlock leadingSpaces txt = fromMaybe txt $ do
List.intercalate "\n" <$> for (lines txt) \line -> do
-- If any 'stripPrefix' fails, we fail and return the unaltered text
case stripPrefix (replicate leadingSpaces ' ') line of
Just stripped -> Just stripped
Nothing ->
-- If it was a line with all white-space, just use an empty line,
-- this can happen easily in editors which trim trailing whitespace.
if all isSpace line
then Just ""
else Nothing
-- Trim leading/trailing whitespace from around delimiters, e.g.
--
-- {{
-- '''___ <- whitespace here including newline
-- text block
-- 👇 or here
-- __'''
-- }}
-- >>> trimAroundDelimiters " \n text block \n "
-- " text block "
--
-- Should leave leading and trailing line untouched if it contains non-whitespace, e.g.:
--
-- ''' leading whitespace
-- text block
-- trailing whitespace: '''
-- >>> trimAroundDelimiters " leading whitespace\n text block \ntrailing whitespace: "
-- " leading whitespace\n text block \ntrailing whitespace: "
--
-- Should keep trailing newline if it's the only thing on the line, e.g.:
--
-- '''
-- newline below
--
-- '''
-- >>> trimAroundDelimiters "\nnewline below\n\n"
-- "newline below\n\n"
trimAroundDelimiters :: String -> String
trimAroundDelimiters txt =
txt
& ( \s ->
List.breakOn "\n" s
& \case
(prefix, suffix)
| all isSpace prefix -> drop 1 suffix
| otherwise -> prefix <> suffix
)
& ( \s ->
List.breakOnEnd "\n" s
& \case
(_prefix, "") -> s
(prefix, suffix)
| all isSpace suffix -> dropTrailingNewline prefix
| otherwise -> prefix <> suffix
)
where
dropTrailingNewline = \case
[] -> []
(x : xs) -> NonEmpty.init (x NonEmpty.:| xs)
separated :: (Char -> Bool) -> P a -> P a
separated ok p = P.try $ p <* P.lookAhead (void (P.satisfy ok) <|> P.eof)

View File

@ -63,6 +63,7 @@ library
, bytes
, containers
, cryptonite
, extra
, lens
, megaparsec
, mtl
@ -118,6 +119,7 @@ test-suite syntax-tests
, containers
, cryptonite
, easytest
, extra
, lens
, megaparsec
, mtl