diff --git a/gen/cram.hoon b/gen/cram.hoon new file mode 100644 index 000000000..1719b9404 --- /dev/null +++ b/gen/cram.hoon @@ -0,0 +1,59 @@ +:: +:::: hoon/cram/gen + :: + :: test generator for the cram markdown syntax + :: + :: todo: integrate with ++sail and embed in hoon compiler + :: + :: ++cram is a simple markdown-inspired parser that makes + :: common html tropes easy to type. you can think of ++cram + :: as "rational markdown" or "markdown with syntax errors." + :: a document format should be easy to type and read, but + :: that doesn't mean it can't have rigorous syntax. + :: + :: tldr: ++cram is indent-oriented. indent 2 spaces for + :: a dynamic interpolation, 4 spaces for example code, 6 + :: spaces for a blockquote and 8 spaces for verse. separate + :: every semantic block by a blank line. use - for + :: unordered lists, + for ordered lists. + :: + :: markdown link syntax works. * means bold, _ means + :: italics, "" inserts smart quotes. all enclosed + :: strings are reparsed; escape the terminator within + :: the string, eg, *star \* in bold text*. + :: + :: markdown `literal` syntax is supported, but all hoon + :: constants are automatically marked as code. also, any + :: hoon expression prefixed with # is a code literal. + :: + :: (++cram is a valid hoon parsing rule, but it does a lot + :: of custom processing internally, since the language is + :: context-sensitive. we use a context-sensitive parser + :: to cut the lines into blocks, then reparse flow blocks + :: with normal hoon rules. multipass parsing is the tax + :: humans have to pay for simple but human-friendly syntax.) + :: +::|= inp/cord +::=< (steam-marl (rash inp apex:(sail &))) +=< |=(pax/path (test pax)) +|% :: +++ test :: test text parsing + |= pax/path + ^- tape + :: + :: src: text file as (list cord) + :: txt: source as tape with newlines + :: vex: parsing result + :: + =/ src .^(wain %cx pax) + =. 
src ['---' src] :: prepend a '---' line to the input + =/ txt (zing (turn src |=(@t (weld (rip 3 +<) `tape`~[`@`10])))) + =/ vex (cram:vast [1 1] txt) :: parse starting at line 1, column 1 + :: + :: print result as error or xml text + ?~ q.vex + "syntax error: line {(scow %ud p.p.vex)}, column {(scow %ud q.p.vex)}" + ?: [freeze=|] (poxo (snag 1 ~(shut ap p.u.q.vex))) :: not taken while freeze is | + (poxo ;;(manx q:(slap !>(..zuse) p.u.q.vex))) +:: +-- diff --git a/lib/vast2.hoon b/lib/vast2.hoon new file mode 100644 index 000000000..e8862852c --- /dev/null +++ b/lib/vast2.hoon @@ -0,0 +1,1823 @@ + =+ [bug=`?`| was=*(set path) wer=*path] + |% + ++ gash %+ cook :: fas-separated gasps, welded into one tyke + |= a/(list tyke) ^- tyke + ?~(a ~ (weld i.a $(a t.a))) + (more fas gasp) + ++ gasp ;~ pose + %+ cook + |=({a/tyke b/tyke c/tyke} :(weld a b c)) + ;~ plug + (cook |=(a/(list) (turn a |=(b/* ~))) (star tis)) + (cook |=(a/twig [[~ a] ~]) hasp) + (cook |=(a/(list) (turn a |=(b/* ~))) (star tis)) + == + (cook |=(a/(list) (turn a |=(b/* ~))) (plus tis)) + == + ++ glam ~+((glue ace)) + ++ hasp ;~ pose + (ifix [sel ser] wide) + (stag %call (ifix [pel per] (most ace wide))) + (stag %sand (stag %t qut)) + %+ cook + |=(a/coin [%sand ?:(?=({$~ $tas *} a) %tas %ta) ~(rent co a)]) + nuck:so + == + ++ mota %+ cook :: (star low) then (star hig), packed as one atom + |=({a/tape b/tape} (rap 3 (weld a b))) + ;~(plug (star low) (star hig)) + :: + ++ plex :: path from a %conl of %ta/%tas constants, else ~ + |= gen/twig ^- (unit path) + ?: ?=({$dbug *} gen) + $(gen q.gen) + ?. ?=({$conl *} gen) ~ + %+ reel p.gen + |= {a/twig b/_`(unit path)`[~ u=/]} + ?~ b ~ + ?. ?=({$sand ?($ta $tas) @} a) ~ + `[q.a u.b] + :: + ++ pray :: disabled: prints gen, then crashes + |= gen/twig ~| %pray ^- (unit twig) + ~& [%pray-disabled gen] + !! 
+ :: + ++ prey :: pray each twig, compose results with %per + |= gun/(list twig) ^- (unit twig) + ?~ gun `[%$ 1] + =+ gup=(pray i.gun) + ?~ gup ~ + ?~ t.gun gup + (bind $(gun t.gun) |=(a/twig [%per u.gup a])) + :: + ++ phax :: beers to twigs; atom runs become %nub %knit + |= ruw/(list (list beer)) + =+ [yun=*(list twig) cah=*(list @)] + =+ wod=|=({a/tape b/(list twig)} ^+(b ?~(a b [[%nub %knit (flop a)] b]))) + |- ^+ yun + ?~ ruw + (flop (wod cah yun)) + ?~ i.ruw $(ruw t.ruw) + ?@ i.i.ruw + $(i.ruw t.i.ruw, cah [i.i.ruw cah]) + $(i.ruw t.i.ruw, cah ~, yun [p.i.i.ruw (wod cah yun)]) + :: + ++ posh + |= {pre/(unit tyke) pof/(unit {p/@ud q/tyke})} + ^- (unit (list twig)) + =- ?^(- - ~&(%posh-fail -)) + =+ wom=(poof wer) + %+ biff + ?~ pre `u=wom + %+ bind (poon wom u.pre) + |= moz/(list twig) + ?~(pof moz (weld moz (slag (lent u.pre) wom))) + |= yez/(list twig) + ?~ pof `yez + =+ zey=(flop yez) + =+ [moz=(scag p.u.pof zey) gul=(slag p.u.pof zey)] + =+ zom=(poon (flop moz) q.u.pof) + ?~(zom ~ `(weld (flop gul) u.zom)) + :: + ++ poof |=(pax/path ^-((list twig) (turn pax |=(a/@ta [%sand %ta a])))) :: path to %sand %ta twigs + ++ poon + |= {pag/(list twig) goo/tyke} + ^- (unit (list twig)) + ?~ goo `~ + %+ both + ?^(i.goo i.goo ?~(pag ~ `u=i.pag)) + $(goo t.goo, pag ?~(pag ~ t.pag)) + :: + ++ poor + %+ sear posh + ;~ plug + (stag ~ gash) + ;~(pose (stag ~ ;~(pfix cen porc)) (easy ~)) + == + :: + ++ porc :: count of leading cens, then fas and a gash + ;~ plug + (cook |=(a/(list) (lent a)) (star cen)) + ;~(pfix fas gash) + == + :: + ++ rump + %+ sear + |= {a/wing b/(unit twig)} ^- (unit twig) + ?~(b [~ %wing a] ?.(?=({@ $~} a) ~ [~ [%rock %tas i.a] u.b])) + ;~(plug rope ;~(pose (stag ~ ;~(pfix lus wide)) (easy ~))) + :: + ++ rood :: fas-prefixed poor, tagged %conl + ;~ pfix fas + (stag %conl poor) + == + :: + ++ rupl + %+ cook + |= {a/? 
b/(list twig) c/?} + ?: a + ?: c + [%conl [%conl b] ~] + [%conl b] + ?: c + [%conl [%conp b] ~] + [%conp b] + ;~ plug + ;~ pose + (cold | (just '[')) + (cold & (jest '~[')) + == + :: + ;~ pose + (ifix [ace gap] (most gap tall)) + (most ace wide) + == + :: + ;~ pose + (cold & (jest ']~')) + (cold | (just ']')) + == + == + :: + :: + ++ cram :: parse unmark + => |% + ++ item (pair mite marl:twig) :: xml node generator + ++ colm @ud :: column + ++ tarp marl:twig :: node or generator + ++ mite :: context + $? $down :: outer embed + $lunt :: unordered list + $lime :: list item + $lord :: ordered list + $poem :: verse + $bloc :: blockquote + $head :: heading + == :: + ++ trig :: line style + $: col/@ud :: start column + sty/trig-style :: style + == :: + ++ trig-style :: type of parsed line + $% $: $end :: terminator + $? $done :: end of input + $stet :: == end of markdown + $dent :: outdent + == == :: + $: $one :: leaf node + $? $rule :: --- horz rule + $fens :: ``` code fence + $expr :: ;sail expression + == == :: + {$new p/trig-new} :: open container + {$old $text} :: anything else + == :: + ++ trig-new :: start a + $? $lite :: + line item + $lint :: - line item + $head :: # heading + $bloc :: > block-quote + $poem :: [ ]{8} poem + == :: + ++ graf :: paragraph element + $% {$bold p/(list graf)} :: *bold* + {$talc p/(list graf)} :: _italics_ + {$quod p/(list graf)} :: "double quote" + {$code p/tape} :: code literal + {$text p/tape} :: text symbol + {$link p/(list graf) q/tape} :: URL + {$expr p/tuna:twig} :: interpolated hoon + == + -- + =< (non-empty:parse |=(nail `(like tarp)`~($ main +<))) + |% + ++ main + :: + :: state of the parsing loop. we maintain a construction + :: stack for elements and a line stack for lines in the + :: current block. a blank line causes the current block + :: to be parsed and thrown in the current element. when + :: the indent column retreats, the element stack rolls up. 
+ :: + :: verbose: debug printing enabled + :: err: error position + :: ind: outer and inner indent level + :: hac: stack of items under construction + :: cur: current item under construction + :: par: current "paragraph" being read in + :: [loc txt]: parsing state + :: + =/ verbose & + =| err/(unit hair) + =| ind/{out/@ud inr/@ud} + =| hac/(list item) + =/ cur/item [%down ~] + =| par/(unit (pair hair wall)) + |_ {loc/hair txt/tape} + :: + ++ $ :: resolve + ^- (like tarp) + => line + :: + :: if error position is set, produce error + ?. =(~ err) + ~& err+err + [+.err ~] + :: + :: all data was consumed + =- [loc `[- [loc txt]]] + => close-par + |- ^- tarp + :: + :: fold all the way to top + ?~ hac cur-to-tarp + $(..^$ close-item) + :: + ::+| + :: + ++ cur-indent + ?- p.cur + $down 2 + $head 0 + $lunt 0 + $lime 2 + $lord 0 + $poem 8 + $bloc 2 + == + :: + ++ back :: column retreat + |= luc/@ud + ^+ +> + ?: (gte luc inr.ind) +> + :: + :: nex: next backward step that terminates this context + =/ nex/@ud cur-indent ::REVIEW code and poem blocks are handled elsewhere + ?: (gth nex (sub inr.ind luc)) + :: + :: indenting pattern violation + ~? verbose indent-pattern-violation+[p.cur nex inr.ind luc] + ..^$(inr.ind luc, err `[p.loc luc]) + =. ..^$ close-item + $(inr.ind (sub inr.ind nex)) + :: + ++ cur-to-tarp :: item to tarp + ^- tarp + ?: ?=(?($down $head $expr) p.cur) + (flop q.cur) + =- [[- ~] (flop q.cur)]~ + ?- p.cur + $lunt %ul + $lord %ol + $lime %li + $poem %div ::REVIEW actual container element? + $bloc %blockquote + == + :: + ++ close-item ^+ . :: complete and pop + ?~ hac . + %= . + hac t.hac + cur [p.i.hac (weld cur-to-tarp q.i.hac)] + == + :: + ++ read-line :: capture raw line + =| lin/tape + |- ^+ [[lin *(unit _err)] +<.^$] :: parsed tape and halt/error + :: + :: no unterminated lines + ?~ txt + ~? verbose %unterminated-line + [[~ ``loc] +<.^$] + ?. =(`@`10 i.txt) + ?: (gth inr.ind q.loc) + ?. =(' ' i.txt) + ~? 
verbose expected-indent+[inr.ind loc txt] + [[~ ``loc] +<.^$] + $(txt t.txt, q.loc +(q.loc)) + :: + :: save byte and repeat + $(txt t.txt, q.loc +(q.loc), lin [i.txt lin]) + =. lin + :: + :: trim trailing spaces + |- ^- tape + ?: ?=({$' ' *} lin) + $(lin t.lin) + (flop lin) + :: + =/ eat-newline/nail [[+(p.loc) 1] t.txt] + =/ saw look(+<.$ eat-newline) + :: + ?: ?=({$~ @ $end ?($stet $dent)} saw) :: stop on == or dedent + [[lin `~] +<.^$] + [[lin ~] eat-newline] + :: + ++ look :: inspect line + ^- (unit trig) + %+ bind (wonk (look:parse loc txt)) + |= a/trig ^+ a + :: + :: treat a non-terminator as a terminator + :: if it's outdented + ?: =(%end -.sty.a) a + ?: (lth col.a out.ind) + a(sty [%end %dent]) + a + :: + ++ close-par :: make block + ^+ . + :: + :: empty block, no action + ?~ par . + :: + :: if block is verse + ?: ?=($poem p.cur) + :: + :: add break between stanzas + =. q.cur ?~(q.cur q.cur [[[%br ~] ~] q.cur]) + =- close-item(par ~, q.cur (weld - q.cur), inr.ind (sub inr.ind 8)) + %+ turn q.u.par + |= tape ^- manx + :: + :: each line is a paragraph + :- [%p ~] + :_ ~ + ;/("{+<}\0a") + :: + :: yex: block recomposed, with newlines + =/ yex/tape + (zing (turn (flop q.u.par) |=(a/tape (runt [(dec inr.ind) ' '] "{a}\0a")))) + :: + :: vex: parse of paragraph + =/ vex/(like tarp) + :: + :: either a one-line header or a paragraph + %. [p.u.par yex] + ?: ?=($head p.cur) + (full head:parse) + (full para:parse) + :: + :: if error, propagate correctly + ?~ q.vex + ~? verbose [%close-par p.cur yex] + ..$(err `p.vex) + :: + :: finish tag if it's a header + =< ?:(?=($head p.cur) close-item ..$) + :: + :: save good result, clear buffer + ..$(par ~, q.cur (weld p.u.q.vex q.cur)) + :: + ++ line ^+ . :: body line loop + :: + :: abort after first error + ?: !=(~ err) . + :: + :: saw: profile of this line + =/ saw look + ~? 
[debug=|] [%look ind=ind saw=saw txt=txt] + :: + :: if line is blank + ?~ saw + :: + :: break section + =^ a/{tape fin/(unit _err)} +<.$ read-line + ?^ fin.a + ..$(err u.fin.a) + =>(close-par line) + :: + :: line is not blank + => .(saw u.saw) + :: + :: if end of input, complete + ?: ?=($end -.sty.saw) + ..$(q.loc col.saw) + :: + =. ind ?~(out.ind [col.saw col.saw] ind) :: init indents + :: + ?: ?| ?=($~ par) :: if after a paragraph or + ?& ?=(?($down $lime $bloc) p.cur) :: unspaced new container + |(!=(%old -.sty.saw) (gth col.saw inr.ind)) + == == + => .(..$ close-par) + :: + :: if column has retreated, adjust stack + =. ..$ (back col.saw) + :: + =^ col-ok sty.saw + ?+ (sub col.saw inr.ind) [| sty.saw] :: columns advanced + $0 [& sty.saw] + $8 [& %new %poem] + == + ?. col-ok + ~? verbose [%columns-advanced col.saw inr.ind] + ..$(err `[p.loc col.saw]) + :: + =. inr.ind col.saw + :: + :: unless adding a matching item, close lists + =. ..$ + ?: ?| &(?=($lunt p.cur) !?=($lint +.sty.saw)) + &(?=($lord p.cur) !?=($lite +.sty.saw)) + == + close-item + ..$ + :: + =< line(par `[loc ~]) ^+ ..$ :: continue with para + ?- -.sty.saw + $one (read-one +.sty.saw) :: parse leaves + $new (open-item p.sty.saw) :: open containers + $old ..$ :: just text + == + :: + :: + ::- - - foo + :: detect bad block structure + ?. :: first line of container is legal + ?~ q.u.par & + ?- p.cur + :: + :: can't(/directly) contain text + ?($lord $lunt) ~|(bad-leaf-container+p.cur !!) + :: + :: only one line in a header + $head | + :: + :: indented literals need to end with a blank line + $poem (gte col.saw inr.ind) + :: + :: text tarps must continue aligned + ?($down $lunt $lime $lord $bloc) =(col.saw inr.ind) + == + ~? verbose bad-block-structure+[p.cur inr.ind col.saw] + ..$(err `[p.loc col.saw]) + :: + :: accept line and maybe continue + =^ a/{lin/tape fin/(unit _err)} +<.$ read-line + =. 
par par(q.u [lin.a q.u.par]) + ?^ fin.a ..$(err u.fin.a) + line + :: + ++ parse-block :: execute parser + |= fel/$-(nail (like tarp)) ^+ +> + =/ vex/(like tarp) (fel loc txt) + ?~ q.vex + ~? verbose [%parse-block txt] + +>.$(err `p.vex) + =+ [res loc txt]=u.q.vex + %_ +>.$ + loc loc + txt txt + q.cur (weld (flop `tarp`res) q.cur) :: prepend to the stack + == + :: + ++ read-one :: read %one item + |= sty/?($expr $rule $fens) ^+ +> + ?- sty + $expr (parse-block expr:parse) + $rule (parse-block hrul:parse) + $fens (parse-block (fens:parse inr.ind)) + == + :: + ++ open-item :: enter list/quote + |= saw/trig-new + =< +>.$:apex + |% + ++ apex ^+ . :: open container + ?- saw + $poem (push %poem) :: verse literal + $head (push %head) :: heading + $bloc (entr %bloc) :: blockquote line + $lint (lent %lunt) :: unordered list + $lite (lent %lord) :: ordered list + == + :: + ++ push :: push context + |=(mite +>(hac [cur hac], cur [+< ~])) + :: + ++ entr :: enter container + |= typ/mite + ^+ +> + :: + :: indent by 2 + =. inr.ind (add 2 inr.ind) + :: + :: "parse" marker + =. txt (slag (sub inr.ind q.loc) txt) + =. 
q.loc inr.ind + :: + (push typ) + :: + ++ lent :: list entry + |= ord/?($lord $lunt) + ^+ +> + => ?:(=(ord p.cur) +>.$ (push ord)) :: push list if new + (entr %lime) + -- + -- + :: + ++ parse :: individual parsers + |% + ++ look :: classify line + %+ cook |=(a/(unit trig) a) + ;~ pfix (star ace) + %+ here :: report indent + |=({a/pint b/?($~ trig-style)} ?~(b ~ `[q.p.a b])) + ;~ pose + (cold ~ (just `@`10)) :: blank line + :: + (full (easy [%end %done])) :: end of input + (cold [%end %stet] duz) :: == end of markdown + :: + (cold [%one %rule] ;~(plug hep hep hep)) :: --- horizontal ruler + (cold [%one %fens] ;~(plug tec tec tec)) :: ``` code fence + (cold [%one %expr] sem) :: ;sail expression + :: + (cold [%new %head] ;~(plug (star hax) ace)) :: # heading + (cold [%new %lint] ;~(plug hep ace)) :: - line item + (cold [%new %lite] ;~(plug lus ace)) :: + line item + (cold [%new %bloc] ;~(plug gar ace)) :: > block-quote + :: + (easy [%old %text]) :: anything else + == + == + :: + :: + ++ cash :: escaped fence + |* tem/rule + %- echo + %- star + ;~ pose + whit + ;~(plug bas tem) + ;~(less tem prn) + == + :: + ++ cool :: reparse + |* $: :: fex: primary parser + :: sab: secondary parser + :: + fex/rule + sab/rule + == + |= {loc/hair txt/tape} + ^+ *sab + :: + :: vex: fenced span + =/ vex/(like tape) (fex loc txt) + ?~ q.vex vex + :: + :: hav: reparse full fenced text + =/ hav ((full sab) [loc p.u.q.vex]) + :: + :: reparsed error position is always at start + ?~ q.hav [loc ~] + :: + :: the complete span with the main product + :- p.vex + `[p.u.q.hav q.u.q.vex] + :: + ::REVIEW surely there is a less hacky "first or after space" solution + ++ easy-sol :: parse start of line + |* a/* + |= b/nail + ?: =(1 q.p.b) ((easy a) b) + (fail b) + :: + ++ echo :: hoon literal + |* sab/rule + |= {loc/hair txt/tape} + ^- (like tape) + :: + :: vex: result of parsing wide twig + =/ vex (sab loc txt) + :: + :: use result of expression parser + ?~ q.vex vex + =- [p.vex `[- q.u.q.vex]] + :: + 
:: but replace payload with bytes consumed + |- ^- tape + ?: =(q.q.u.q.vex txt) ~ + ?~ txt ~ + [i.txt $(txt +.txt)] + :: + ++ non-empty :: fail if rule succeeded without consuming input + |* a/rule + |= tub/nail ^+ (a) + =/ vex (a tub) + ~! vex + ?~ q.vex vex + ?. =(tub q.u.q.vex) vex + (fail tub) + :: + :: + ++ word :: tarp parser + %+ knee *(list graf) |. ~+ + %+ cook |=(a/?(graf (list graf)) ?+(a a {@ *} [a]~)) + ;~ pose + :: + :: ordinary word + :: + %+ stag %text + ;~(plug ;~(pose low hig) (star ;~(pose nud low hig hep))) + :: + :: naked \escape + :: + (stag %text ;~(pfix bas (cook trip ;~(less ace prn)))) + :: + :: trailing \ to add <br>
+ :: + (stag %expr (cold [[%br ~] ~] ;~(plug bas (just '\0a')))) + :: + :: *bold literal* + :: + (stag %bold (ifix [tar tar] (cool (cash tar) work))) + :: + :: _italic literal_ + :: + (stag %talc (ifix [cab cab] (cool (cash cab) work))) + :: + :: "quoted text" + :: + (stag %quod (ifix [doq doq] (cool (cash doq) work))) + :: + :: `classic markdown quote` + :: + (stag %code (ifix [tec tec] (cash tec))) + :: + :: ++arm + :: + (stag %code ;~(plug lus lus low (star ;~(pose nud low hep)))) + :: + :: [arbitrary *content*](url) + :: + %+ stag %link + ;~ (glue (punt whit)) + (ifix [sel ser] (cool (cash ser) work)) + (ifix [pel per] (cash per)) + == + :: + :: #twig + :: + ;~ plug + (stag %text ;~(pose (cold " " whit) (easy-sol ~))) + (stag %code ;~(pfix hax (echo wide))) + ;~(simu whit (easy ~)) + == + :: + :: direct hoon constant + :: + ;~ plug + (stag %text ;~(pose (cold " " whit) (easy-sol ~))) + :: + %+ stag %code + %- echo + ;~ pose + ::REVIEW just copy in 0x... parsers directly? + ;~(simu ;~(plug (just '0') alp) bisk:so) + :: + tash:so + ;~(pfix dot perd:so) + ;~(pfix sig ;~(pose twid:so (easy [%$ %n 0]))) + ;~(pfix cen ;~(pose sym buc pam bar qut nuck:so)) + == + :: + ;~(simu whit (easy ~)) + == + :: + :: whitespace + :: + (stag %text (cold " " whit)) + :: + :: {interpolated} sail + :: + (stag %expr inline-embed:(sail |)) + :: + :: just a byte + :: + (stag %text (cook trip ;~(less ace prn))) + == + :: + ++ work (cook zing (star word)) :: indefinite tarp + :: + ++ down :: parse inline tarp + %+ knee *tarp |. ~+ + =- (cook - work) + :: + :: collect raw tarp into xml tags + |= gaf/(list graf) + ^- tarp + =< main + |% + ++ main + ^- tarp + ?~ gaf ~ + ?. ?=($text -.i.gaf) + (weld (item i.gaf) $(gaf t.gaf)) + :: + :: fip: accumulate text blocks + =/ fip/(list tape) [p.i.gaf]~ + |- ^- tarp + ?~ t.gaf [;/((zing (flop fip))) ~] + ?. 
?=($text -.i.t.gaf) + [;/((zing (flop fip))) ^$(gaf t.gaf)] + $(gaf t.gaf, fip :_(fip p.i.t.gaf)) + :: + ++ item + |= nex/graf + ^- tarp ::CHECK can be tuna:twig? + ?- -.nex + $text !! :: handled separately + $expr [p.nex]~ + $bold [[%b ~] ^$(gaf p.nex)]~ + $talc [[%i ~] ^$(gaf p.nex)]~ + $code [[%code ~] ;/(p.nex) ~]~ + $quod :: + :: smart quotes + %= ^$ + gaf + :- [%text (tufa ~-~201c. ~)] + %+ weld p.nex + `(list graf)`[%text (tufa ~-~201d. ~)]~ + == + $link [[%a [%href q.nex] ~] ^$(gaf p.nex)]~ + == + -- + :: + ++ hrul :: empty besides fence + %+ cold [[%hr ~] ~]~ + ;~(plug (star ace) hep hep hep (star hep) (just '\0a')) + :: + ++ tecs + ;~(plug tec tec tec (just '\0a')) + :: + ++ fens + |= col/@u ~+ + =/ ind (stun [(dec col) (dec col)] ace) + =/ ind-tecs ;~(plug ind tecs) + %+ cook |=(txt/tape `tarp`[[%pre ~] ;/(txt) ~]~) + :: + :: leading outdent is ok since container may + :: have already been parsed and consumed + %+ ifix [;~(plug (star ace) tecs) ind-tecs] + %^ stir "" |=({a/tape b/tape} "{a}\0a{b}") + ;~ pose + %+ ifix [ind (just '\0a')] + ;~(less tecs (star prn)) + :: + (cold "" ;~(plug (star ace) (just '\0a'))) + == + :: + ++ para :: paragraph + %+ cook + |=(a/tarp ?~(a ~ [[%p ~] a]~)) + ;~(pfix (punt whit) down) + :: + ++ expr :: expression + => (sail &) :: tall-form + %+ ifix [(star ace) ;~(simu gap (easy))] :: look-ahead for gap + (cook drop-top top-level) :: list of tags + :: + :: + ++ whit :: whitespace + (cold ' ' (plus ;~(pose (just ' ') (just '\0a')))) + :: + ++ head :: parse heading + %+ cook + |= {haxes/tape kids/tarp} ^- tarp + =/ tag (crip 'h' <(lent haxes)>) :: e.g. 
### -> %h3 + =/ id (contents-to-id kids) + [[tag [%id id]~] kids]~ + :: + ;~(pfix (star ace) ;~((glue whit) (stun [1 6] hax) down)) + :: + ++ contents-to-id :: # text into elem id + |= a/(list tuna:twig) ^- tape + =; raw/tape + %+ turn raw + |= @tD + ^- @tD + ?: ?| &((gte +< 'a') (lte +< 'z')) + &((gte +< '0') (lte +< '9')) + == + +< + ?: &((gte +< 'A') (lte +< 'Z')) + (add 32 +<) + '-' + :: + :: collect all text in header tarp + |- ^- tape + ?~ a ~ + %+ weld + ^- tape + ?- i.a + {{$$ {$$ *} $~} $~} :: text node contents + (murn v.i.a.g.i.a |=(a/beer ?^(a ~ (some a)))) + {^ *} $(a c.i.a) :: concatenate children + {@ *} ~ :: ignore interpolation + == + $(a t.a) + -- + -- + ++ sail :: xml template + |= in-tall-form/? =| lin/? + |% + :: + ++ apex :: product twig + %+ cook + |= tum/(each manx marl):twig ^- twig + ?- -.tum + $& [%xmn p.tum] + $| [%xml p.tum] + == + top-level + :: + ++ top-level :: entry-point + ;~(pfix sem ?:(in-tall-form tall-top wide-top)) + :: + ++ inline-embed :: brace interpolation + %+ cook |=(a/tuna:twig a) + ;~ pose + ;~(pfix sem bracketed-elem(in-tall-form |)) + ;~(plug tuna-mode sump) + (stag %tape sump) + == + :: + ++ script-or-style :: script or style + %+ cook |=(a/marx:twig a) + ;~ plug + ;~(pose (jest %script) (jest %style)) + wide-attrs + == + :: + ++ tuna-mode :: xml node(s) kind + ;~ pose + (cold %tape hep) + (cold %manx lus) + (cold %marl tar) + (cold %call cen) + == + :: + ++ wide-top :: wide outer top + %+ knee *(each manx marl):twig |. ~+ + ;~ pose + (stag %| wide-quote) + (stag %| wide-paren-elems) + (stag %& ;~(plug tag-head wide-tail)) + == + :: + ++ wide-inner-top :: wide inner top + %+ knee *(each tuna marl):twig |. 
~+ + ;~ pose + wide-top + (stag %& ;~(plug tuna-mode wide)) + == + :: + ++ wide-attrs :: wide attributes + %+ cook |=(a/(unit mart:twig) (fall a ~)) + %- punt + %+ ifix [pel per] + %+ more (jest ', ') + ;~((glue ace) a-mane hopefully-quote) + :: + ++ wide-tail :: wide elements + %+ cook |=(a/marl:twig a) + ;~(pose ;~(pfix col wrapped-elems) (cold ~ sem) (easy ~)) + :: + ++ wide-elems :: wide elements + %+ cook |=(a/marl:twig a) + %+ cook join-tops + (star ;~(pfix ace wide-inner-top)) + :: + ++ wide-paren-elems :: wide flow + %+ cook |=(a/marl:twig a) + %+ cook join-tops + (ifix [pel per] (more ace wide-inner-top)) + :: + ::+| + :: + ++ drop-top + |= a/(each tuna marl):twig ^- marl:twig + ?- -.a + $& [p.a]~ + $| p.a + == + :: + ++ join-tops + |= a/(list (each tuna marl)):twig ^- marl:twig + (zing (turn a drop-top)) + :: + ::+| + :: + ++ wide-quote :: wide quote + %+ cook |=(a/marl:twig a) + ;~ pose + ;~ less (jest '"""') + (ifix [doq doq] (cook collapse-chars quote-innards)) + == + :: + %- inde + %+ ifix [(jest '"""\0a') (jest '\0a"""')] + (cook collapse-chars quote-innards(lin |)) + == + :: + ++ quote-innards :: wide+tall flow + %+ cook |=(a/(list $@(@ tuna:twig)) a) + %- star + ;~ pose + ;~(pfix bas ;~(pose (mask "-+*%;\{") bas doq bix:ab)) + inline-embed + ;~(less bas kel ?:(in-tall-form fail doq) prn) + ?:(lin fail ;~(less (jest '\0a"""') (just '\0a'))) + == + :: + ++ bracketed-elem :: bracketed element + %+ ifix [kel ker] + ;~(plug tag-head wide-elems) + :: + ++ wrapped-elems :: wrapped tuna + %+ cook |=(a/marl:twig a) + ;~ pose + wide-paren-elems + (cook |=(@t `marl`[;/((trip +<))]~) qut) + (cook drop-top wide-top) + == + :: + ::+| + :: + ++ a-mane :: mane as twig + %+ cook + |= {a/@tas b/(unit @tas)} + ?~(b a [a u.b]) + ;~(plug sym ;~(pose (stag ~ ;~(pfix cab sym)) (easy ~))) + :: + ++ en-class + |= a/(list {$class p/term}) + ^- (unit {$class tape}) + ?~ a ~ + %- some + :- %class + |- + %+ welp (trip p.i.a) + ?~ t.a ~ + [' ' $(a t.a)] + :: + ++ tag-head :: 
tag head + %+ cook + =+ twig ::REVIEW rename dynamic xml types + |= {a/mane b/mart c/mart} + ^- marx + [a (weld b c)] + ;~ plug + a-mane + :: + %+ cook + |= a/(list (unit {term (list beer)})) + ^- (list {term (list beer)}) + :: discard nulls + (murn a same) + ;~ plug + (punt ;~(plug (cold %id hax) (cook trip sym))) + (cook en-class (star ;~(plug (cold %class dot) sym))) + (punt ;~(plug ;~(pose (cold %href fas) (cold %src pat)) soil)) + (easy ~) + == + :: + wide-attrs + == + :: + ::+| + :: + ++ tall-top :: tall top + %+ knee *(each manx marl):twig |. ~+ + ;~ pose + (stag %| ;~(pfix (plus ace) (cook collapse-chars quote-innards))) + (stag %& ;~(plug script-or-style script-style-tail)) + (stag %& tall-elem) + (stag %| wide-quote) + (stag %| ;~(pfix tis tall-tail)) + (stag %& ;~(pfix gar gap (stag [%div ~] cram))) + (stag %| ;~(plug ;~((glue gap) tuna-mode tall) (easy ~))) + (easy %| [;/("\0a")]~) + == + :: + ++ tall-attrs :: tall attributes + %- star + ;~ pfix ;~(plug gap tis) + ;~((glue gap) a-mane hopefully-quote) + == + :: + ++ tall-elem :: tall preface + %+ cook + =+ twig ::REVIEW rename dynamic xml types + |= {a/{p/mane q/mart} b/mart c/marl} + ^- manx + [[p.a (weld q.a b)] c] + ;~(plug tag-head tall-attrs tall-tail) + :: + ::+| + :: + ::REVIEW is there a better way to do this? 
+ ++ hopefully-quote :: prefer "quote" form + %+ cook |=(a/(list beer) a) + %+ cook |=(a/twig ?:(?=($knit -.a) p.a [~ a]~)) + wide + :: + ++ script-style-tail :: unescaped tall tail + %+ cook |=(a/marl:twig a) + %+ ifix [gap ;~(plug gap duz)] + %+ most gap + ;~ pfix sem + %+ cook |=(a/tape ;/(a)) + ;~ pose + ;~(pfix ace (star prn)) + (easy "\0a") + == + == + :: + ++ tall-tail :: tall tail + ?> in-tall-form + %+ cook |=(a/marl:twig a) + ;~ pose + (cold ~ sem) + ;~(pfix col wrapped-elems(in-tall-form |)) + ;~(pfix col ace (cook collapse-chars(in-tall-form |) quote-innards)) + (ifix [gap ;~(plug gap duz)] tall-kids) + == + :: + ++ tall-kids :: child elements + %+ cook join-tops + :: look for sail first, or markdown if not + (most gap ;~(pose top-level (stag %| cram))) + :: + ++ collapse-chars :: group consec chars + |= reb/(list $@(@ tuna:twig)) + ^- marl:twig + =| {sim/(list @) tuz/marl:twig} + |- ^- marl:twig + ?~ reb + =. sim + ?. in-tall-form sim + [10 |-(?~(sim sim ?:(=(32 i.sim) $(sim t.sim) sim)))] + ?~(sim tuz [;/((flop sim)) tuz]) + ?@ i.reb + $(reb t.reb, sim [i.reb sim]) + ?~ sim [i.reb $(reb t.reb, sim ~)] + [;/((flop sim)) i.reb $(reb t.reb, sim ~)] + -- + :: + ++ scat !: + %+ knee *twig |. ~+ + %- stew + ^. stet ^. limo + :~ + :- ',' + ;~ pose + (stag %wing rope) + ;~(pfix com (stag %burn wide)) + == + :- '!' 
+ ;~ pose + (stag %not ;~(pfix zap wide)) + (stag %fail (cold ~ ;~(plug zap zap))) + == + :- '_' + ;~(pfix cab (stag %shoe wide)) + :- '$' + ;~ pose + ;~ pfix buc + ;~ pose + (stag %leaf (stag %tas (cold %$ buc))) + (stag %leaf (stag %f (cold & pam))) + (stag %leaf (stag %f (cold | bar))) + (stag %leaf (stag %t qut)) + (stag %leaf (sear |=(a/coin ?:(?=($$ -.a) (some +.a) ~)) nuck:so)) + == + == + rump + == + :- '%' + ;~ pfix cen + ;~ pose + (stag %conl (sear |~({a/@ud b/tyke} (posh ~ ~ a b)) porc)) + (stag %rock (stag %tas (cold %$ buc))) + (stag %rock (stag %f (cold & pam))) + (stag %rock (stag %f (cold | bar))) + (stag %rock (stag %t qut)) + (cook (jock &) nuck:so) + (stag %conl (sear |=(a/(list) (posh ~ ~ (lent a) ~)) (star cen))) + == + == + :- '&' + ;~ pose + (cook |=(a/wing [%make a ~]) rope) + (stag %and ;~(pfix pam (ifix [pel per] (most ace wide)))) + ;~(plug (stag %rock (stag %f (cold & pam))) ;~(pfix lus wide)) + (stag %sand (stag %f (cold & pam))) + == + :- '\'' + (stag %sand (stag %t qut)) + :- '(' + (stag %call (ifix [pel per] (most ace wide))) + :- '{' + (stag %bank (ifix [kel ker] (most ace wide))) + :- '*' + ;~ pose + (stag %bunt ;~(pfix tar wide)) + (cold [%base %noun] tar) + == + :- '@' + ;~(pfix pat (stag %base (stag %atom mota))) + :- '+' + ;~ pose + (stag %bump ;~(pfix lus (ifix [pel per] wide))) + :: + %+ cook + |= a/(list (list beer)) + :- %nub + [%knit |-(^-((list beer) ?~(a ~ (weld i.a $(a t.a)))))] + (most dog ;~(pfix lus soil)) + :: + (cook |=(a/wing [%make a ~]) rope) + == + :- '-' + ;~ pose + (stag %sand tash:so) + :: + %+ cook + |= a/(list (list beer)) + [%conl (phax a)] + (most dog ;~(pfix hep soil)) + :: + (cook |=(a/wing [%make a ~]) rope) + == + :- '.' 
+ ;~ pose + (cook (jock |) ;~(pfix dot perd:so)) + (cook |=(a/wing [%make a ~]) rope) + == + :- ['0' '9'] + %+ cook + |= {a/dime b/(unit twig)} + ?~(b [%sand a] [[%rock a] u.b]) + ;~(plug bisk:so (punt ;~(pfix lus wide))) + :- ':' + ;~ pfix col + ;~ pose + (stag %wad (ifix [pel per] (most ace wide))) + ;~(pfix fas (stag %nub wide)) + == + == + :- '=' + (stag %same ;~(pfix tis (ifix [pel per] ;~(glam wide wide)))) + :- '?' + ;~ pose + (stag %pick ;~(pfix wut (ifix [pel per] (most ace wide)))) + (cold [%base %bean] wut) + == + :- '[' + rupl + :- '^' + ;~ pose + (stag %wing rope) + (cold [%base %cell] ket) + == + :- '`' + ;~ pfix tec + ;~ pose + %+ cook + |=({a/@ta b/twig} [%like [%sand a 0] [%like [%sand %$ 0] b]]) + ;~(pfix pat ;~(plug mota ;~(pfix tec wide))) + ;~ pfix tar + (stag %cast (stag [%base %noun] ;~(pfix tec wide))) + == + (stag %cast ;~(plug wide ;~(pfix tec wide))) + (stag %like ;~(pfix lus ;~(plug wide ;~(pfix tec wide)))) + (cook |=(a/twig [[%rock %n ~] a]) wide) + == + == + :- '"' + %+ cook + |= a/(list (list beer)) + [%knit |-(^-((list beer) ?~(a ~ (weld i.a $(a t.a)))))] + (most dog soil) + :- ['a' 'z'] + rump + :- '|' + ;~ pose + (cook |=(a/wing [%make a ~]) rope) + (stag %or ;~(pfix bar (ifix [pel per] (most ace wide)))) + ;~(plug (stag %rock (stag %f (cold | bar))) ;~(pfix lus wide)) + (stag %sand (stag %f (cold | bar))) + == + :- '~' + ;~ pose + rupl + :: + ;~ pfix sig + ;~ pose + (stag %conl (ifix [sel ser] (most ace wide))) + :: + %+ stag %open + %+ ifix + [pel per] + ;~(glam rope wide (most ace wide)) + :: + (cook (jock |) twid:so) + (stag [%bust %null] ;~(pfix lus wide)) + (easy [%bust %null]) + == + == + == + :- '/' + rood + :- '<' + (ifix [gal gar] (stag %tell (most ace wide))) + :- '>' + (ifix [gar gal] (stag %yell (most ace wide))) + == + ++ soil + ;~ pose + ;~ less (jest '"""') + %+ ifix [doq doq] + %- star + ;~ pose + ;~(pfix bas ;~(pose bas doq kel bix:ab)) + ;~(less doq bas kel prn) + (stag ~ sump) + == + == + :: + %- inde %+ ifix + 
[(jest '"""\0a') (jest '\0a"""')] + %- star + ;~ pose + ;~(pfix bas ;~(pose bas kel bix:ab)) + ;~(less bas kel prn) + ;~(less (jest '\0a"""') (just `@`10)) + (stag ~ sump) + == + == + ++ sump (ifix [kel ker] (stag %conp (most ace wide))) + ++ norm :: rune regular form + |= tol/? + =< %- stew + ^. stet ^. limo + :~ :- '|' + ;~ pfix bar + %- stew + ^. stet ^. limo + :~ ['_' (rune cab %door expr)] + ['%' (rune cen %core expe)] + [':' (rune col %gasp expb)] + ['.' (rune dot %trap expa)] + ['/' (rune fas %door expr)] + ['-' (rune hep %loop expa)] + ['^' (rune ket %cork expr)] + ['~' (rune sig %port expb)] + ['*' (rune tar %gill expb)] + ['=' (rune tis %gate expb)] + ['?' (rune wut %tray expa)] + == + == + :- '$' + ;~ pfix buc + %- stew + ^. stet ^. limo + :~ ['@' (rune pat %claw expb)] + ['_' (rune cab %shoe expa)] + [':' (rune col %bank exps)] + ['%' (rune cen %book exps)] + ['^' (rune ket %bush expb)] + ['-' (rune hep %lamb expb)] + ['=' (rune tis %coat expg)] + ['?' (rune wut %pick exps)] + == + == + :- '%' + ;~ pfix cen + %- stew + ^. stet ^. limo + :~ ['_' (rune cab %keep exph)] + ['.' (rune dot %lace expb)] + ['^' (rune ket %calq expd)] + ['+' (rune lus %calt expc)] + ['-' (rune hep %call expk)] + ['~' (rune sig %open expu)] + ['*' (rune tar %bake expm)] + ['=' (rune tis %make exph)] + == + == + :- ':' + ;~ pfix col + ;~ pose + %- stew + ^. stet ^. 
limo + :~ ['_' (rune cab %scon expb)] + ['^' (rune ket %conq expd)] + ['+' (rune lus %cont expc)] + ['-' (rune hep %cons expb)] + ['~' (rune sig %conl exps)] + ['*' (rune tar %conp exps)] + == + :: + (word %door expr) + (word %core expe) + (word %gasp expb) + (word %trap expa) + (word %door expr) + (word %loop expa) + (word %cork expr) + (word %port expb) + (word %gill expb) + (word %gate expb) + (word %tray expa) + :: + (word %bunt expa) + (word %claw expb) + (word %shoe expa) + (word %bank exps) + (word %book exps) + (word %bush expb) + (word %lamb expb) + (word %coat expg) + (word %pick exps) + :: + (word %keep exph) + (word %lace expb) + (word %calq expd) + (word %calt expc) + (word %call expk) + (word %open expu) + (word %bake expm) + (word %make exph) + :: + (word %scon expb) + (word %conq expd) + (word %cont expc) + (word %cons expb) + (word %conl exps) + (word %conp exps) + :: + (word %bump expa) + (word %nock expb) + (word %same expb) + (word %deep expa) + (word %wish expn) + (word %wish expn) + :: + (word %iron expa) + (word %ward expb) + (word %cast expb) + (word %like expb) + (word %zinc expa) + (word %burn expa) + (word %name expg) + (word %lead expa) + :: + (word %show expb) + (word %poll expf) + (word %lurk expb) + (word %fast hind) + (word %funk hine) + (word %thin hinb) + (word %hint hinb) + (word %memo hinc) + (word %dump hinf) + (word %warn hing) + (word %ddup expb) + (word %peep expb) + :: + (word %wad expi) + (word %nub expa) + (word %dip expi) + (word %fry expb) + :: + (word %new expb) + (word %set expq) + (word %sip expt) + (word %fix expp) + (word %rap expb) + (word %var expo) + (word %rev expo) + (word %per expb) + (word %nip expb) + (word %aka expl) + (word %pin expb) + (word %tow expi) + :: + (word %or exps) + (word %if expc) + (word %lest expc) + (word %deny expb) + (word %sure expb) + ;~(pfix (jest %case) (toad tkhp)) + ;~(pfix (jest %ifcl) (toad tkkt)) + ;~(pfix (jest %fits) (toad tkts)) + ;~(pfix (jest %deft) (toad tkls)) + (word %and 
exps) + ;~(pfix (jest %ifat) (toad tkpt)) + ;~(pfix (jest %ifno) (toad tksg)) + (word %not expa) + :: + (word %twig expb) + (word %spit expb) + (word %wrap expa) + (word %code expa) + (word %need hinh) + moar + == + == + :- '.' + ;~ pfix dot + %- stew + ^. stet ^. limo + :~ ['+' (rune lus %bump expa)] + ['*' (rune tar %nock expb)] + ['=' (rune tis %same expb)] + ['?' (rune wut %deep expa)] + ['^' (rune ket %wish expn)] + == + == + :- '^' + ;~ pfix ket + %- stew + ^. stet ^. limo + :~ ['|' (rune bar %iron expa)] + ['.' (rune dot %ward expb)] + ['-' (rune hep %cast expb)] + ['+' (rune lus %like expb)] + ['&' (rune pam %zinc expa)] + ['~' (rune sig %burn expa)] + ['=' (rune tis %name expg)] + ['?' (rune wut %lead expa)] + == + == + :- '~' + ;~ pfix sig + %- stew + ^. stet ^. limo + :~ ['|' (rune bar %show expb)] + ['$' (rune buc %poll expg)] + ['_' (rune cab %lurk expb)] + ['%' (rune cen %fast hind)] + ['/' (rune fas %funk hine)] + ['<' (rune gal %thin hinb)] + ['>' (rune gar %hint hinb)] + ['+' (rune lus %memo hinc)] + ['&' (rune pam %dump hinf)] + ['?' (rune wut %warn hing)] + ['=' (rune tis %ddup expb)] + ['!' (rune zap %peep expb)] + == + == + :- ';' + ;~ pfix sem + %- stew + ^. stet ^. limo + :~ [':' (rune col %wad expi)] + ['/' (rune fas %nub expa)] + ['~' (rune sig %dip expi)] + [';' (rune sem %fry expb)] + == + == + :- '=' + ;~ pfix tis + %- stew + ^. stet ^. limo + :~ ['|' (rune bar %new expb)] + ['.' (rune dot %set expq)] + ['^' (rune ket %sip expt)] + [':' (rune col %fix expp)] + ['/' (rune fas %var expo)] + [';' (rune sem %rev expo)] + ['<' (rune gal %rap expb)] + ['>' (rune gar %per expb)] + ['-' (rune hep %nip expb)] + ['*' (rune tar %aka expl)] + ['+' (rune lus %pin expb)] + ['~' (rune sig %tow expi)] + == + == + :- '?' + ;~ pfix wut + %- stew + ^. stet ^. limo + :~ ['|' (rune bar %or exps)] + [':' (rune col %if expc)] + ['.' 
(rune dot %lest expc)] + ['<' (rune gal %deny expb)] + ['>' (rune gar %sure expb)] + ['-' ;~(pfix hep (toad tkhp))] + ['^' ;~(pfix ket (toad tkkt))] + ['=' ;~(pfix tis (toad tkts))] + ['+' ;~(pfix lus (toad tkls))] + ['&' (rune pam %and exps)] + ['@' ;~(pfix pat (toad tkpt))] + ['~' ;~(pfix sig (toad tksg))] + ['!' (rune zap %not expa)] + == + == + :- '!' + ;~ pfix zap + %- stew + ^. stet ^. limo + :~ [':' ;~(pfix col (toad expz))] + ['.' ;~(pfix dot (toad |.(loaf(bug |))))] + [',' (rune com %twig expb)] + [';' (rune sem %spit expb)] + ['>' (rune gar %wrap expa)] + ['=' (rune tis %code expa)] + ['?' (rune wut %need hinh)] + == + == + == + |% + ++ boog :: core arms + %+ knee [p=*term q=*foot] |. ~+ + ;~ pfix lus + ;~ pose + %+ cook + |=({a/$ash b/term c/twig} [b a c]) + ;~ gunk + (cold %ash (just '+')) + ;~(pose (cold %$ buc) sym) + loaf + == + :: + %+ cook + |=({a/$elm b/term c/twig} [b a c]) + ;~ gunk + (cold %elm (just '-')) + ;~(pose (cold %$ buc) sym) + loaf + == + == + == + :: + ++ wisp :: core tail + %- ulva + %+ sear + |= a/(list (pair term foot)) + =| b/(map term foot) + |- ^- (unit _b) + ?~ a `b + ?: (~(has by b) p.i.a) + ~&(duplicate-arm+p.i.a ~) + $(a t.a, b (~(put by b) p.i.a q.i.a)) + (most muck boog) + :: + ++ toad :: untrap parser exp + |* har/_expa + =+ dur=(ifix [pel per] $:har(tol |)) + ?:(tol ;~(pose ;~(pfix gap $:har(tol &)) dur) dur) + :: + ++ rune :: build rune + |* {dif/rule tuq/* har/_expa} + ;~(pfix dif (stag tuq (toad har))) + :: + ++ word :: build keyword + |* {key/cord har/_expa} + ;~(pfix (jest key) (stag key (toad har))) + :: + ++ moar :: :moar hack + %+ cook + |= {a/(list) b/(list (pair wing twig))} + ^- twig + [%make [[%| (lent a) `%$] ~] b] + ;~(pfix (jest %moar) ;~(plug (star (jest %r)) (toad |.((butt rick))))) + :: + ++ glop ~+((glue mash)) :: separated by space + ++ gunk ~+((glue muck)) :: separated list + ++ butt |* zor/rule :: closing == if tall + ?:(tol ;~(sfix zor ;~(plug gap duz)) zor) + ++ ulva |* zor/rule :: closing -- and 
tall + ?.(tol fail ;~(sfix zor ;~(plug gap dun))) + ++ hank (most muck loaf) :: gapped twigs + ++ loaf ?:(tol tall wide) :: hoon, current width + ++ mash ?:(tol gap ;~(plug com ace)) :: list separator + ++ muck ?:(tol gap ace) :: general separator + ++ teak %+ knee *tiki |. ~+ :: wing or twig + =+ ^= gub + |= {a/term b/$%({$& p/wing} {$| p/twig})} + ^- tiki + ?-(-.b $& [%& [~ a] p.b], $| [%| [~ a] p.b]) + =+ ^= wyp + ;~ pose + %+ cook gub + ;~ plug + sym + ;~(pfix tis ;~(pose (stag %& rope) (stag %| wide))) + == + :: + (stag %& (stag ~ rope)) + (stag %| (stag ~ wide)) + == + ?. tol wyp + ;~ pose + wyp + :: + ;~ pfix + ;~(plug ket tis gap) + %+ cook gub + ;~ plug + sym + ;~(pfix gap ;~(pose (stag %& rope) (stag %| tall))) + == + == + :: + (stag %| (stag ~ tall)) + == + ++ rack (most mash ;~(gunk loaf loaf)) :: list [twig twig] + ++ rick (most mash ;~(gunk rope loaf)) :: list [wing twig] + :: + :: rune contents + :: + ++ expa |.(loaf) :: one twig + ++ expb |.(;~(gunk loaf loaf)) :: two twigs + ++ expc |.(;~(gunk loaf loaf loaf)) :: three twigs + ++ expd |.(;~(gunk loaf loaf loaf loaf)) :: four twigs + ++ expe |.(wisp) :: core tail + ++ expf |.(;~(gunk ;~(pfix cen sym) loaf)) :: %term and twig + ++ expg |.(;~(gunk sym loaf)) :: term and twig + ++ exph |.((butt ;~(gunk rope rick))) :: wing, [tile twig]s + ++ expi |.((butt ;~(gunk loaf hank))) :: one or more twigs + ++ expj |.(;~(gunk sym rope loaf)) :: term, wing, and twig + ++ expk |.(;~(gunk loaf ;~(plug loaf (easy ~)))) :: list of two twigs + ++ expl |.(;~(gunk sym loaf loaf)) :: term, two twigs + ++ expm |.((butt ;~(gunk rope loaf rick))) :: several [tile twig]s + ++ expn |.(;~(gunk loaf (stag %conp (butt hank)))):: autoconsed twigs + ++ expo |.(;~(gunk wise loaf loaf)) :: =; + ++ expp |.(;~(gunk (butt rick) loaf)) :: [wing twig]s, twig + ++ expq |.(;~(gunk rope loaf loaf)) :: wing and two twigs + ++ expr |.(;~(gunk loaf wisp)) :: twig and core tail + ++ exps |.((butt hank)) :: closed gapped twigs + ++ expt 
|.(;~(gunk wise rope loaf loaf)) :: =^ + ++ expu |.(;~(gunk rope loaf (butt hank))) :: wing, twig, twigs + ++ expv |.((butt rick)) :: just changes + ++ expz |.(loaf(bug &)) :: twig with tracing + :: + :: tiki expansion for %wt runes + :: + ++ tkhp |. %+ cook |= {a/tiki b/(list (pair twig twig))} + (~(wthp ah a) b) + (butt ;~(gunk teak rack)) + ++ tkkt |. %+ cook |= {a/tiki b/twig c/twig} + (~(wtkt ah a) b c) + ;~(gunk teak loaf loaf) + ++ tkls |. %+ cook |= {a/tiki b/twig c/(list (pair twig twig))} + (~(wtls ah a) b c) + (butt ;~(gunk teak loaf rack)) + ++ tkpt |. %+ cook |= {a/tiki b/twig c/twig} + (~(wtpt ah a) b c) + ;~(gunk teak loaf loaf) + ++ tksg |. %+ cook |= {a/tiki b/twig c/twig} + (~(wtsg ah a) b c) + ;~(gunk teak loaf loaf) + ++ tkts |. %+ cook |= {a/twig b/tiki} + (~(wtts ah b) a) + ;~(gunk loaf teak) + :: + :: hint syntax + :: + ++ hinb |.(;~(gunk bont loaf)) :: hint and twig + ++ hinc |. :: optional =en, twig + ;~(pose ;~(gunk bony loaf) ;~(plug (easy ~) loaf)) + ++ hind |.(;~(gunk bonk loaf bonz loaf)) :: jet twig "bon"s twig + ++ hine |.(;~(gunk bonk loaf)) :: jet-hint and twig + ++ hinf |. :: 0-3 >s, two twigs + ;~ pose + ;~(gunk (cook lent (stun [1 3] gar)) loaf loaf) + (stag 0 ;~(gunk loaf loaf)) + == + ++ hing |. :: 0-3 >s, three twigs + ;~ pose + ;~(gunk (cook lent (stun [1 3] gar)) loaf loaf loaf) + (stag 0 ;~(gunk loaf loaf loaf)) + == + ++ bonk :: jet signature + ;~ pfix cen + ;~ pose + ;~(plug sym ;~(pfix col ;~(plug sym ;~(pfix dot ;~(pfix dot dem))))) + ;~(plug sym ;~(pfix col ;~(plug sym ;~(pfix dot dem)))) + ;~(plug sym ;~(pfix dot dem)) + sym + == + == + ++ hinh |. 
:: 1/2 numbers, twig + ;~ gunk + ;~ pose + dem + (ifix [sel ser] ;~(plug dem ;~(pfix ace dem))) + == + loaf + == + ++ bont ;~ (bend) :: term, optional twig + ;~(pfix cen sym) + ;~(pfix dot ;~(pose wide ;~(pfix muck loaf))) + == + ++ bony (cook |=(a/(list) (lent a)) (plus tis)) :: base 1 =en count + ++ bonz :: term-labelled twigs + ;~ pose + (cold ~ sig) + %+ ifix + ?:(tol [;~(plug duz gap) ;~(plug gap duz)] [pel per]) + (more mash ;~(gunk ;~(pfix cen sym) loaf)) + == + -- + :: + ++ lang :: lung sample + $: ros/twig :: XX translation + $= vil + $% {$tis p/twig} + {$col p/twig} + {$ket p/twig} + {$fas p/twig} + {$pel p/(list (pair wing twig))} + == + == + :: + ++ lung + ~+ + %- bend + |= lang + ^- (unit twig) + ?- -.vil + $col ?:(=([%base %bean] ros) ~ [~ %rap ros p.vil]) + $pel (bind ~(reek ap ros) |=(hyp/wing [%make hyp p.vil])) + $ket [~ ros p.vil] + $fas =+ tog=~(hock ap ros) + ?.(?=(@ tog) ~ [~ %coat tog p.vil]) + $tis =+ tog=~(hock ap ros) + ?:(=([%0 ~] tog) ~ [~ %name tog p.vil]) + == + :: + ++ long + %+ knee *twig |. ~+ + ;~ lung + scat + ;~ pose + ;~(plug (cold %tis tis) wide) + ;~(plug (cold %col col) wide) + ;~(plug (cold %ket ket) wide) + ;~(plug (cold %fas fas) wide) + ;~ plug + (easy %pel) + (ifix [pel per] lobo) + == + == + == + :: + ++ lobo (most ;~(plug com ace) ;~(glam rope wide)) + ++ loon (most ;~(plug com ace) ;~(glam wide wide)) + ++ lute :: tall [] noun + ~+ + %+ stag %conp + %+ ifix + [;~(plug sel gap) ;~(plug gap ser)] + (most gap tall) + :: + ++ rope :: wing form + %+ knee *wing + |. 
~+ + %+ (slug |=({a/limb b/wing} [a b])) + dot + ;~ pose + (cold [%| 0 ~] com) + %+ cook + |=({a/(list) b/term} ?~(a b [%| (lent a) `b])) + ;~(plug (star ket) ;~(pose sym (cold %$ buc))) + :: + %+ cook + |=(a/axis [%& a]) + ;~ pose + ;~(pfix lus dim:ag) + ;~(pfix pam (cook |=(a/@ ?:(=(0 a) 0 (mul 2 +($(a (dec a)))))) dim:ag)) + ;~(pfix bar (cook |=(a/@ ?:(=(0 a) 1 +((mul 2 $(a (dec a)))))) dim:ag)) + ven + (cold 1 dot) + == + == + :: + ++ wise %+ cook + |=({a/term b/(unit twig)} ?~(b a [a u.b])) + ;~(plug sym (punt ;~(pfix fas wide))) + ++ tall :: full tall form + %+ knee *twig + |.(~+((wart ;~(pose (norm &) long lute apex:(sail &))))) + :: + ++ wide :: full wide form + %+ knee *twig + |.(~+((wart ;~(pose (norm |) long apex:(sail |))))) + :: + ++ wart + |* zor/rule + %+ here + |= {a/pint b/twig} + ?:(bug [%dbug [wer a] b] b) + zor + -- + diff --git a/mar/umd.hoon b/mar/umd.hoon new file mode 100644 index 000000000..5fc03f646 --- /dev/null +++ b/mar/umd.hoon @@ -0,0 +1,24 @@ +:: +:::: /hoon/umd/mar + :: +/? 310 +:: +|_ mud/@t +++ grow + |% + ++ mime [/text/x-unmark (taco mud)] + ++ txt + (lore mud) + ++ elem + ^- manx + [/div ~(shut ap %xml (rash mud fenced:cram:vast))] + -- +++ grab + |% + ++ mime |=({p/mite q/octs} q.q) + ++ noun @t + ++ txt role + -- +++ grad %txt +++ garb /down +-- diff --git a/ren/tree/body.hoon b/ren/tree/body.hoon index 15c501941..3a76e80ec 100644 --- a/ren/tree/body.hoon +++ b/ren/tree/body.hoon @@ -8,8 +8,18 @@ == =, format =, html +:: +|% +++ script-safe + !. + |= a/tape ^- tape + ?~ a a + ?. ?=({$'<' $'/' *} a) [i.a $(a t.a)] + ['<' '\\' '/' $(a t.t.a)] +-- +:: ^- marl -=/ tree (en-json (pairs:enjs data+dat sein+dat-sen ~)) +=/ tree (script-safe (en-json (pairs:enjs data+dat sein+dat-sen ~))) ;= ;script(type "text/javascript"): window.tree = {tree} ;div#tree; == diff --git a/sys/hoon.hoon b/sys/hoon.hoon index 2573a3b22..230aa608e 100644 --- a/sys/hoon.hoon +++ b/sys/hoon.hoon @@ -4293,8 +4293,8 @@ ?. 
=(10 -.res) [-.res $(res +.res)] (welp [`@t`10 (trip lev)] $(res +.res)) :: -++ ifix - |* {fel/{rule rule} hof/rule} +++ ifix :: prefix and suffix + |* {fel/{rule rule} hof/rule} :: surround hof ~! +< ~! +<:-.fel ~! +<:+.fel @@ -9880,6 +9880,672 @@ ?~ sim [i.reb $(reb t.reb, sim ~)] [;/((flop sim)) i.reb $(reb t.reb, sim ~)] -- + :: + ++ cram :: parse unmark + => |% + ++ item (pair mite marl:twig) :: xml node generator + ++ colm @ud :: column + ++ flow marl:twig :: node or generator + ++ mite :: context + $? $down :: outer embed + $rule :: horizontal ruler + $list :: unordered list + $lime :: list item + $lord :: ordered list + $poem :: verse + $bloc :: blockquote + $code :: preformatted code + $head :: heading + $expr :: dynamic expression + == :: + ++ trig :: line style + $: col/@ud :: start column + sty/trig-style :: style + == :: + ++ trig-style :: type of parsed line + $? $done :: end of input + $rule :: --- horizontal ruler + $lint :: + line item + $lite :: - line item + $head :: # heading + $bloc :: > block-quote + $expr :: ;sail expression + $text :: anything else + == :: + ++ graf :: paragraph element + $% {$bold p/(list graf)} :: *bold* + {$talc p/(list graf)} :: _italics_ + {$quod p/(list graf)} :: "double quote" + {$code p/tape} :: code literal + {$text p/tape} :: text symbol + {$link p/(list graf) q/tape} :: URL + {$expr p/tuna:twig} :: interpolated hoon + == + -- + =< apex + |% + ++ apex + =; fel |=(nail (fel +<)) + :(stag %xmn [%div ~] fenced) + :: + ++ fenced + :: + :: top: original indentation level + :: + |= {{@u top/@u} tape} + %+ pfix (hrul:parse +<) + |= nail ^- (like marl:twig) + ~($ main top +<) + :: + ++ main + :: + :: state of the parsing loop. we maintain a construction + :: stack for elements and a line stack for lines in the + :: current block. a blank line causes the current block + :: to be parsed and thrown in the current element. when + :: the indent column retreats, the element stack rolls up. 
+ :: + :: err: error position + :: col: current control column + :: hac: stack of items under construction + :: cur: current item under construction + :: lub: current block being read in + :: + =| err/(unit hair) + =| col/@ud + =| hac/(list item) + =/ cur/item [%down ~] + =| lub/(unit (pair hair (list tape))) + |_ {top/@ud naz/hair los/tape} + :: + ++ $ :: resolve + ^- (like flow) + => line + :: + :: if error position is set, produce error + ?. =(~ err) [+.err ~] + :: + :: all data was consumed + =- [naz `[- [naz los]]] + => made + |- ^- flow + :: + :: fold all the way to top + ?~ hac fine + $(..^$ fold) + :: + ::+| + :: + ++ cur-indent + ?- p.cur + $down 2 + $rule 0 + $head 0 + $expr 2 + $list 0 + $lime 2 + $lord 0 + $poem 8 + $code 4 + $bloc 2 + == + :: + ++ back :: column retreat + |= luc/@ud + ^+ +> + ?: =(luc col) +> + :: + :: nex: next backward step that terminates this context + =/ nex/@ud cur-indent ::REVIEW code and poem blocks are handled elsewhere + ?: (gth nex (sub col luc)) + :: + :: indenting pattern violation + ::~& indent-pattern-violation+[p.cur nex col luc] + ..^$(col luc, err `[p.naz luc]) + =. ..^$ fold + $(col (sub col nex)) + :: + ++ fine :: item to flow + ^- flow + ?: ?=(?($down $head $expr) p.cur) + (flop q.cur) + =- [[- ~] (flop q.cur)]~ + ?- p.cur + $rule %hr + $list %ul + $lord %ol + $lime %li + $code %pre + $poem %div ::REVIEW actual container element? + $bloc %blockquote + == + :: + ++ fold ^+ . :: complete and pop + ?~ hac . + %= . + hac t.hac + cur [p.i.hac (concat-code (weld fine q.i.hac))] + == + :: + ++ concat-code :: merge continuous pre + |= a/flow + ?~ a a + ?. ?=({$pre *} -.i.a) a + |- + ?~ t.a a + ?. ?=({$pre $~} -.i.t.a) a + :: add blank line between blocks + $(t.a t.t.a, c.i.a (welp c.i.t.a ;/("\0a") c.i.a)) + :: + ++ snap :: capture raw line + =| nap/tape + |- ^+ [nap +>] + :: + :: no unterminated lines + ?~ los + ::~& %unterminated-line + [~ +>(err `naz)] + ?. =(`@`10 i.los) + ?: (gth col q.naz) + ?. 
=(' ' i.los) + ::~& expected-indent+[col naz los] + [~ +>(err `naz)] + $(los t.los, q.naz +(q.naz)) + :: + :: save byte and repeat + $(los t.los, q.naz +(q.naz), nap [i.los nap]) + :: + :: consume newline + :_ +>(los t.los, naz [+(p.naz) 1]) + :: + :: trim trailing spaces + |- ^- tape + ?: ?=({$' ' *} nap) + $(nap t.nap) + (flop nap) + :: + ++ skip :: discard line + |- ^+ + + :: + :: no unterminated lines + ?~ los + ::~& %unterminated-line + +(err `naz) + ?. =(`@`10 i.los) + :: + :: eat byte and repeat + $(los t.los) + :: + :: consume newline + +(los t.los, naz [+(p.naz) 1]) + :: + ++ look :: inspect line + ^- (unit trig) + (wonk (look:parse naz los)) + :: + ++ made :: compose block + ^+ . + :: + :: empty block, no action + ?~ lub . + :: + :: if block is preformatted code + ?: ?=($code p.cur) + =- fold(lub ~, q.cur (weld - q.cur), col (sub col 4)) + %+ turn q.u.lub + |= tape ^- mars + :: + :: each line is text data with its newline + ;/("{+<}\0a") + :: + :: if block is verse + ?: ?=($poem p.cur) + :: + :: add break between stanzas + =. q.cur ?~(q.cur q.cur [[[%br ~] ~] q.cur]) + =- fold(lub ~, q.cur (weld - q.cur), col (sub col 8)) + %+ turn q.u.lub + |= tape ^- manx + :: + :: each line is a paragraph + :- [%p ~] + :_ ~ + ;/("{+<}\0a") + :: + :: yex: block recomposed, with newlines + =/ yex/tape + (zing (turn (flop q.u.lub) |=(a/tape (runt [(dec col) ' '] "{a}\0a")))) + :: + :: vex: parse of paragraph + =/ vex/(like marl:twig) + :: + :: either a one-line header or a paragraph + %. [p.u.lub yex] + %- full + ?- p.cur + $rule =<(;~(pfix (punt whit) hrul) parse) + $expr expr:parse + $head head:parse + @ para:parse + == + :: + :: if error, propagate correctly + ?~ q.vex ..$(err `p.vex) + :: + :: finish tag if it's a header or rule + =< ?:(?=(?($head $rule) p.cur) fold ..$) + :: + :: save good result, clear buffer + ..$(lub ~, q.cur (weld p.u.q.vex q.cur)) + :: + ++ line ^+ . :: body line loop + :: + =. 
col ?~(col top col) + :: + :: abort after first error + ?: !=(~ err) . + :: + :: pic: profile of this line + =/ pic look + :: + :: if line is blank + ?~ pic + :: + :: break section + line:made:skip + :: + :: line is not blank + => .(pic u.pic) + :: + :: if end of input, complete + ?: |(?=($done sty.pic) (lth col.pic top)) + ..$(q.naz col.pic) + :: + :: bal: inspection copy of lub, current section + =/ bal lub + :: + :: if within section + ?~ bal (new-container pic) + :: + :: detect unspaced new containers + ?: ?& ?=(?($down $lime $bloc) p.cur) + |(!=(%text sty.pic) (gth col.pic col)) + == + (new-container:made pic) + :: + :: first line of container is legal + ?~ q.u.bal + =^ nap ..$ snap + line(lub bal(q.u [nap q.u.bal])) + :: + :: detect bad block structure + ?. ?- p.cur + :: + :: can't(/directly) contain text + ?($lord $list) ~|(bad-leaf-container+p.cur !!) + :: + :: only one line in a header/break + ?($head $rule) | + :: + :: literals need to end with a blank line + ?($code $poem $expr) (gte col.pic col) + :: + :: text flows must continue aligned + ?($down $list $lime $lord $bloc) =(col.pic col) + == + ::~& bad-block-structure+[p.cur col col.pic] + ..$(err `[p.naz col.pic]) + :: + :: accept line and continue + =^ nap ..$ snap + line(lub bal(q.u [nap q.u.bal])) + :: + ++ new-container :: enter list/quote + |= pic/trig + :: + :: if column has retreated, adjust stack + =. +>.$ ?. (lth col.pic col) +>.$ (back col.pic) + :: + :: dif: columns advanced + :: erp: error position + :: + =/ dif (sub col.pic col) + =/ erp [p.naz col.pic] + =. col col.pic + :: + :: execute appropriate paragraph form + =< line:abet:apex + |% + :: + ++ abet :: accept line + :: + :: nap: take first line + ..$(lub `[naz ~]) + :: + ++ apex ^+ . :: by column offset + ?+ dif fail :: + $0 apse :: unindented forms + $4 (push %code) :: code literal + $8 (push %poem) :: verse literal + == + :: + ++ apse ^+ . :: by prefix style + ?- sty.pic + $done !! 
:: blank + $rule (push %rule) :: horizontal ruler + $head (push %head) :: heading + $bloc (entr %bloc) :: blockquote line + $expr (entr %expr) :: hoon expression + $lite (lent %list) :: unnumbered list + $lint (lent %lord) :: numbered list + $text text :: anything else + == + :: + ++ fail .(err `erp) :: set error position + ++ push |=(mite +>(hac [cur hac], cur [+< ~])):: push context + ++ entr :: enter container + |= typ/mite + ^+ +> + :: + :: indent by 2 + =. col (add 2 col) + :: + :: "parse" marker + =. los (slag (sub col q.naz) los) + =. q.naz col + :: + (push typ) + :: + ++ lent :: list entry + |= ord/?($lord $list) + ^+ +> + :: can't switch list types + ?: =(?-(ord $list %lord, $lord %list) p.cur) + fail + :: + :: push list item + =< (entr %lime) + :: + :: push list context, unless we're in list + ?:(=(ord p.cur) ..push (push ord)) + :: + ++ text :: plain text + ^+ . + :: + :: only in lists, fold + ?. ?=(?($list $lord) p.cur) . + .(^$ fold) + -- + -- + :: + ++ parse :: individual parsers + |% + ++ look :: classify line + %+ cook |=(a/(unit trig) a) + ;~ pfix (star ace) + %+ here + |=({a/pint b/?($~ trig-style)} ?~(b ~ `[q.p.a b])) + ;~ pose + (full (easy %done)) :: end of input + (cold ~ (just `@`10)) :: blank line + (cold %rule ;~(plug hep hep hep)) :: --- horizontal ruler + (cold %head ;~(plug (star hax) ace)) :: # heading + (cold %lite ;~(plug hep ace)) :: - line item + (cold %lint ;~(plug lus ace)) :: + line item + (cold %bloc ;~(plug gar ace)) :: > block-quote + (cold %expr sem) :: ;sail expression + (easy %text) :: anything else + == + == + :: + ++ cash :: escaped fence + |* tem/rule + %- echo + %- star + ;~ pose + whit + ;~(plug bas tem) + ;~(less tem prn) + == + :: + ++ cool :: reparse + |* $: :: fex: primary parser + :: sab: secondary parser + :: + fex/rule + sab/rule + == + |= {naz/hair los/tape} + ^+ *sab + :: + :: vex: fenced span + =/ vex/(like tape) (fex naz los) + ?~ q.vex vex + :: + :: hav: reparse full fenced text + =/ hav ((full sab) [naz 
p.u.q.vex]) + :: + :: reparsed error position is always at start + ?~ q.hav [naz ~] + :: + :: the complete span with the main product + :- p.vex + `[p.u.q.hav q.u.q.vex] + :: + ::REVIEW surely there is a less hacky "first or after space" solution + ++ easy-sol :: parse start of line + |* a/* + |= b/nail + ?: =(1 q.p.b) ((easy a) b) + (fail b) + :: + ++ echo :: hoon literal + |* sab/rule + |= {naz/hair los/tape} + ^- (like tape) + :: + :: vex: result of parsing wide twig + =/ vex (sab naz los) + :: + :: use result of expression parser + ?~ q.vex vex + =- [p.vex `[- q.u.q.vex]] + :: + :: but replace payload with bytes consumed + |- ^- tape + ?: =(q.q.u.q.vex los) ~ + ?~ los ~ + [i.los $(los +.los)] + :: + ++ word :: flow parser + %+ knee *(list graf) |. ~+ + %+ cook |=(a/?(graf (list graf)) ?+(a a {@ *} [a]~)) + ;~ pose + :: + :: ordinary word + :: + %+ stag %text + ;~(plug ;~(pose low hig) (star ;~(pose nud low hig hep))) + :: + :: naked \escape + :: + (stag %text ;~(pfix bas (cook trip ;~(less ace prn)))) + :: + :: *bold literal* + :: + (stag %bold (ifix [tar tar] (cool (cash tar) work))) + :: + :: _italic literal_ + :: + (stag %talc (ifix [cab cab] (cool (cash cab) work))) + :: + :: "quoted text" + :: + (stag %quod (ifix [doq doq] (cool (cash doq) work))) + :: + :: `classic markdown quote` + :: + (stag %code (ifix [tec tec] (cash tec))) + :: + :: ++arm + :: + (stag %code ;~(plug lus lus low (star ;~(pose nud low hep)))) + :: + :: [arbitrary *content*](url) + :: + %+ stag %link + ;~ (glue (punt whit)) + (ifix [sel ser] (cool (cash ser) work)) + (ifix [pel per] (cash per)) + == + :: + :: #twig + :: + ;~ plug + (stag %text ;~(pose (cold " " whit) (easy-sol ~))) + (stag %code ;~(pfix hax (echo wide))) + ;~(simu whit (easy ~)) + == + :: + :: direct hoon constant + :: + ;~ plug + (stag %text ;~(pose (cold " " whit) (easy-sol ~))) + :: + %+ stag %code + %- echo + ;~ pose + ::REVIEW just copy in 0x... parsers directly? 
+ ;~(simu ;~(plug (just '0') alp) bisk:so) + :: + tash:so + ;~(pfix dot perd:so) + ;~(pfix sig ;~(pose twid:so (easy [%$ %n 0]))) + ;~(pfix cen ;~(pose sym buc pam bar qut nuck:so)) + == + :: + ;~(simu whit (easy ~)) + == + :: + :: whitespace + :: + (stag %text (cold " " whit)) + :: + :: {interpolated} sail + :: + (stag %expr inline-embed:(sail |)) + :: + :: just a byte + :: + (stag %text (cook trip ;~(less ace prn))) + == + :: + ++ work (cook zing (star word)) :: indefinite flow + :: + ++ down :: parse inline flow + %+ knee *flow |. ~+ + =- (cook - work) + :: + :: collect raw flow into xml tags + |= gaf/(list graf) + ^- flow + =< main + |% + ++ main + ^- flow + ?~ gaf ~ + ?. ?=($text -.i.gaf) + (weld (item i.gaf) $(gaf t.gaf)) + :: + :: fip: accumulate text blocks + =/ fip/(list tape) [p.i.gaf]~ + |- ^- flow + ?~ t.gaf [;/((zing (flop fip))) ~] + ?. ?=($text -.i.t.gaf) + [;/((zing (flop fip))) ^$(gaf t.gaf)] + $(gaf t.gaf, fip :_(fip p.i.t.gaf)) + :: + ++ item + |= nex/graf + ^- flow ::CHECK can be tuna:twig? + ?- -.nex + $text !! :: handled separately + $expr [p.nex]~ + $bold [[%b ~] ^$(gaf p.nex)]~ + $talc [[%i ~] ^$(gaf p.nex)]~ + $code [[%code ~] ;/(p.nex) ~]~ + $quod :: + :: smart quotes + %= ^$ + gaf + :- [%text (tufa ~-~201c. ~)] + %+ weld p.nex + `(list graf)`[%text (tufa ~-~201d. ~)]~ + == + $link [[%a [%href q.nex] ~] ^$(gaf p.nex)]~ + == + -- + :: + ++ hrul :: empty besides fence + (cold ~ ;~(plug hep hep hep (star hep) (just '\0a'))) + :: + ++ para :: paragraph + %+ cook + |=(a/flow ?~(a ~ [[%p ~] a]~)) + ;~(pfix (punt whit) down) + :: + ++ expr :: expression + %+ ifix [(punt whit) (punt whit)] :: whitespace surround + => (sail &) :: tall-form + (cook drop-top top-level) :: list of tags + :: + :: + ++ whit :: whitespace + (cold ' ' (plus ;~(pose (just ' ') (just '\0a')))) + :: + ++ head :: parse heading + %+ cook + |= a/manx:twig ^- marl:twig + =. 
a.g.a :_(a.g.a [%id (sanitize-to-id c.a)]) + [a]~ + :: + ;~ plug + :: + :: # -> 1 -> %h1, ### -> 3 -> %h3, etc + :(cook |=(a/@u /(crip "h{}")) lent (stun [1 6] hax)) + :: + ;~(pfix whit down) + == + :: + ++ sanitize-to-id :: # text into elem id + |= a/(list tuna:twig) ^- tape + =; raw/tape + %+ turn raw + |= @tD + ^- @tD + ?: ?| &((gte +< 'a') (lte +< 'z')) + &((gte +< '0') (lte +< '9')) + == + +< + ?: &((gte +< 'A') (lte +< 'Z')) + (add 32 +<) + '-' + :: + :: collect all text in header flow + |- ^- tape + ?~ a ~ + %+ weld + ^- tape + ?- i.a + {{$$ {$$ *} $~} $~} :: text node contents + (murn v.i.a.g.i.a |=(a/beer:twig ?^(a ~ (some a)))) + {^ *} $(a c.i.a) :: concatenate children + {@ *} ~ :: ignore interpolation + == + $(a t.a) + -- + -- + :: ++ scab %+ cook |= a/(list wing) ^- twig @@ -10699,14 +11365,18 @@ fel apse:docs == - ++ tall %+ knee *twig :: full tall form - |.(~+((wart (wrap ;~(pose (norm | &) long lute apex:(sail &)))))) - ++ till %+ knee *root :: full tall form - |.(~+((wart (wrap ;~(pose (norm & &) scad))))) - ++ wide %+ knee *twig :: full wide form - |.(~+((wart ;~(pose (norm | |) long apex:(sail |))))) - ++ wyde %+ knee *root :: full wide form - |.(~+((wart ;~(pose (norm & |) scad)))) + ++ tall :: full tall form + %+ knee *twig + |.(~+((wart (wrap ;~(pose (norm | &) cram long lute apex:(sail &)))))) + ++ till :: mold tall form + %+ knee *root + |.(~+((wart (wrap ;~(pose (norm & &) scad))))) + ++ wide :: full wide form + %+ knee *twig + |.(~+((wart ;~(pose (norm | |) long apex:(sail |))))) + ++ wyde :: mold wide form + %+ knee *root + |.(~+((wart ;~(pose (norm & |) scad)))) ++ wart |* zor/rule %+ here diff --git a/web/static.md b/web/static.umd similarity index 100% rename from web/static.md rename to web/static.umd diff --git a/web/unmark/1.txt b/web/unmark/1.txt new file mode 100644 index 000000000..127e60a35 --- /dev/null +++ b/web/unmark/1.txt @@ -0,0 +1,3 @@ +The quick *brown fox* jumped over #(add 2 2) +their owner's "extremely lazy" dogs. 
+ diff --git a/web/unmark/10.txt b/web/unmark/10.txt new file mode 100644 index 000000000..ca045e46f --- /dev/null +++ b/web/unmark/10.txt @@ -0,0 +1,3 @@ +;style:'#test-style {transform: skew(25deg)}' + +### Test style diff --git a/web/unmark/11.txt b/web/unmark/11.txt new file mode 100644 index 000000000..12144175e --- /dev/null +++ b/web/unmark/11.txt @@ -0,0 +1,12 @@ +;+ + ;> + foo *some style* + +outdent + +;= ;div; == + +;= +moar markdown +== + diff --git a/web/unmark/2.txt b/web/unmark/2.txt new file mode 100644 index 000000000..1647cc495 --- /dev/null +++ b/web/unmark/2.txt @@ -0,0 +1,11 @@ +The quick brown fox jumped _over +the_ extremely lazy dogs. + +Then a horse arrived. It was extremely angry. +Outside, two bears [were fighting](http://google.com) each other. + +Also present at the scene were: + +- an Armenian. + +Everything was soon back to normal. diff --git a/web/unmark/3.txt b/web/unmark/3.txt new file mode 100644 index 000000000..e8f27cd5e --- /dev/null +++ b/web/unmark/3.txt @@ -0,0 +1,52 @@ +#(add 2 2) is a hoon expression + +un*bearably* + +0b1100 + +--- + + +## This is a header + +The quick brown fox jumped over +the extremely lazy dogs. + +Then a horse arrived. It was extremely angry. +Outside, two bears [were fighting](http://google.com) each other. + +Also present at _the intense %hoon scene_ were: + +- an Armenian. + +- a haberdasher. + + A haberdasher is someone who makes hats. There are quite + a few kinds of hats: + + - fedoras + + - borsalinos + + - sombreros + + - baseball caps + + All these devices will protect your bald spot from the rain. + + It is _sometimes difficult_ to be a bald man when it's raining. + +We sometimes speak in %hoon We also say 0xdead.beef things like ~ and #`@`2. + +We don't care if we sound funny, and sometimes we !@#$%%#^? cuss. + +``` +We also sometimes put +in +code +looks + +like +this. 
+``` + diff --git a/web/unmark/4.txt b/web/unmark/4.txt new file mode 100644 index 000000000..d4a3eecf4 --- /dev/null +++ b/web/unmark/4.txt @@ -0,0 +1,18 @@ +## A digital home base + +What you need is a digital home base. What is that computer? Is +it (a) your phone, (b) your browser, (c) your PC or laptop, (d) +your AWS instance, (e) your RasPi or other custom home computer? + +Here are three obvious features your digital home base needs. +(1) it should be infinitely secure and persistent -- at the level +of Amazon S3, Gmail, your bank, etc. (2) it should be a server, +not just a client. (3) it should be usable by ordinary people. + +Everything except (d) falls far short of (1) and/or (2). (d) +falls far short of (3). + +The missing piece is a practical _personal server_ -- a virtual +computer in the cloud, with persistence guarantees comparable to +cloud storage services, that's as completely yours as a RasPi. + diff --git a/web/unmark/6.txt b/web/unmark/6.txt new file mode 100644 index 000000000..72093cf8f --- /dev/null +++ b/web/unmark/6.txt @@ -0,0 +1,6 @@ +*brown fox* ;{s "ignoreme"} ;{a(name "foo")} jumped over + +;div#test: hello world + +- - foo + - bar diff --git a/web/unmark/8.txt b/web/unmark/8.txt new file mode 100644 index 000000000..834187def --- /dev/null +++ b/web/unmark/8.txt @@ -0,0 +1,37 @@ +> xyz + abc + + ``` + code at the beginning of the line + ``` + + zyxxy + +> bar + + poe + m + +> baz + > bal + +- - bleh +- blah + + one + + two + +1 + +> > bel +> what did you just say about me + +... + +``` +code + still code? 
+``` + +> > foo + + not-code diff --git a/web/unmark/9.txt b/web/unmark/9.txt new file mode 100644 index 000000000..00e946534 --- /dev/null +++ b/web/unmark/9.txt @@ -0,0 +1 @@ +> - + ;div.test: nesting diff --git a/web/unmark/all.hoon b/web/unmark/all.hoon new file mode 100644 index 000000000..4c46dab73 --- /dev/null +++ b/web/unmark/all.hoon @@ -0,0 +1,89 @@ +:: Render all %%/{@u}.txt test cases +:: +:::: /hoon/all/unmark/web + :: +/- down, markdown +/+ vast2 +:: +/= cor /^ (list {@ud wain}) + /: /%%/ /_ @ud /txt/ +/= mad /: /%%/cm-spec /down/ +:: +|% +++ rolt |=(a/wall `tape`?~(a ~ ?~(t.a i.a :(weld i.a "\0a" $(a t.a))))) +++ wush + |= {wid/@u tan/tang} ^- tape + (rolt (zing (turn tan |=(a/tank (wash 0^wid a))))) +:: +++ mads + |= a/wain ^- marl + =/ try (mule |.(~(shut ap (rash (nule ';>' a) apex:(sail &):vast2)))) + ?- -.try + $& p.try + $| ;= ;div + ;h3: ERROR + ;pre: {(wush 120 p.try)} + == == == +:: +++ split-on + =| hed/wain + |= {mid/@t all/wain} ^+ [hed all] + ?~ all !! + ?: =(mid i.all) [(flop hed) t.all] + $(all t.all, hed :_(hed i.all)) +:: +++ strip + |= a/manx ^- manx + :_ (turn c.a ..$) + ?+ g.a g.a + {@ {$id *} *} g.a(a t.a.g.a) + {$$ {$$ *} $~} + =< g.a(v.i.a (tufa (turn (tuba v.i.a.g.a) .))) + |=(b/@c `@`?+(b b $~-~201c. '"', $~-~201d. '"')) + == +-- +:: +^- manx +;ul + ;li + ;h2: Core + ;ul + ;* ^- marl + %+ turn cor + |= {num/@u txt/wain} + ;li: ;{p -[]} *{(mads txt)} ;{hr} + == + == + ;li + ;h2: CommonMark + ;ol + ;* ?: [disabled=&] ; DISABLED + ^- marl + %+ murn `down`mad + |= a/elem:markdown + ?: ?=($head -.a) + ?. ?=({{$$ *} $~} q.a) + ~ + (some /(crip "h{}") ;"{p.i.q.a}") + ?. ?=({$code ^ *} a) ~ + ?. =("example" r.u.p.a) ~ + %- some + ^- manx + |- + =+ [inp out]=(split-on '.' 
q.a) + =/ mar c:(snag 0 (mads inp)) + ;li + ;pre: {(trip (role inp))} + ;p: => + ;pre: {(trip (role out))} + ;p: vs + ;pre: {(many:poxo mar "")} + ;p + ;- =/ pox (rush (role out) many:poxa) + ?~ pox "INVALID" + ?: =(u.pox mar) "EQUIVALENT" + ?: =(u.pox (turn mar strip)) "COMPATIBLE" + "DIVERGE" + == + == == +== == diff --git a/web/unmark/cm-spec.md b/web/unmark/cm-spec.md new file mode 100644 index 000000000..d617d80dd --- /dev/null +++ b/web/unmark/cm-spec.md @@ -0,0 +1,9413 @@ +--- +title: CommonMark Spec +author: John MacFarlane +version: 0.28 +date: '2017-08-01' +license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' +--- + +# Introduction + +## What is Markdown? + +Markdown is a plain text format for writing structured documents, +based on conventions for indicating formatting in email +and usenet posts. It was developed by John Gruber (with +help from Aaron Swartz) and released in 2004 in the form of a +[syntax description](http://daringfireball.net/projects/markdown/syntax) +and a Perl script (`Markdown.pl`) for converting Markdown to +HTML. In the next decade, dozens of implementations were +developed in many languages. Some extended the original +Markdown syntax with conventions for footnotes, tables, and +other document elements. Some allowed Markdown documents to be +rendered in formats other than HTML. Websites like Reddit, +StackOverflow, and GitHub had millions of people using Markdown. +And Markdown started to be used beyond the web, to author books, +articles, slide shows, letters, and lecture notes. + +What distinguishes Markdown from many other lightweight markup +syntaxes, which are often easier to write, is its readability. +As Gruber writes: + +> The overriding design goal for Markdown's formatting syntax is +> to make it as readable as possible. The idea is that a +> Markdown-formatted document should be publishable as-is, as +> plain text, without looking like it's been marked up with tags +> or formatting instructions. 
+> () + +The point can be illustrated by comparing a sample of +[AsciiDoc](http://www.methods.co.nz/asciidoc/) with +an equivalent sample of Markdown. Here is a sample of +AsciiDoc from the AsciiDoc manual: + +``` +1. List item one. ++ +List item one continued with a second paragraph followed by an +Indented block. ++ +................. +$ ls *.sh +$ mv *.sh ~/tmp +................. ++ +List item continued with a third paragraph. + +2. List item two continued with an open block. ++ +-- +This paragraph is part of the preceding list item. + +a. This list is nested and does not require explicit item +continuation. ++ +This paragraph is part of the preceding list item. + +b. List item b. + +This paragraph belongs to item two of the outer list. +-- +``` + +And here is the equivalent in Markdown: +``` +1. List item one. + + List item one continued with a second paragraph followed by an + Indented block. + + $ ls *.sh + $ mv *.sh ~/tmp + + List item continued with a third paragraph. + +2. List item two continued with an open block. + + This paragraph is part of the preceding list item. + + 1. This list is nested and does not require explicit item continuation. + + This paragraph is part of the preceding list item. + + 2. List item b. + + This paragraph belongs to item two of the outer list. +``` + +The AsciiDoc version is, arguably, easier to write. You don't need +to worry about indentation. But the Markdown version is much easier +to read. The nesting of list items is apparent to the eye in the +source, not just in the processed document. + +## Why is a spec needed? + +John Gruber's [canonical description of Markdown's +syntax](http://daringfireball.net/projects/markdown/syntax) +does not specify the syntax unambiguously. Here are some examples of +questions it does not answer: + +1. How much indentation is needed for a sublist? The spec says that + continuation paragraphs need to be indented four spaces, but is + not fully explicit about sublists. 
It is natural to think that + they, too, must be indented four spaces, but `Markdown.pl` does + not require that. This is hardly a "corner case," and divergences + between implementations on this issue often lead to surprises for + users in real documents. (See [this comment by John + Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + +2. Is a blank line needed before a block quote or heading? + Most implementations do not require the blank line. However, + this can lead to unexpected results in hard-wrapped text, and + also to ambiguities in parsing (note that some implementations + put the heading inside the blockquote, while others do not). + (John Gruber has also spoken [in favor of requiring the blank + lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + +3. Is a blank line needed before an indented code block? + (`Markdown.pl` requires it, but this is not mentioned in the + documentation, and some implementations do not require it.) + + ``` markdown + paragraph + code? + ``` + +4. What is the exact rule for determining when list items get + wrapped in `
<p>
` tags? Can a list be partially "loose" and partially + "tight"? What should we do with a list like this? + + ``` markdown + 1. one + + 2. two + 3. three + ``` + + Or this? + + ``` markdown + 1. one + - a + + - b + 2. two + ``` + + (There are some relevant comments by John Gruber + [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + +5. Can list markers be indented? Can ordered list markers be right-aligned? + + ``` markdown + 8. item 1 + 9. item 2 + 10. item 2a + ``` + +6. Is this one list with a thematic break in its second item, + or two lists separated by a thematic break? + + ``` markdown + * a + * * * * * + * b + ``` + +7. When list markers change from numbers to bullets, do we have + two lists or one? (The Markdown syntax description suggests two, + but the perl scripts and many other implementations produce one.) + + ``` markdown + 1. fee + 2. fie + - foe + - fum + ``` + +8. What are the precedence rules for the markers of inline structure? + For example, is the following a valid link, or does the code span + take precedence ? + + ``` markdown + [a backtick (`)](/url) and [another backtick (`)](/url). + ``` + +9. What are the precedence rules for markers of emphasis and strong + emphasis? For example, how should the following be parsed? + + ``` markdown + *foo *bar* baz* + ``` + +10. What are the precedence rules between block-level and inline-level + structure? For example, how should the following be parsed? + + ``` markdown + - `a long code span can contain a hyphen like this + - and it can screw things up` + ``` + +11. Can list items include section headings? (`Markdown.pl` does not + allow this, but does allow blockquotes to include headings.) + + ``` markdown + - # Heading + ``` + +12. Can list items be empty? + + ``` markdown + * a + * + * b + ``` + +13. Can link references be defined inside block quotes or list items? + + ``` markdown + > Blockquote [foo]. + > + > [foo]: /url + ``` + +14. 
If there are multiple definitions for the same reference, which takes + precedence? + + ``` markdown + [foo]: /url1 + [foo]: /url2 + + [foo][] + ``` + +In the absence of a spec, early implementers consulted `Markdown.pl` +to resolve these ambiguities. But `Markdown.pl` was quite buggy, and +gave manifestly bad results in many cases, so it was not a +satisfactory replacement for a spec. + +Because there is no unambiguous spec, implementations have diverged +considerably. As a result, users are often surprised to find that +a document that renders one way on one system (say, a github wiki) +renders differently on another (say, converting to docbook using +pandoc). To make matters worse, because nothing in Markdown counts +as a "syntax error," the divergence often isn't discovered right away. + +## About this document + +This document attempts to specify Markdown syntax unambiguously. +It contains many examples with side-by-side Markdown and +HTML. These are intended to double as conformance tests. An +accompanying script `spec_tests.py` can be used to run the tests +against any Markdown program: + + python test/spec_tests.py --spec spec.txt --program PROGRAM + +Since this document describes how Markdown is to be parsed into +an abstract syntax tree, it would have made sense to use an abstract +representation of the syntax tree instead of HTML. But HTML is capable +of representing the structural distinctions we need to make, and the +choice of HTML for the tests makes it possible to run the tests against +an implementation without writing an abstract syntax tree renderer. + +This document is generated from a text file, `spec.txt`, written +in Markdown with a small extension for the side-by-side tests. +The script `tools/makespec.py` can be used to convert `spec.txt` into +HTML or CommonMark (which can then be converted into other formats). + +In the examples, the `→` character is used to represent tabs. 
+ +# Preliminaries + +## Characters and lines + +Any sequence of [characters] is a valid CommonMark +document. + +A [character](@) is a Unicode code point. Although some +code points (for example, combining accents) do not correspond to +characters in an intuitive sense, all code points count as characters +for purposes of this spec. + +This spec does not specify an encoding; it thinks of lines as composed +of [characters] rather than bytes. A conforming parser may be limited +to a certain encoding. + +A [line](@) is a sequence of zero or more [characters] +other than newline (`U+000A`) or carriage return (`U+000D`), +followed by a [line ending] or by the end of file. + +A [line ending](@) is a newline (`U+000A`), a carriage return +(`U+000D`) not followed by a newline, or a carriage return and a +following newline. + +A line containing no characters, or a line containing only spaces +(`U+0020`) or tabs (`U+0009`), is called a [blank line](@). + +The following definitions of character classes will be used in this spec: + +A [whitespace character](@) is a space +(`U+0020`), tab (`U+0009`), newline (`U+000A`), line tabulation (`U+000B`), +form feed (`U+000C`), or carriage return (`U+000D`). + +[Whitespace](@) is a sequence of one or more [whitespace +characters]. + +A [Unicode whitespace character](@) is +any code point in the Unicode `Zs` general category, or a tab (`U+0009`), +carriage return (`U+000D`), newline (`U+000A`), or form feed +(`U+000C`). + +[Unicode whitespace](@) is a sequence of one +or more [Unicode whitespace characters]. + +A [space](@) is `U+0020`. + +A [non-whitespace character](@) is any character +that is not a [whitespace character]. + +An [ASCII punctuation character](@) +is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, +`*`, `+`, `,`, `-`, `.`, `/`, `:`, `;`, `<`, `=`, `>`, `?`, `@`, +`[`, `\`, `]`, `^`, `_`, `` ` ``, `{`, `|`, `}`, or `~`. 
+ +A [punctuation character](@) is an [ASCII +punctuation character] or anything in +the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. + +## Tabs + +Tabs in lines are not expanded to [spaces]. However, +in contexts where whitespace helps to define block structure, +tabs behave as if they were replaced by spaces with a tab stop +of 4 characters. + +Thus, for example, a tab can be used instead of four spaces +in an indented code block. (Note, however, that internal +tabs are passed through as literal tabs, not expanded to +spaces.) + +```````````````````````````````` example +→foo→baz→→bim +. +

<pre><code>foo→baz→→bim
+</code></pre>
+```````````````````````````````` + +```````````````````````````````` example + →foo→baz→→bim +. +
<pre><code>foo→baz→→bim
+</code></pre>
+```````````````````````````````` + +```````````````````````````````` example + a→a + ὐ→a +. +
<pre><code>a→a
+ὐ→a
+</code></pre>
+```````````````````````````````` + +In the following example, a continuation paragraph of a list +item is indented with a tab; this has exactly the same effect +as indentation with four spaces would: + +```````````````````````````````` example + - foo + +→bar +. +
<ul>
+<li>
+<p>foo</p>
+<p>bar</p>
+</li>
+</ul>
+```````````````````````````````` + +```````````````````````````````` example +- foo + +→→bar +. +
<ul>
+<li>
+<p>foo</p>
+<pre><code>  bar
+</code></pre>
+</li>
+</ul>
+```````````````````````````````` + +Normally the `>` that begins a block quote may be followed +optionally by a space, which is not considered part of the +content. In the following case `>` is followed by a tab, +which is treated as if it were expanded into three spaces. +Since one of these spaces is considered part of the +delimiter, `foo` is considered to be indented six spaces +inside the block quote context, so we get an indented +code block starting with two spaces. + +```````````````````````````````` example +>→→foo +. +
<blockquote>
+<pre><code>  foo
+</code></pre>
+</blockquote>
+```````````````````````````````` + +```````````````````````````````` example +-→→foo +. +
<ul>
+<li>
+<pre><code>  foo
+</code></pre>
+</li>
+</ul>
+```````````````````````````````` + + +```````````````````````````````` example + foo +→bar +. +
<pre><code>foo
+bar
+</code></pre>
+```````````````````````````````` + +```````````````````````````````` example + - foo + - bar +→ - baz +. +
<ul>
+<li>foo
+<ul>
+<li>bar
+<ul>
+<li>baz</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+```````````````````````````````` + +```````````````````````````````` example +#→Foo +. +

<h1>Foo</h1>

+```````````````````````````````` + +```````````````````````````````` example +*→*→*→ +. +
+```````````````````````````````` + + +## Insecure characters + +For security reasons, the Unicode character `U+0000` must be replaced +with the REPLACEMENT CHARACTER (`U+FFFD`). + +# Blocks and inlines + +We can think of a document as a sequence of +[blocks](@)---structural elements like paragraphs, block +quotations, lists, headings, rules, and code blocks. Some blocks (like +block quotes and list items) contain other blocks; others (like +headings and paragraphs) contain [inline](@) content---text, +links, emphasized text, images, code spans, and so on. + +## Precedence + +Indicators of block structure always take precedence over indicators +of inline structure. So, for example, the following is a list with +two items, not a list with one item containing a code span: + +```````````````````````````````` example +- `one +- two` +. +
<ul>
+<li>`one</li>
+<li>two`</li>
+</ul>
+```````````````````````````````` + + +This means that parsing can proceed in two steps: first, the block +structure of the document can be discerned; second, text lines inside +paragraphs, headings, and other block constructs can be parsed for inline +structure. The second step requires information about link reference +definitions that will be available only at the end of the first +step. Note that the first step requires processing lines in sequence, +but the second can be parallelized, since the inline parsing of +one block element does not affect the inline parsing of any other. + +## Container blocks and leaf blocks + +We can divide blocks into two types: +[container block](@)s, +which can contain other blocks, and [leaf block](@)s, +which cannot. + +# Leaf blocks + +This section describes the different kinds of leaf block that make up a +Markdown document. + +## Thematic breaks + +A line consisting of 0-3 spaces of indentation, followed by a sequence +of three or more matching `-`, `_`, or `*` characters, each followed +optionally by any number of spaces, forms a +[thematic break](@). + +```````````````````````````````` example +*** +--- +___ +. +
+
+
+```````````````````````````````` + + +Wrong characters: + +```````````````````````````````` example ++++ +. +

+++

+```````````````````````````````` + + +```````````````````````````````` example +=== +. +

===

+```````````````````````````````` + + +Not enough characters: + +```````````````````````````````` example +-- +** +__ +. +

-- +** +__

+```````````````````````````````` + + +One to three spaces indent are allowed: + +```````````````````````````````` example + *** + *** + *** +. +
+
+
+```````````````````````````````` + + +Four spaces is too many: + +```````````````````````````````` example + *** +. +
***
+
+```````````````````````````````` + + +```````````````````````````````` example +Foo + *** +. +

Foo +***

+```````````````````````````````` + + +More than three characters may be used: + +```````````````````````````````` example +_____________________________________ +. +
+```````````````````````````````` + + +Spaces are allowed between the characters: + +```````````````````````````````` example + - - - +. +
+```````````````````````````````` + + +```````````````````````````````` example + ** * ** * ** * ** +. +
+```````````````````````````````` + + +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` + + +Spaces are allowed at the end: + +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` + + +However, no other characters may occur in the line: + +```````````````````````````````` example +_ _ _ _ a + +a------ + +---a--- +. +

_ _ _ _ a

+

a------

+

---a---

+```````````````````````````````` + + +It is required that all of the [non-whitespace characters] be the same. +So, this is not a thematic break: + +```````````````````````````````` example + *-* +. +

-

+```````````````````````````````` + + +Thematic breaks do not need blank lines before or after: + +```````````````````````````````` example +- foo +*** +- bar +. +
    +
  • foo
  • +
+
+
    +
  • bar
  • +
+```````````````````````````````` + + +Thematic breaks can interrupt a paragraph: + +```````````````````````````````` example +Foo +*** +bar +. +

Foo

+
+

bar

+```````````````````````````````` + + +If a line of dashes that meets the above conditions for being a +thematic break could also be interpreted as the underline of a [setext +heading], the interpretation as a +[setext heading] takes precedence. Thus, for example, +this is a setext heading, not a paragraph followed by a thematic break: + +```````````````````````````````` example +Foo +--- +bar +. +

Foo

+

bar

+```````````````````````````````` + + +When both a thematic break and a list item are possible +interpretations of a line, the thematic break takes precedence: + +```````````````````````````````` example +* Foo +* * * +* Bar +. +
    +
  • Foo
  • +
+
+
    +
  • Bar
  • +
+```````````````````````````````` + + +If you want a thematic break in a list item, use a different bullet: + +```````````````````````````````` example +- Foo +- * * * +. +
    +
  • Foo
  • +
  • +
    +
  • +
+```````````````````````````````` + + +## ATX headings + +An [ATX heading](@) +consists of a string of characters, parsed as inline content, between an +opening sequence of 1--6 unescaped `#` characters and an optional +closing sequence of any number of unescaped `#` characters. +The opening sequence of `#` characters must be followed by a +[space] or by the end of line. The optional closing sequence of `#`s must be +preceded by a [space] and may be followed by spaces only. The opening +`#` character may be indented 0-3 spaces. The raw contents of the +heading are stripped of leading and trailing spaces before being parsed +as inline content. The heading level is equal to the number of `#` +characters in the opening sequence. + +Simple headings: + +```````````````````````````````` example +# foo +## foo +### foo +#### foo +##### foo +###### foo +. +

foo

+

foo

+

foo

+

foo

+
foo
+
foo
+```````````````````````````````` + + +More than six `#` characters is not a heading: + +```````````````````````````````` example +####### foo +. +

####### foo

+```````````````````````````````` + + +At least one space is required between the `#` characters and the +heading's contents, unless the heading is empty. Note that many +implementations currently do not require the space. However, the +space was required by the +[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py), +and it helps prevent things like the following from being parsed as +headings: + +```````````````````````````````` example +#5 bolt + +#hashtag +. +

#5 bolt

+

#hashtag

+```````````````````````````````` + + +This is not a heading, because the first `#` is escaped: + +```````````````````````````````` example +\## foo +. +

## foo

+```````````````````````````````` + + +Contents are parsed as inlines: + +```````````````````````````````` example +# foo *bar* \*baz\* +. +

foo bar *baz*

+```````````````````````````````` + + +Leading and trailing blanks are ignored in parsing inline content: + +```````````````````````````````` example +# foo +. +

foo

+```````````````````````````````` + + +One to three spaces indentation are allowed: + +```````````````````````````````` example + ### foo + ## foo + # foo +. +

foo

+

foo

+

foo

+```````````````````````````````` + + +Four spaces are too much: + +```````````````````````````````` example + # foo +. +
# foo
+
+```````````````````````````````` + + +```````````````````````````````` example +foo + # bar +. +

foo +# bar

+```````````````````````````````` + + +A closing sequence of `#` characters is optional: + +```````````````````````````````` example +## foo ## + ### bar ### +. +

foo

+

bar

+```````````````````````````````` + + +It need not be the same length as the opening sequence: + +```````````````````````````````` example +# foo ################################## +##### foo ## +. +

foo

+
foo
+```````````````````````````````` + + +Spaces are allowed after the closing sequence: + +```````````````````````````````` example +### foo ### +. +

foo

+```````````````````````````````` + + +A sequence of `#` characters with anything but [spaces] following it +is not a closing sequence, but counts as part of the contents of the +heading: + +```````````````````````````````` example +### foo ### b +. +

foo ### b

+```````````````````````````````` + + +The closing sequence must be preceded by a space: + +```````````````````````````````` example +# foo# +. +

foo#

+```````````````````````````````` + + +Backslash-escaped `#` characters do not count as part +of the closing sequence: + +```````````````````````````````` example +### foo \### +## foo #\## +# foo \# +. +

foo ###

+

foo ###

+

foo #

+```````````````````````````````` + + +ATX headings need not be separated from surrounding content by blank +lines, and they can interrupt paragraphs: + +```````````````````````````````` example +**** +## foo +**** +. +
+

foo

+
+```````````````````````````````` + + +```````````````````````````````` example +Foo bar +# baz +Bar foo +. +

Foo bar

+

baz

+

Bar foo

+```````````````````````````````` + + +ATX headings can be empty: + +```````````````````````````````` example +## +# +### ### +. +

+

+

+```````````````````````````````` + + +## Setext headings + +A [setext heading](@) consists of one or more +lines of text, each containing at least one [non-whitespace +character], with no more than 3 spaces indentation, followed by +a [setext heading underline]. The lines of text must be such +that, were they not followed by the setext heading underline, +they would be interpreted as a paragraph: they cannot be +interpretable as a [code fence], [ATX heading][ATX headings], +[block quote][block quotes], [thematic break][thematic breaks], +[list item][list items], or [HTML block][HTML blocks]. + +A [setext heading underline](@) is a sequence of +`=` characters or a sequence of `-` characters, with no more than 3 +spaces indentation and any number of trailing spaces. If a line +containing a single `-` can be interpreted as an +empty [list items], it should be interpreted this way +and not as a [setext heading underline]. + +The heading is a level 1 heading if `=` characters are used in +the [setext heading underline], and a level 2 heading if `-` +characters are used. The contents of the heading are the result +of parsing the preceding lines of text as CommonMark inline +content. + +In general, a setext heading need not be preceded or followed by a +blank line. However, it cannot interrupt a paragraph, so when a +setext heading comes after a paragraph, a blank line is needed between +them. + +Simple examples: + +```````````````````````````````` example +Foo *bar* +========= + +Foo *bar* +--------- +. +

Foo bar

+

Foo bar

+```````````````````````````````` + + +The content of the header may span more than one line: + +```````````````````````````````` example +Foo *bar +baz* +==== +. +

Foo bar +baz

+```````````````````````````````` + + +The underlining can be any length: + +```````````````````````````````` example +Foo +------------------------- + +Foo += +. +

Foo

+

Foo

+```````````````````````````````` + + +The heading content can be indented up to three spaces, and need +not line up with the underlining: + +```````````````````````````````` example + Foo +--- + + Foo +----- + + Foo + === +. +

Foo

+

Foo

+

Foo

+```````````````````````````````` + + +Four spaces indent is too much: + +```````````````````````````````` example + Foo + --- + + Foo +--- +. +
Foo
+---
+
+Foo
+
+
+```````````````````````````````` + + +The setext heading underline can be indented up to three spaces, and +may have trailing spaces: + +```````````````````````````````` example +Foo + ---- +. +

Foo

+```````````````````````````````` + + +Four spaces is too much: + +```````````````````````````````` example +Foo + --- +. +

Foo +---

+```````````````````````````````` + + +The setext heading underline cannot contain internal spaces: + +```````````````````````````````` example +Foo += = + +Foo +--- - +. +

Foo += =

+

Foo

+
+```````````````````````````````` + + +Trailing spaces in the content line do not cause a line break: + +```````````````````````````````` example +Foo +----- +. +

Foo

+```````````````````````````````` + + +Nor does a backslash at the end: + +```````````````````````````````` example +Foo\ +---- +. +

Foo\

+```````````````````````````````` + + +Since indicators of block structure take precedence over +indicators of inline structure, the following are setext headings: + +```````````````````````````````` example +`Foo +---- +` + +
<a title="a lot +--- +of dashes"/> +. +
<h2>`Foo</h2>
+<p>`</p>
+<h2>&lt;a title=&quot;a lot</h2>
+<p>of dashes&quot;/&gt;</p>
+```````````````````````````````` + + +The setext heading underline cannot be a [lazy continuation +line] in a list item or block quote: + +```````````````````````````````` example +> Foo +--- +. +
+

Foo

+
+
+```````````````````````````````` + + +```````````````````````````````` example +> foo +bar +=== +. +
+

foo +bar +===

+
+```````````````````````````````` + + +```````````````````````````````` example +- Foo +--- +. + +
+```````````````````````````````` + + +A blank line is needed between a paragraph and a following +setext heading, since otherwise the paragraph becomes part +of the heading's content: + +```````````````````````````````` example +Foo +Bar +--- +. +

Foo +Bar

+```````````````````````````````` + + +But in general a blank line is not required before or after +setext headings: + +```````````````````````````````` example +--- +Foo +--- +Bar +--- +Baz +. +
+

Foo

+

Bar

+

Baz

+```````````````````````````````` + + +Setext headings cannot be empty: + +```````````````````````````````` example + +==== +. +

====

+```````````````````````````````` + + +Setext heading text lines must not be interpretable as block +constructs other than paragraphs. So, the line of dashes +in these examples gets interpreted as a thematic break: + +```````````````````````````````` example +--- +--- +. +
+
+```````````````````````````````` + + +```````````````````````````````` example +- foo +----- +. + +
+```````````````````````````````` + + +```````````````````````````````` example + foo +--- +. +
foo
+
+
+```````````````````````````````` + + +```````````````````````````````` example +> foo +----- +. +
+

foo

+
+
+```````````````````````````````` + + +If you want a heading with `> foo` as its literal text, you can +use backslash escapes: + +```````````````````````````````` example +\> foo +------ +. +

> foo

+```````````````````````````````` + + +**Compatibility note:** Most existing Markdown implementations +do not allow the text of setext headings to span multiple lines. +But there is no consensus about how to interpret + +``` markdown +Foo +bar +--- +baz +``` + +One can find four different interpretations: + +1. paragraph "Foo", heading "bar", paragraph "baz" +2. paragraph "Foo bar", thematic break, paragraph "baz" +3. paragraph "Foo bar --- baz" +4. heading "Foo bar", paragraph "baz" + +We find interpretation 4 most natural, and interpretation 4 +increases the expressive power of CommonMark, by allowing +multiline headings. Authors who want interpretation 1 can +put a blank line after the first paragraph: + +```````````````````````````````` example +Foo + +bar +--- +baz +. +

Foo

+

bar

+

baz

+```````````````````````````````` + + +Authors who want interpretation 2 can put blank lines around +the thematic break, + +```````````````````````````````` example +Foo +bar + +--- + +baz +. +

Foo +bar

+
+

baz

+```````````````````````````````` + + +or use a thematic break that cannot count as a [setext heading +underline], such as + +```````````````````````````````` example +Foo +bar +* * * +baz +. +

Foo +bar

+
+

baz

+```````````````````````````````` + + +Authors who want interpretation 3 can use backslash escapes: + +```````````````````````````````` example +Foo +bar +\--- +baz +. +

Foo +bar +--- +baz

+```````````````````````````````` + + +## Indented code blocks + +An [indented code block](@) is composed of one or more +[indented chunks] separated by blank lines. +An [indented chunk](@) is a sequence of non-blank lines, +each indented four or more spaces. The contents of the code block are +the literal contents of the lines, including trailing +[line endings], minus four spaces of indentation. +An indented code block has no [info string]. + +An indented code block cannot interrupt a paragraph, so there must be +a blank line between a paragraph and a following indented code block. +(A blank line is not needed, however, between a code block and a following +paragraph.) + +```````````````````````````````` example + a simple + indented code block +. +
a simple
+  indented code block
+
+```````````````````````````````` + + +If there is any ambiguity between an interpretation of indentation +as a code block and as indicating that material belongs to a [list +item][list items], the list item interpretation takes precedence: + +```````````````````````````````` example + - foo + + bar +. + +```````````````````````````````` + + +```````````````````````````````` example +1. foo + + - bar +. +
    +
  1. +

    foo

    +
      +
    • bar
    • +
    +
  2. +
+```````````````````````````````` + + + +The contents of a code block are literal text, and do not get parsed +as Markdown: + +```````````````````````````````` example +
    <a/> +    *hi* + +    - one +. +
<pre><code>&lt;a/&gt;
+*hi*
+
+- one
+</code></pre>
+```````````````````````````````` + + +Here we have three chunks separated by blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 + + + + chunk3 +. +
chunk1
+
+chunk2
+
+
+
+chunk3
+
+```````````````````````````````` + + +Any initial spaces beyond four will be included in the content, even +in interior blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 +. +
chunk1
+  
+  chunk2
+
+```````````````````````````````` + + +An indented code block cannot interrupt a paragraph. (This +allows hanging indents and the like.) + +```````````````````````````````` example +Foo + bar + +. +

Foo +bar

+```````````````````````````````` + + +However, any non-blank line with fewer than four leading spaces ends +the code block immediately. So a paragraph may occur immediately +after indented code: + +```````````````````````````````` example + foo +bar +. +
foo
+
+

bar

+```````````````````````````````` + + +And indented code can occur immediately before and after other kinds of +blocks: + +```````````````````````````````` example +# Heading + foo +Heading +------ + foo +---- +. +

Heading

+
foo
+
+

Heading

+
foo
+
+
+```````````````````````````````` + + +The first line can be indented more than four spaces: + +```````````````````````````````` example + foo + bar +. +
    foo
+bar
+
+```````````````````````````````` + + +Blank lines preceding or following an indented code block +are not included in it: + +```````````````````````````````` example + + + foo + + +. +
foo
+
+```````````````````````````````` + + +Trailing spaces are included in the code block's content: + +```````````````````````````````` example + foo +. +
foo  
+
+```````````````````````````````` + + + +## Fenced code blocks + +A [code fence](@) is a sequence +of at least three consecutive backtick characters (`` ` ``) or +tildes (`~`). (Tildes and backticks cannot be mixed.) +A [fenced code block](@) +begins with a code fence, indented no more than three spaces. + +The line with the opening code fence may optionally contain some text +following the code fence; this is trimmed of leading and trailing +spaces and called the [info string](@). +The [info string] may not contain any backtick +characters. (The reason for this restriction is that otherwise +some inline code would be incorrectly interpreted as the +beginning of a fenced code block.) + +The content of the code block consists of all subsequent lines, until +a closing [code fence] of the same type as the code block +began with (backticks or tildes), and with at least as many backticks +or tildes as the opening code fence. If the leading code fence is +indented N spaces, then up to N spaces of indentation are removed from +each line of the content (if present). (If a content line is not +indented, it is preserved unchanged. If it is indented less than N +spaces, all of the indentation is removed.) + +The closing code fence may be indented up to three spaces, and may be +followed only by spaces, which are ignored. If the end of the +containing block (or document) is reached and no closing code fence +has been found, the code block contains all of the lines after the +opening code fence until the end of the containing block (or +document). (An alternative spec would require backtracking in the +event that a closing code fence is not found. But this makes parsing +much less efficient, and there seems to be no real down side to the +behavior described here.) + +A fenced code block may interrupt a paragraph, and does not require +a blank line either before or after. + +The content of a code fence is treated as literal text, not parsed +as inlines. 
The first word of the [info string] is typically used to +specify the language of the code sample, and rendered in the `class` +attribute of the `code` tag. However, this spec does not mandate any +particular treatment of the [info string]. + +Here is a simple example with backticks: + +```````````````````````````````` example +``` +< + > +``` +. +
<
+ >
+
+```````````````````````````````` + + +With tildes: + +```````````````````````````````` example +~~~ +< + > +~~~ +. +
<
+ >
+
+```````````````````````````````` + +Fewer than three backticks is not enough: + +```````````````````````````````` example +`` +foo +`` +. +

foo

+```````````````````````````````` + +The closing code fence must use the same character as the opening +fence: + +```````````````````````````````` example +``` +aaa +~~~ +``` +. +
aaa
+~~~
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~ +aaa +``` +~~~ +. +
aaa
+```
+
+```````````````````````````````` + + +The closing code fence must be at least as long as the opening fence: + +```````````````````````````````` example +```` +aaa +``` +`````` +. +
aaa
+```
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~~ +aaa +~~~ +~~~~ +. +
aaa
+~~~
+
+```````````````````````````````` + + +Unclosed code blocks are closed by the end of the document +(or the enclosing [block quote][block quotes] or [list item][list items]): + +```````````````````````````````` example +``` +. +
+```````````````````````````````` + + +```````````````````````````````` example +````` + +``` +aaa +. +

+```
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example +> ``` +> aaa + +bbb +. +
+
aaa
+
+
+

bbb

+```````````````````````````````` + + +A code block can have all empty lines as its content: + +```````````````````````````````` example +``` + + +``` +. +

+  
+
+```````````````````````````````` + + +A code block can be empty: + +```````````````````````````````` example +``` +``` +. +
+```````````````````````````````` + + +Fences can be indented. If the opening fence is indented, +content lines will have equivalent opening indentation removed, +if present: + +```````````````````````````````` example + ``` + aaa +aaa +``` +. +
aaa
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + aaa +aaa + ``` +. +
aaa
+aaa
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` + aaa + aaa + aaa + ``` +. +
aaa
+ aaa
+aaa
+
+```````````````````````````````` + + +Four spaces indentation produces an indented code block: + +```````````````````````````````` example + ``` + aaa + ``` +. +
```
+aaa
+```
+
+```````````````````````````````` + + +Closing fences may be indented by 0-3 spaces, and their indentation +need not match that of the opening fence: + +```````````````````````````````` example +``` +aaa + ``` +. +
aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + ``` +. +
aaa
+
+```````````````````````````````` + + +This is not a closing fence, because it is indented 4 spaces: + +```````````````````````````````` example +``` +aaa + ``` +. +
aaa
+    ```
+
+```````````````````````````````` + + + +Code fences (opening and closing) cannot contain internal spaces: + +```````````````````````````````` example +``` ``` +aaa +. +

+aaa

+```````````````````````````````` + + +```````````````````````````````` example +~~~~~~ +aaa +~~~ ~~ +. +
aaa
+~~~ ~~
+
+```````````````````````````````` + + +Fenced code blocks can interrupt paragraphs, and can be followed +directly by paragraphs, without a blank line between: + +```````````````````````````````` example +foo +``` +bar +``` +baz +. +

foo

+
bar
+
+

baz

+```````````````````````````````` + + +Other blocks can also occur before and after fenced code blocks +without an intervening blank line: + +```````````````````````````````` example +foo +--- +~~~ +bar +~~~ +# baz +. +

foo

+
bar
+
+

baz

+```````````````````````````````` + + +An [info string] can be provided after the opening code fence. +Opening and closing spaces will be stripped, and the first word, prefixed +with `language-`, is used as the value for the `class` attribute of the +`code` element within the enclosing `pre` element. + +```````````````````````````````` example +```ruby +def foo(x) + return 3 +end +``` +. +
def foo(x)
+  return 3
+end
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~~ ruby startline=3 $%@#$ +def foo(x) + return 3 +end +~~~~~~~ +. +
def foo(x)
+  return 3
+end
+
+```````````````````````````````` + + +```````````````````````````````` example +````; +```` +. +
+```````````````````````````````` + + +[Info strings] for backtick code blocks cannot contain backticks: + +```````````````````````````````` example +``` aa ``` +foo +. +

aa +foo

+```````````````````````````````` + + +Closing code fences cannot have [info strings]: + +```````````````````````````````` example +``` +``` aaa +``` +. +
``` aaa
+
+```````````````````````````````` + + + +## HTML blocks + +An [HTML block](@) is a group of lines that is treated +as raw HTML (and will not be escaped in HTML output). + +There are seven kinds of [HTML block], which can be defined +by their start and end conditions. The block begins with a line that +meets a [start condition](@) (after up to three spaces +optional indentation). It ends with the first subsequent line that +meets a matching [end condition](@), or the last line of +the document or other [container block]), if no line is encountered that meets the +[end condition]. If the first line meets both the [start condition] +and the [end condition], the block will contain just that line. + +1. **Start condition:** line begins with the string ``, or the end of the line.\ +**End condition:** line contains an end tag +``, ``, or `` (case-insensitive; it +need not match the start tag). + +2. **Start condition:** line begins with the string ``. + +3. **Start condition:** line begins with the string ``. + +4. **Start condition:** line begins with the string ``. + +5. **Start condition:** line begins with the string +``. + +6. **Start condition:** line begins the string `<` or ``, or +the string `/>`.\ +**End condition:** line is followed by a [blank line]. + +7. **Start condition:** line begins with a complete [open tag] +or [closing tag] (with any [tag name] other than `script`, +`style`, or `pre`) followed only by [whitespace] +or the end of the line.\ +**End condition:** line is followed by a [blank line]. + +HTML blocks continue until they are closed by their appropriate +[end condition], or the last line of the document or other [container block]. +This means any HTML **within an HTML block** that might otherwise be recognised +as a start condition will be ignored by the parser and passed through as-is, +without changing the parser's state. + +For instance, `
<pre>` within an HTML block started by `<table>` will not affect
+the parser state; as the HTML block was started by start condition 6, it
+will end at any blank line. This can be surprising:
+
+```````````````````````````````` example
+
+
+**Hello**,
+
+_world_.
+
+
+. +
+
+**Hello**,
+

world. +

+
+```````````````````````````````` + +In this case, the HTML block is terminated by the newline — the `**hello**` +text remains verbatim — and regular parsing resumes, with a paragraph, +emphasised `world` and inline and block HTML following. + +All types of [HTML blocks] except type 7 may interrupt +a paragraph. Blocks of type 7 may not interrupt a paragraph. +(This restriction is intended to prevent unwanted interpretation +of long tags inside a wrapped paragraph as starting HTML blocks.) + +Some simple examples follow. Here are some basic HTML blocks +of type 6: + +```````````````````````````````` example + + + + +
+ hi +
+ +okay. +. + + + + +
+ hi +
+

okay.

+```````````````````````````````` + + +```````````````````````````````` example +
+ *hello* + +. +
+ *hello* + +```````````````````````````````` + + +A block can also start with a closing tag: + +```````````````````````````````` example +
+*foo* +. +
+*foo* +```````````````````````````````` + + +Here we have two HTML blocks with a Markdown paragraph between them: + +```````````````````````````````` example +
+ +*Markdown* + +
+. +
+

Markdown

+
+```````````````````````````````` + + +The tag on the first line can be partial, as long +as it is split where there would be whitespace: + +```````````````````````````````` example +
+
+. +
+
+```````````````````````````````` + + +```````````````````````````````` example +
+
+. +
+
+```````````````````````````````` + + +An open tag need not be closed: +```````````````````````````````` example +
+*foo* + +*bar* +. +
+*foo* +

bar

+```````````````````````````````` + + + +A partial tag need not even be completed (garbage +in, garbage out): + +```````````````````````````````` example +
+. +
*foo*
+```````````````````````````````` + + +```````````````````````````````` example +
+foo +
+. +
+foo +
+```````````````````````````````` + + +Everything until the next blank line or end of document +gets included in the HTML block. So, in the following +example, what looks like a Markdown code block +is actually part of the HTML block, which continues until a blank +line or the end of the document is reached: + +```````````````````````````````` example +
+``` c +int x = 33; +``` +. +
+``` c +int x = 33; +``` +```````````````````````````````` + + +To start an [HTML block] with a tag that is *not* in the +list of block-level tags in (6), you must put the tag by +itself on the first line (and it must be complete): + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +In type 7 blocks, the [tag name] can be anything: + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +```````````````````````````````` example + +*bar* +. + +*bar* +```````````````````````````````` + + +These rules are designed to allow us to work with tags that +can function as either block-level or inline-level tags. +The `` tag is a nice example. We can surround content with +`` tags in three different ways. In this case, we get a raw +HTML block, because the `` tag is on a line by itself: + +```````````````````````````````` example + +*foo* + +. + +*foo* + +```````````````````````````````` + + +In this case, we get a raw HTML block that just includes +the `` tag (because it ends with the following blank +line). So the contents get interpreted as CommonMark: + +```````````````````````````````` example + + +*foo* + + +. + +

foo

+
+```````````````````````````````` + + +Finally, in this case, the `` tags are interpreted +as [raw HTML] *inside* the CommonMark paragraph. (Because +the tag is not on a line by itself, we get inline HTML +rather than an [HTML block].) + +```````````````````````````````` example +*foo* +. +

foo

+```````````````````````````````` + + +HTML tags designed to contain literal content +(`script`, `style`, `pre`), comments, processing instructions, +and declarations are treated somewhat differently. +Instead of ending at the first blank line, these blocks +end at the first line containing a corresponding end tag. +As a result, these blocks can contain blank lines: + +A pre tag (type 1): + +```````````````````````````````` example +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+okay +. +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+

okay

+```````````````````````````````` + + +A script tag (type 1): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +A style tag (type 1): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +If there is no matching end tag, the block will end at the +end of the document (or the enclosing [block quote][block quotes] +or [list item][list items]): + +```````````````````````````````` example + +*foo* +. + +

foo

+```````````````````````````````` + + +```````````````````````````````` example +*bar* +*baz* +. +*bar* +

baz

+```````````````````````````````` + + +Note that anything on the last line after the +end tag will be included in the [HTML block]: + +```````````````````````````````` example +1. *bar* +. +1. *bar* +```````````````````````````````` + + +A comment (type 2): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + + +A processing instruction (type 3): + +```````````````````````````````` example +'; + +?> +okay +. +'; + +?> +

okay

+```````````````````````````````` + + +A declaration (type 4): + +```````````````````````````````` example + +. + +```````````````````````````````` + + +CDATA (type 5): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +The opening tag can be indented 1-3 spaces, but not 4: + +```````````````````````````````` example + + + +. + +
<!-- foo -->
+
+```````````````````````````````` + + +```````````````````````````````` example +
+ +
+. +
+
<div>
+
+```````````````````````````````` + + +An HTML block of types 1--6 can interrupt a paragraph, and need not be +preceded by a blank line. + +```````````````````````````````` example +Foo +
+bar +
+. +

Foo

+
+bar +
+```````````````````````````````` + + +However, a following blank line is needed, except at the end of +a document, and except for blocks of types 1--5, above: + +```````````````````````````````` example +
+bar +
+*foo* +. +
+bar +
+*foo* +```````````````````````````````` + + +HTML blocks of type 7 cannot interrupt a paragraph: + +```````````````````````````````` example +Foo + +baz +. +

Foo + +baz

+```````````````````````````````` + + +This rule differs from John Gruber's original Markdown syntax +specification, which says: + +> The only restrictions are that block-level HTML elements — +> e.g. `
<div>`, `<table>`, `<pre>`, `<p>
`, etc. — must be separated from +> surrounding content by blank lines, and the start and end tags of the +> block should not be indented with tabs or spaces. + +In some ways Gruber's rule is more restrictive than the one given +here: + +- It requires that an HTML block be preceded by a blank line. +- It does not allow the start tag to be indented. +- It requires a matching end tag, which it also does not allow to + be indented. + +Most Markdown implementations (including some of Gruber's own) do not +respect all of these restrictions. + +There is one respect, however, in which Gruber's rule is more liberal +than the one given here, since it allows blank lines to occur inside +an HTML block. There are two reasons for disallowing them here. +First, it removes the need to parse balanced tags, which is +expensive and can require backtracking from the end of the document +if no matching end tag is found. Second, it provides a very simple +and flexible way of including Markdown content inside HTML tags: +simply separate the Markdown from the HTML using blank lines: + +Compare: + +```````````````````````````````` example +

+ +*Emphasized* text. + +
+. +
+

Emphasized text.

+
+```````````````````````````````` + + +```````````````````````````````` example +
+*Emphasized* text. +
+. +
+*Emphasized* text. +
+```````````````````````````````` + + +Some Markdown implementations have adopted a convention of +interpreting content inside tags as text if the open tag has +the attribute `markdown=1`. The rule given above seems a simpler and +more elegant way of achieving the same expressive power, which is also +much simpler to parse. + +The main potential drawback is that one can no longer paste HTML +blocks into Markdown documents with 100% reliability. However, +*in most cases* this will work fine, because the blank lines in +HTML are usually followed by HTML block tags. For example: + +```````````````````````````````` example +
+ + + + + + + +
+Hi +
+. + + + + +
+Hi +
+```````````````````````````````` + + +There are problems, however, if the inner tags are indented +*and* separated by spaces, as then they will be interpreted as +an indented code block: + +```````````````````````````````` example + + + + + + + + +
+ Hi +
+. + + +
<td>
+  Hi
+</td>
+
+ +
+```````````````````````````````` + + +Fortunately, blank lines are usually not necessary and can be +deleted. The exception is inside `
<pre>` tags, but as described
+above, raw HTML blocks starting with `<pre>` *can* contain blank
+lines.
+
+## Link reference definitions
+
+A [link reference definition](@)
+consists of a [link label], indented up to three spaces, followed
+by a colon (`:`), optional [whitespace] (including up to one
+[line ending]), a [link destination],
+optional [whitespace] (including up to one
+[line ending]), and an optional [link
+title], which if it is present must be separated
+from the [link destination] by [whitespace].
+No further [non-whitespace characters] may occur on the line.
+
+A [link reference definition]
+does not correspond to a structural element of a document.  Instead, it
+defines a label which can be used in [reference links]
+and reference-style [images] elsewhere in the document.  [Link
+reference definitions] can come either before or after the links that use
+them.
+
+```````````````````````````````` example
+[foo]: /url "title"
+
+[foo]
+.
+

foo

+```````````````````````````````` + + +```````````````````````````````` example + [foo]: + /url + 'the title' + +[foo] +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[Foo*bar\]]:my_(url) 'title (with parens)' + +[Foo*bar\]] +. +

Foo*bar]

+```````````````````````````````` + + +```````````````````````````````` example +[Foo bar]: + +'title' + +[Foo bar] +. +

Foo bar

+```````````````````````````````` + + +The title may extend over multiple lines: + +```````````````````````````````` example +[foo]: /url ' +title +line1 +line2 +' + +[foo] +. +

foo

+```````````````````````````````` + + +However, it may not contain a [blank line]: + +```````````````````````````````` example +[foo]: /url 'title + +with blank line' + +[foo] +. +

[foo]: /url 'title

+

with blank line'

+

[foo]

+```````````````````````````````` + + +The title may be omitted: + +```````````````````````````````` example +[foo]: +/url + +[foo] +. +

foo

+```````````````````````````````` + + +The link destination may not be omitted: + +```````````````````````````````` example +[foo]: + +[foo] +. +

[foo]:

+

[foo]

+```````````````````````````````` + + +Both title and destination can contain backslash escapes +and literal backslashes: + +```````````````````````````````` example +[foo]: /url\bar\*baz "foo\"bar\baz" + +[foo] +. +

foo

+```````````````````````````````` + + +A link can come before its corresponding definition: + +```````````````````````````````` example +[foo] + +[foo]: url +. +

foo

+```````````````````````````````` + + +If there are several matching definitions, the first one takes +precedence: + +```````````````````````````````` example +[foo] + +[foo]: first +[foo]: second +. +

foo

+```````````````````````````````` + + +As noted in the section on [Links], matching of labels is +case-insensitive (see [matches]). + +```````````````````````````````` example +[FOO]: /url + +[Foo] +. +

Foo

+```````````````````````````````` + + +```````````````````````````````` example +[ΑΓΩ]: /φου + +[αγω] +. +

αγω

+```````````````````````````````` + + +Here is a link reference definition with no corresponding link. +It contributes nothing to the document. + +```````````````````````````````` example +[foo]: /url +. +```````````````````````````````` + + +Here is another one: + +```````````````````````````````` example +[ +foo +]: /url +bar +. +

bar

+```````````````````````````````` + + +This is not a link reference definition, because there are +[non-whitespace characters] after the title: + +```````````````````````````````` example +[foo]: /url "title" ok +. +

[foo]: /url "title" ok

+```````````````````````````````` + + +This is a link reference definition, but it has no title: + +```````````````````````````````` example +[foo]: /url +"title" ok +. +

"title" ok

+```````````````````````````````` + + +This is not a link reference definition, because it is indented +four spaces: + +```````````````````````````````` example + [foo]: /url "title" + +[foo] +. +
[foo]: /url "title"
+
+

[foo]

+```````````````````````````````` + + +This is not a link reference definition, because it occurs inside +a code block: + +```````````````````````````````` example +``` +[foo]: /url +``` + +[foo] +. +
[foo]: /url
+
+

[foo]

+```````````````````````````````` + + +A [link reference definition] cannot interrupt a paragraph. + +```````````````````````````````` example +Foo +[bar]: /baz + +[bar] +. +

Foo +[bar]: /baz

+

[bar]

+```````````````````````````````` + + +However, it can directly follow other block elements, such as headings +and thematic breaks, and it need not be followed by a blank line. + +```````````````````````````````` example +# [Foo] +[foo]: /url +> bar +. +

Foo

+
+

bar

+
+```````````````````````````````` + + +Several [link reference definitions] +can occur one after another, without intervening blank lines. + +```````````````````````````````` example +[foo]: /foo-url "foo" +[bar]: /bar-url + "bar" +[baz]: /baz-url + +[foo], +[bar], +[baz] +. +

foo, +bar, +baz

+```````````````````````````````` + + +[Link reference definitions] can occur +inside block containers, like lists and block quotations. They +affect the entire document, not just the container in which they +are defined: + +```````````````````````````````` example +[foo] + +> [foo]: /url +. +

foo

+
+
+```````````````````````````````` + + + +## Paragraphs + +A sequence of non-blank lines that cannot be interpreted as other +kinds of blocks forms a [paragraph](@). +The contents of the paragraph are the result of parsing the +paragraph's raw content as inlines. The paragraph's raw content +is formed by concatenating the lines and removing initial and final +[whitespace]. + +A simple example with two paragraphs: + +```````````````````````````````` example +aaa + +bbb +. +

aaa

+

bbb

+```````````````````````````````` + + +Paragraphs can contain multiple lines, but no blank lines: + +```````````````````````````````` example +aaa +bbb + +ccc +ddd +. +

aaa +bbb

+

ccc +ddd

+```````````````````````````````` + + +Multiple blank lines between paragraphs have no effect: + +```````````````````````````````` example +aaa + + +bbb +. +

aaa

+

bbb

+```````````````````````````````` + + +Leading spaces are skipped: + +```````````````````````````````` example + aaa + bbb +. +

aaa +bbb

+```````````````````````````````` + + +Lines after the first may be indented any amount, since indented +code blocks cannot interrupt paragraphs. + +```````````````````````````````` example +aaa + bbb + ccc +. +

aaa +bbb +ccc

+```````````````````````````````` + + +However, the first line may be indented at most three spaces, +or an indented code block will be triggered: + +```````````````````````````````` example + aaa +bbb +. +

aaa +bbb

+```````````````````````````````` + + +```````````````````````````````` example + aaa +bbb +. +
aaa
+
+

bbb

+```````````````````````````````` + + +Final spaces are stripped before inline parsing, so a paragraph +that ends with two or more spaces will not end with a [hard line +break]: + +```````````````````````````````` example +aaa +bbb +. +

aaa
+bbb

+```````````````````````````````` + + +## Blank lines + +[Blank lines] between block-level elements are ignored, +except for the role they play in determining whether a [list] +is [tight] or [loose]. + +Blank lines at the beginning and end of the document are also ignored. + +```````````````````````````````` example + + +aaa + + +# aaa + + +. +

aaa

+

aaa

+```````````````````````````````` + + + +# Container blocks + +A [container block] is a block that has other +blocks as its contents. There are two basic kinds of container blocks: +[block quotes] and [list items]. +[Lists] are meta-containers for [list items]. + +We define the syntax for container blocks recursively. The general +form of the definition is: + +> If X is a sequence of blocks, then the result of +> transforming X in such-and-such a way is a container of type Y +> with these blocks as its content. + +So, we explain what counts as a block quote or list item by explaining +how these can be *generated* from their contents. This should suffice +to define the syntax, although it does not give a recipe for *parsing* +these constructions. (A recipe is provided below in the section entitled +[A parsing strategy](#appendix-a-parsing-strategy).) + +## Block quotes + +A [block quote marker](@) +consists of 0-3 spaces of initial indent, plus (a) the character `>` together +with a following space, or (b) a single character `>` not followed by a space. + +The following rules define [block quotes]: + +1. **Basic case.** If a string of lines *Ls* constitute a sequence + of blocks *Bs*, then the result of prepending a [block quote + marker] to the beginning of each line in *Ls* + is a [block quote](#block-quotes) containing *Bs*. + +2. **Laziness.** If a string of lines *Ls* constitute a [block + quote](#block-quotes) with contents *Bs*, then the result of deleting + the initial [block quote marker] from one or + more lines in which the next [non-whitespace character] after the [block + quote marker] is [paragraph continuation + text] is a block quote with *Bs* as its content. + [Paragraph continuation text](@) is text + that will be parsed as part of the content of a paragraph, but does + not occur at the beginning of the paragraph. + +3. **Consecutiveness.** A document cannot contain two [block + quotes] in a row unless there is a [blank line] between them. 
+ +Nothing else counts as a [block quote](#block-quotes). + +Here is a simple example: + +```````````````````````````````` example +> # Foo +> bar +> baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +The spaces after the `>` characters can be omitted: + +```````````````````````````````` example +># Foo +>bar +> baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +The `>` characters can be indented 1-3 spaces: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +Four spaces gives us a code block: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
> # Foo
+> bar
+> baz
+
+```````````````````````````````` + + +The Laziness clause allows us to omit the `>` before +[paragraph continuation text]: + +```````````````````````````````` example +> # Foo +> bar +baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +A block quote can contain some lazy and some non-lazy +continuation lines: + +```````````````````````````````` example +> bar +baz +> foo +. +
+

bar +baz +foo

+
+```````````````````````````````` + + +Laziness only applies to lines that would have been continuations of +paragraphs had they been prepended with [block quote markers]. +For example, the `> ` cannot be omitted in the second line of + +``` markdown +> foo +> --- +``` + +without changing the meaning: + +```````````````````````````````` example +> foo +--- +. +
+

foo

+
+
+```````````````````````````````` + + +Similarly, if we omit the `> ` in the second line of + +``` markdown +> - foo +> - bar +``` + +then the block quote ends after the first line: + +```````````````````````````````` example +> - foo +- bar +. +
+
    +
  • foo
  • +
+
+
    +
  • bar
  • +
+```````````````````````````````` + + +For the same reason, we can't omit the `> ` in front of +subsequent lines of an indented or fenced code block: + +```````````````````````````````` example +> foo + bar +. +
+
foo
+
+
+
bar
+
+```````````````````````````````` + + +```````````````````````````````` example +> ``` +foo +``` +. +
+
+
+

foo

+
+```````````````````````````````` + + +Note that in the following case, we have a [lazy +continuation line]: + +```````````````````````````````` example +> foo + - bar +. +
+

foo +- bar

+
+```````````````````````````````` + + +To see why, note that in + +```markdown +> foo +> - bar +``` + +the `- bar` is indented too far to start a list, and can't +be an indented code block because indented code blocks cannot +interrupt paragraphs, so it is [paragraph continuation text]. + +A block quote can be empty: + +```````````````````````````````` example +> +. +
+
+```````````````````````````````` + + +```````````````````````````````` example +> +> +> +. +
+
+```````````````````````````````` + + +A block quote can have initial or final blank lines: + +```````````````````````````````` example +> +> foo +> +. +
+

foo

+
+```````````````````````````````` + + +A blank line always separates block quotes: + +```````````````````````````````` example +> foo + +> bar +. +
+

foo

+
+
+

bar

+
+```````````````````````````````` + + +(Most current Markdown implementations, including John Gruber's +original `Markdown.pl`, will parse this example as a single block quote +with two paragraphs. But it seems better to allow the author to decide +whether two block quotes or one are wanted.) + +Consecutiveness means that if we put these block quotes together, +we get a single block quote: + +```````````````````````````````` example +> foo +> bar +. +
+

foo +bar

+
+```````````````````````````````` + + +To get a block quote with two paragraphs, use: + +```````````````````````````````` example +> foo +> +> bar +. +
+

foo

+

bar

+
+```````````````````````````````` + + +Block quotes can interrupt paragraphs: + +```````````````````````````````` example +foo +> bar +. +

foo

+
+

bar

+
+```````````````````````````````` + + +In general, blank lines are not needed before or after block +quotes: + +```````````````````````````````` example +> aaa +*** +> bbb +. +
+

aaa

+
+
+
+

bbb

+
+```````````````````````````````` + + +However, because of laziness, a blank line is needed between +a block quote and a following paragraph: + +```````````````````````````````` example +> bar +baz +. +
+

bar +baz

+
+```````````````````````````````` + + +```````````````````````````````` example +> bar + +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +```````````````````````````````` example +> bar +> +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +It is a consequence of the Laziness rule that any number +of initial `>`s may be omitted on a continuation line of a +nested block quote: + +```````````````````````````````` example +> > > foo +bar +. +
+
+
+

foo +bar

+
+
+
+```````````````````````````````` + + +```````````````````````````````` example +>>> foo +> bar +>>baz +. +
+
+
+

foo +bar +baz

+
+
+
+```````````````````````````````` + + +When including an indented code block in a block quote, +remember that the [block quote marker] includes +both the `>` and a following space. So *five spaces* are needed after +the `>`: + +```````````````````````````````` example +> code + +> not code +. +
+
code
+
+
+
+

not code

+
+```````````````````````````````` + + + +## List items + +A [list marker](@) is a +[bullet list marker] or an [ordered list marker]. + +A [bullet list marker](@) +is a `-`, `+`, or `*` character. + +An [ordered list marker](@) +is a sequence of 1--9 arabic digits (`0-9`), followed by either a +`.` character or a `)` character. (The reason for the length +limit is that with 10 digits we start seeing integer overflows +in some browsers.) + +The following rules define [list items]: + +1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of + blocks *Bs* starting with a [non-whitespace character] and not separated + from each other by more than one blank line, and *M* is a list + marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result + of prepending *M* and the following spaces to the first line of + *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a + list item with *Bs* as its contents. The type of the list item + (bullet or ordered) is determined by the type of its list marker. + If the list item is ordered, then it is also assigned a start + number, based on the ordered list marker. + + Exceptions: + + 1. When the first list item in a [list] interrupts + a paragraph---that is, when it starts on a line that would + otherwise count as [paragraph continuation text]---then (a) + the lines *Ls* must not begin with a blank line, and (b) if + the list item is ordered, the start number must be 1. + 2. If any line is a [thematic break][thematic breaks] then + that line is not a list item. + +For example, let *Ls* be the lines + +```````````````````````````````` example +A paragraph +with two lines. + + indented code + +> A block quote. +. +

A paragraph +with two lines.

+
indented code
+
+
+

A block quote.

+
+```````````````````````````````` + + +And let *M* be the marker `1.`, and *N* = 2. Then rule #1 says +that the following is an ordered list item with start number 1, +and the same contents as *Ls*: + +```````````````````````````````` example +1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +The most important thing to notice is that the position of +the text after the list marker determines how much indentation +is needed in subsequent blocks in the list item. If the list +marker takes up two spaces, and there are three spaces between +the list marker and the next [non-whitespace character], then blocks +must be indented five spaces in order to fall under the list +item. + +Here are some examples showing how far content must be indented to be +put under the list item: + +```````````````````````````````` example +- one + + two +. +
    +
  • one
  • +
+

two

+```````````````````````````````` + + +```````````````````````````````` example +- one + + two +. +
    +
  • +

    one

    +

    two

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
    +
  • one
  • +
+
 two
+
+```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
    +
  • +

    one

    +

    two

    +
  • +
+```````````````````````````````` + + +It is tempting to think of this in terms of columns: the continuation +blocks must be indented at least to the column of the first +[non-whitespace character] after the list marker. However, that is not quite right. +The spaces after the list marker determine how much relative indentation +is needed. Which column this indentation reaches will depend on +how the list item is embedded in other constructions, as shown by +this example: + +```````````````````````````````` example + > > 1. one +>> +>> two +. +
+
+
    +
  1. +

    one

    +

    two

    +
  2. +
+
+
+```````````````````````````````` + + +Here `two` occurs in the same column as the list marker `1.`, +but is actually contained in the list item, because there is +sufficient indentation after the last containing blockquote marker. + +The converse is also possible. In the following example, the word `two` +occurs far to the right of the initial text of the list item, `one`, but +it is not considered part of the list item, because it is not indented +far enough past the blockquote marker: + +```````````````````````````````` example +>>- one +>> + > > two +. +
+
+
    +
  • one
  • +
+

two

+
+
+```````````````````````````````` + + +Note that at least one space is needed between the list marker and +any following content, so these are not list items: + +```````````````````````````````` example +-one + +2.two +. +

-one

+

2.two

+```````````````````````````````` + + +A list item may contain blocks that are separated by more than +one blank line. + +```````````````````````````````` example +- foo + + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +A list item may contain any kind of block: + +```````````````````````````````` example +1. foo + + ``` + bar + ``` + + baz + + > bam +. +
    +
  1. +

    foo

    +
    bar
    +
    +

    baz

    +
    +

    bam

    +
    +
  2. +
+```````````````````````````````` + + +A list item that contains an indented code block will preserve +empty lines within the code block verbatim. + +```````````````````````````````` example +- Foo + + bar + + + baz +. +
    +
  • +

    Foo

    +
    bar
    +
    +
    +baz
    +
    +
  • +
+```````````````````````````````` + +Note that ordered list start numbers must be nine digits or less: + +```````````````````````````````` example +123456789. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +```````````````````````````````` example +1234567890. not ok +. +

1234567890. not ok

+```````````````````````````````` + + +A start number may begin with 0s: + +```````````````````````````````` example +0. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +```````````````````````````````` example +003. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +A start number may not be negative: + +```````````````````````````````` example +-1. not ok +. +

-1. not ok

+```````````````````````````````` + + + +2. **Item starting with indented code.** If a sequence of lines *Ls* + constitute a sequence of blocks *Bs* starting with an indented code + block and not separated from each other by more than one blank line, + and *M* is a list marker of width *W* followed by + one space, then the result of prepending *M* and the following + space to the first line of *Ls*, and indenting subsequent lines of + *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +An indented code block will have to be indented four spaces beyond +the edge of the region where text will be included in the list item. +In the following case that is 6 spaces: + +```````````````````````````````` example +- foo + + bar +. +
    +
  • +

    foo

    +
    bar
    +
    +
  • +
+```````````````````````````````` + + +And in this case it is 11 spaces: + +```````````````````````````````` example + 10. foo + + bar +. +
    +
  1. +

    foo

    +
    bar
    +
    +
  2. +
+```````````````````````````````` + + +If the *first* block in the list item is an indented code block, +then by rule #2, the contents must be indented *one* space after the +list marker: + +```````````````````````````````` example + indented code + +paragraph + + more code +. +
indented code
+
+

paragraph

+
more code
+
+```````````````````````````````` + + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
    +
  1. +
    indented code
    +
    +

    paragraph

    +
    more code
    +
    +
  2. +
+```````````````````````````````` + + +Note that an additional space indent is interpreted as space +inside the code block: + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
    +
  1. +
     indented code
    +
    +

    paragraph

    +
    more code
    +
    +
  2. +
+```````````````````````````````` + + +Note that rules #1 and #2 only apply to two cases: (a) cases +in which the lines to be included in a list item begin with a +[non-whitespace character], and (b) cases in which +they begin with an indented code +block. In a case like the following, where the first block begins with +a three-space indent, the rules do not allow us to form a list item by +indenting the whole thing and prepending a list marker: + +```````````````````````````````` example + foo + +bar +. +

foo

+

bar

+```````````````````````````````` + + +```````````````````````````````` example +- foo + + bar +. +
    +
  • foo
  • +
+

bar

+```````````````````````````````` + + +This is not a significant restriction, because when a block begins +with 1-3 spaces indent, the indentation can always be removed without +a change in interpretation, allowing rule #1 to be applied. So, in +the above case: + +```````````````````````````````` example +- foo + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +3. **Item starting with a blank line.** If a sequence of lines *Ls* + starting with a single [blank line] constitute a (possibly empty) + sequence of blocks *Bs*, not separated from each other by more than + one blank line, and *M* is a list marker of width *W*, + then the result of prepending *M* to the first line of *Ls*, and + indenting subsequent lines of *Ls* by *W + 1* spaces, is a list + item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +Here are some list items that start with a blank line but are not empty: + +```````````````````````````````` example +- + foo +- + ``` + bar + ``` +- + baz +. +
    +
  • foo
  • +
  • +
    bar
    +
    +
  • +
  • +
    baz
    +
    +
  • +
+```````````````````````````````` + +When the list item starts with a blank line, the number of spaces +following the list marker doesn't change the required indentation: + +```````````````````````````````` example +- + foo +. +
    +
  • foo
  • +
+```````````````````````````````` + + +A list item can begin with at most one blank line. +In the following example, `foo` is not part of the list +item: + +```````````````````````````````` example +- + + foo +. +
    +
  • +
+

foo

+```````````````````````````````` + + +Here is an empty bullet list item: + +```````````````````````````````` example +- foo +- +- bar +. +
    +
  • foo
  • +
  • +
  • bar
  • +
+```````````````````````````````` + + +It does not matter whether there are spaces following the [list marker]: + +```````````````````````````````` example +- foo +- +- bar +. +
    +
  • foo
  • +
  • +
  • bar
  • +
+```````````````````````````````` + + +Here is an empty ordered list item: + +```````````````````````````````` example +1. foo +2. +3. bar +. +
    +
  1. foo
  2. +
  3. +
  4. bar
  5. +
+```````````````````````````````` + + +A list may start or end with an empty list item: + +```````````````````````````````` example +* +. +
    +
  • +
+```````````````````````````````` + +However, an empty list item cannot interrupt a paragraph: + +```````````````````````````````` example +foo +* + +foo +1. +. +

foo +*

+

foo +1.

+```````````````````````````````` + + +4. **Indentation.** If a sequence of lines *Ls* constitutes a list item + according to rule #1, #2, or #3, then the result of indenting each line + of *Ls* by 1-3 spaces (the same for each line) also constitutes a + list item with the same contents and attributes. If a line is + empty, then it need not be indented. + +Indented one space: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indented two spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indented three spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Four spaces indent gives a code block: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
1.  A paragraph
+    with two lines.
+
+        indented code
+
+    > A block quote.
+
+```````````````````````````````` + + + +5. **Laziness.** If a string of lines *Ls* constitute a [list + item](#list-items) with contents *Bs*, then the result of deleting + some or all of the indentation from one or more lines in which the + next [non-whitespace character] after the indentation is + [paragraph continuation text] is a + list item with the same contents and attributes. The unindented + lines are called + [lazy continuation line](@)s. + +Here is an example with [lazy continuation lines]: + +```````````````````````````````` example + 1. A paragraph +with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indentation can be partially deleted: + +```````````````````````````````` example + 1. A paragraph + with two lines. +. +
    +
  1. A paragraph +with two lines.
  2. +
+```````````````````````````````` + + +These examples show how laziness can work in nested structures: + +```````````````````````````````` example +> 1. > Blockquote +continued here. +. +
+
    +
  1. +
    +

    Blockquote +continued here.

    +
    +
  2. +
+
+```````````````````````````````` + + +```````````````````````````````` example +> 1. > Blockquote +> continued here. +. +
+
    +
  1. +
    +

    Blockquote +continued here.

    +
    +
  2. +
+
+```````````````````````````````` + + + +6. **That's all.** Nothing that is not counted as a list item by rules + #1--5 counts as a [list item](#list-items). + +The rules for sublists follow from the general rules above. A sublist +must be indented the same number of spaces a paragraph would need to be +in order to be included in the list item. + +So, in this case we need two spaces indent: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
    +
  • foo +
      +
    • bar +
        +
      • baz +
          +
        • boo
        • +
        +
      • +
      +
    • +
    +
  • +
+```````````````````````````````` + + +One is not enough: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
    +
  • foo
  • +
  • bar
  • +
  • baz
  • +
  • boo
  • +
+```````````````````````````````` + + +Here we need four, because the list marker is wider: + +```````````````````````````````` example +10) foo + - bar +. +
    +
  1. foo +
      +
    • bar
    • +
    +
  2. +
+```````````````````````````````` + + +Three is not enough: + +```````````````````````````````` example +10) foo + - bar +. +
    +
  1. foo
  2. +
+
    +
  • bar
  • +
+```````````````````````````````` + + +A list may be the first block in a list item: + +```````````````````````````````` example +- - foo +. +
    +
  • +
      +
    • foo
    • +
    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. - 2. foo +. +
    +
  1. +
      +
    • +
        +
      1. foo
      2. +
      +
    • +
    +
  2. +
+```````````````````````````````` + + +A list item can contain a heading: + +```````````````````````````````` example +- # Foo +- Bar + --- + baz +. +
    +
  • +

    Foo

    +
  • +
  • +

    Bar

    +baz
  • +
+```````````````````````````````` + + +### Motivation + +John Gruber's Markdown spec says the following about list items: + +1. "List markers typically start at the left margin, but may be indented + by up to three spaces. List markers must be followed by one or more + spaces or a tab." + +2. "To make lists look nice, you can wrap items with hanging indents.... + But if you don't want to, you don't have to." + +3. "List items may consist of multiple paragraphs. Each subsequent + paragraph in a list item must be indented by either 4 spaces or one + tab." + +4. "It looks nice if you indent every line of the subsequent paragraphs, + but here again, Markdown will allow you to be lazy." + +5. "To put a blockquote within a list item, the blockquote's `>` + delimiters need to be indented." + +6. "To put a code block within a list item, the code block needs to be + indented twice — 8 spaces or two tabs." + +These rules specify that a paragraph under a list item must be indented +four spaces (presumably, from the left margin, rather than the start of +the list marker, but this is not said), and that code under a list item +must be indented eight spaces instead of the usual four. They also say +that a block quote must be indented, but not by how much; however, the +example given has four spaces indentation. Although nothing is said +about other kinds of block-level content, it is certainly reasonable to +infer that *all* block elements under a list item, including other +lists, must be indented four spaces. This principle has been called the +*four-space rule*. + +The four-space rule is clear and principled, and if the reference +implementation `Markdown.pl` had followed it, it probably would have +become the standard. However, `Markdown.pl` allowed paragraphs and +sublists to start with only two spaces indentation, at least on the +outer level. 
Worse, its behavior was inconsistent: a sublist of an +outer-level list needed two spaces indentation, but a sublist of this +sublist needed three spaces. It is not surprising, then, that different +implementations of Markdown have developed very different rules for +determining what comes under a list item. (Pandoc and python-Markdown, +for example, stuck with Gruber's syntax description and the four-space +rule, while discount, redcarpet, marked, PHP Markdown, and others +followed `Markdown.pl`'s behavior more closely.) + +Unfortunately, given the divergences between implementations, there +is no way to give a spec for list items that will be guaranteed not +to break any existing documents. However, the spec given here should +correctly handle lists formatted with either the four-space rule or +the more forgiving `Markdown.pl` behavior, provided they are laid out +in a way that is natural for a human to read. + +The strategy here is to let the width and indentation of the list marker +determine the indentation necessary for blocks to fall under the list +item, rather than having a fixed and arbitrary number. The writer can +think of the body of the list item as a unit which gets indented to the +right enough to fit the list marker (and any indentation on the list +marker). (The laziness rule, #5, then allows continuation lines to be +unindented if needed.) + +This rule is superior, we claim, to any rule requiring a fixed level of +indentation from the margin. The four-space rule is clear but +unnatural. It is quite unintuitive that + +``` markdown +- foo + + bar + + - baz +``` + +should be parsed as two lists with an intervening paragraph, + +``` html +
    +
  • foo
  • +
+

bar

+
    +
  • baz
  • +
+``` + +as the four-space rule demands, rather than a single list, + +``` html +
    +
  • +

    foo

    +

    bar

    +
      +
    • baz
    • +
    +
  • +
+``` + +The choice of four spaces is arbitrary. It can be learned, but it is +not likely to be guessed, and it trips up beginners regularly. + +Would it help to adopt a two-space rule? The problem is that such +a rule, together with the rule allowing 1--3 spaces indentation of the +initial list marker, allows text that is indented *less than* the +original list marker to be included in the list item. For example, +`Markdown.pl` parses + +``` markdown + - one + + two +``` + +as a single list item, with `two` a continuation paragraph: + +``` html +
    +
  • +

    one

    +

    two

    +
  • +
+``` + +and similarly + +``` markdown +> - one +> +> two +``` + +as + +``` html +
+
    +
  • +

    one

    +

    two

    +
  • +
+
+``` + +This is extremely unintuitive. + +Rather than requiring a fixed indent from the margin, we could require +a fixed indent (say, two spaces, or even one space) from the list marker (which +may itself be indented). This proposal would remove the last anomaly +discussed. Unlike the spec presented above, it would count the following +as a list item with a subparagraph, even though the paragraph `bar` +is not indented as far as the first paragraph `foo`: + +``` markdown + 10. foo + + bar +``` + +Arguably this text does read like a list item with `bar` as a subparagraph, +which may count in favor of the proposal. However, on this proposal indented +code would have to be indented six spaces after the list marker. And this +would break a lot of existing Markdown, which has the pattern: + +``` markdown +1. foo + + indented code +``` + +where the code is indented eight spaces. The spec above, by contrast, will +parse this text as expected, since the code block's indentation is measured +from the beginning of `foo`. + +The one case that needs special treatment is a list item that *starts* +with indented code. How much indentation is required in that case, since +we don't have a "first paragraph" to measure from? Rule #2 simply stipulates +that in such cases, we require one space indentation from the list marker +(and then the normal four spaces for the indented code). This will match the +four-space rule in cases where the list marker plus its initial indentation +takes four spaces (a common case), but diverge in other cases. + +## Lists + +A [list](@) is a sequence of one or more +list items [of the same type]. The list items +may be separated by any number of blank lines. + +Two list items are [of the same type](@) +if they begin with a [list marker] of the same type. 
+Two list markers are of the +same type if (a) they are bullet list markers using the same character +(`-`, `+`, or `*`) or (b) they are ordered list numbers with the same +delimiter (either `.` or `)`). + +A list is an [ordered list](@) +if its constituent list items begin with +[ordered list markers], and a +[bullet list](@) if its constituent list +items begin with [bullet list markers]. + +The [start number](@) +of an [ordered list] is determined by the list number of +its initial list item. The numbers of subsequent list items are +disregarded. + +A list is [loose](@) if any of its constituent +list items are separated by blank lines, or if any of its constituent +list items directly contain two block-level elements with a blank line +between them. Otherwise a list is [tight](@). +(The difference in HTML output is that paragraphs in a loose list are +wrapped in `<p>` tags, while paragraphs in a tight list are not.) + +Changing the bullet or ordered list delimiter starts a new list: + +```````````````````````````````` example +- foo +- bar ++ baz +. +

    +
  • foo
  • +
  • bar
  • +
+
    +
  • baz
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. foo +2. bar +3) baz +. +
    +
  1. foo
  2. +
  3. bar
  4. +
+
    +
  1. baz
  2. +
+```````````````````````````````` + + +In CommonMark, a list can interrupt a paragraph. That is, +no blank line is needed to separate a paragraph from a following +list: + +```````````````````````````````` example +Foo +- bar +- baz +. +

Foo

+
    +
  • bar
  • +
  • baz
  • +
+```````````````````````````````` + +`Markdown.pl` does not allow this, through fear of triggering a list +via a numeral in a hard-wrapped line: + +``` markdown +The number of windows in my house is +14. The number of doors is 6. +``` + +Oddly, though, `Markdown.pl` *does* allow a blockquote to +interrupt a paragraph, even though the same considerations might +apply. + +In CommonMark, we do allow lists to interrupt paragraphs, for +two reasons. First, it is natural and not uncommon for people +to start lists without blank lines: + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +Second, we are attracted to a + +> [principle of uniformity](@): +> if a chunk of text has a certain +> meaning, it will continue to have the same meaning when put into a +> container block (such as a list item or blockquote). + +(Indeed, the spec for [list items] and [block quotes] presupposes +this principle.) This principle implies that if + +``` markdown + * I need to buy + - new shoes + - a coat + - a plane ticket +``` + +is a list item containing a paragraph followed by a nested sublist, +as all Markdown implementations agree it is (though the paragraph +may be rendered without `<p>` tags, since the list is "tight"), +then + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +by itself should be a paragraph followed by a nested sublist. + +Since it is well established Markdown practice to allow lists to +interrupt paragraphs inside list items, the [principle of +uniformity] requires us to allow this outside list items as +well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) +takes a different approach, requiring blank lines before lists +even inside other list items.) + +In order to solve the problem of unwanted lists in paragraphs with +hard-wrapped numerals, we allow only lists starting with `1` to +interrupt paragraphs. Thus, + +```````````````````````````````` example +The number of windows in my house is +14. The number of doors is 6. +. +

The number of windows in my house is +14. The number of doors is 6.

+```````````````````````````````` + +We may still get an unintended result in cases like + +```````````````````````````````` example +The number of windows in my house is +1. The number of doors is 6. +. +

The number of windows in my house is

+
    +
  1. The number of doors is 6.
  2. +
+```````````````````````````````` + +but this rule should prevent most spurious list captures. + +There can be any number of blank lines between items: + +```````````````````````````````` example +- foo + +- bar + + +- baz +. +
    +
  • +

    foo

    +
  • +
  • +

    bar

    +
  • +
  • +

    baz

    +
  • +
+```````````````````````````````` + +```````````````````````````````` example +- foo + - bar + - baz + + + bim +. +
    +
  • foo +
      +
    • bar +
        +
      • +

        baz

        +

        bim

        +
      • +
      +
    • +
    +
  • +
+```````````````````````````````` + + +To separate consecutive lists of the same type, or to separate a +list from an indented code block that would otherwise be parsed +as a subparagraph of the final list item, you can insert a blank HTML +comment: + +```````````````````````````````` example +- foo +- bar + +<!-- --> + +- baz +- bim +. +
    +
  • foo
  • +
  • bar
  • +
+ +
    +
  • baz
  • +
  • bim
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- foo + + notcode + +- foo + +<!-- --> + + code +. +
    +
  • +

    foo

    +

    notcode

    +
  • +
  • +

    foo

    +
  • +
+ +
code
+
+```````````````````````````````` + + +List items need not be indented to the same level. The following +list items will be treated as items at the same list level, +since none is indented enough to belong to the previous list +item: + +```````````````````````````````` example +- a + - b + - c + - d + - e + - f + - g + - h +- i +. +
    +
  • a
  • +
  • b
  • +
  • c
  • +
  • d
  • +
  • e
  • +
  • f
  • +
  • g
  • +
  • h
  • +
  • i
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. a + + 2. b + + 3. c +. +
    +
  1. +

    a

    +
  2. +
  3. +

    b

    +
  4. +
  5. +

    c

    +
  6. +
+```````````````````````````````` + + +This is a loose list, because there is a blank line between +two of the list items: + +```````````````````````````````` example +- a +- b + +- c +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
  • +

    c

    +
  • +
+```````````````````````````````` + + +So is this, with an empty second item: + +```````````````````````````````` example +* a +* + +* c +. +
    +
  • +

    a

    +
  • +
  • +
  • +

    c

    +
  • +
+```````````````````````````````` + + +These are loose lists, even though there is no space between the items, +because one of the items directly contains two block-level elements +with a blank line between them: + +```````````````````````````````` example +- a +- b + + c +- d +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +

    c

    +
  • +
  • +

    d

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a +- b + + [ref]: /url +- d +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
  • +

    d

    +
  • +
+```````````````````````````````` + + +This is a tight list, because the blank lines are in a code block: + +```````````````````````````````` example +- a +- ``` + b + + + ``` +- c +. +
    +
  • a
  • +
  • +
    b
    +
    +
    +
    +
  • +
  • c
  • +
+```````````````````````````````` + + +This is a tight list, because the blank line is between two +paragraphs of a sublist. So the sublist is loose while +the outer list is tight: + +```````````````````````````````` example +- a + - b + + c +- d +. +
    +
  • a +
      +
    • +

      b

      +

      c

      +
    • +
    +
  • +
  • d
  • +
+```````````````````````````````` + + +This is a tight list, because the blank line is inside the +block quote: + +```````````````````````````````` example +* a + > b + > +* c +. +
    +
  • a +
    +

    b

    +
    +
  • +
  • c
  • +
+```````````````````````````````` + + +This list is tight, because the consecutive block elements +are not separated by blank lines: + +```````````````````````````````` example +- a + > b + ``` + c + ``` +- d +. +
    +
  • a +
    +

    b

    +
    +
    c
    +
    +
  • +
  • d
  • +
+```````````````````````````````` + + +A single-paragraph list is tight: + +```````````````````````````````` example +- a +. +
    +
  • a
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a + - b +. +
    +
  • a +
      +
    • b
    • +
    +
  • +
+```````````````````````````````` + + +This list is loose, because of the blank line between the +two block elements in the list item: + +```````````````````````````````` example +1. ``` + foo + ``` + + bar +. +
    +
  1. +
    foo
    +
    +

    bar

    +
  2. +
+```````````````````````````````` + + +Here the outer list is loose, the inner list tight: + +```````````````````````````````` example +* foo + * bar + + baz +. +
    +
  • +

    foo

    +
      +
    • bar
    • +
    +

    baz

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a + - b + - c + +- d + - e + - f +. +
    +
  • +

    a

    +
      +
    • b
    • +
    • c
    • +
    +
  • +
  • +

    d

    +
      +
    • e
    • +
    • f
    • +
    +
  • +
+```````````````````````````````` + + +# Inlines + +Inlines are parsed sequentially from the beginning of the character +stream to the end (left to right, in left-to-right languages). +Thus, for example, in + +```````````````````````````````` example +`hi`lo` +. +

hilo`

+```````````````````````````````` + + +`hi` is parsed as code, leaving the backtick at the end as a literal +backtick. + +## Backslash escapes + +Any ASCII punctuation character may be backslash-escaped: + +```````````````````````````````` example +\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ +. +

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

+```````````````````````````````` + + +Backslashes before other characters are treated as literal +backslashes: + +```````````````````````````````` example +\→\A\a\ \3\φ\« +. +

\→\A\a\ \3\φ\«

+```````````````````````````````` + + +Escaped characters are treated as regular characters and do +not have their usual Markdown meanings: + +```````````````````````````````` example +\*not emphasized* +\<br/> not a tag +\[not a link](/foo) +\`not code` +1\. not a list +\* not a list +\# not a heading +\[foo]: /url "not a reference" +. +

*not emphasized* +<br/> not a tag +[not a link](/foo) +`not code` +1. not a list +* not a list +# not a heading +[foo]: /url "not a reference"

+```````````````````````````````` + + +If a backslash is itself escaped, the following character is not: + +```````````````````````````````` example +\\*emphasis* +. +

\emphasis

+```````````````````````````````` + + +A backslash at the end of the line is a [hard line break]: + +```````````````````````````````` example +foo\ +bar +. +

foo
+bar

+```````````````````````````````` + + +Backslash escapes do not work in code blocks, code spans, autolinks, or +raw HTML: + +```````````````````````````````` example +`` \[\` `` +. +

\[\`

+```````````````````````````````` + + +```````````````````````````````` example + \[\] +. +
\[\]
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~ +\[\] +~~~ +. +
\[\]
+
+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://example.com?find=\*

+```````````````````````````````` + + +```````````````````````````````` example + +. + +```````````````````````````````` + + +But they work in all other contexts, including URLs and link titles, +link references, and [info strings] in [fenced code blocks]: + +```````````````````````````````` example +[foo](/bar\* "ti\*tle") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /bar\* "ti\*tle" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +``` foo\+bar +foo +``` +. +
foo
+
+```````````````````````````````` + + + +## Entity and numeric character references + +All valid HTML entity references and numeric character +references, except those occurring in code blocks and code spans, +are recognized as such and treated as equivalent to the +corresponding Unicode characters. Conforming CommonMark parsers +need not store information about whether a particular character +was represented in the source using a Unicode character or +an entity reference. + +[Entity references](@) consist of `&` + any of the valid +HTML5 entity names + `;`. The +document <https://html.spec.whatwg.org/multipage/entities.json> +is used as an authoritative source for the valid entity +references and their corresponding code points. + +```````````````````````````````` example +&nbsp; &amp; &copy; &AElig; &Dcaron; +&frac34; &HilbertSpace; &DifferentialD; +&ClockwiseContourIntegral; &ngE; +. +

& © Æ Ď +¾ ℋ ⅆ +∲ ≧̸

+```````````````````````````````` + + +[Decimal numeric character +references](@) +consist of `&#` + a string of 1--8 Arabic digits + `;`. A +numeric character reference is parsed as the corresponding +Unicode character. Invalid Unicode code points will be replaced by +the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, +the code point `U+0000` will also be replaced by `U+FFFD`. + +```````````````````````````````` example +&#35; &#1234; &#992; &#98765432; &#0; +. +

# Ӓ Ϡ � �

+```````````````````````````````` + + +[Hexadecimal numeric character +references](@) consist of `&#` + +either `X` or `x` + a string of 1--8 hexadecimal digits + `;`. +They too are parsed as the corresponding Unicode character (this +time specified with a hexadecimal numeral instead of decimal). + +```````````````````````````````` example +&#X22; &#XD06; &#xcab; +. +

" ആ ಫ

+```````````````````````````````` + + +Here are some nonentities: + +```````````````````````````````` example +&nbsp &x; &#; &#x; +&ThisIsNotDefined; &hi?; +. +

&nbsp &x; &#; &#x; +&ThisIsNotDefined; &hi?;

+```````````````````````````````` + + +Although HTML5 does accept some entity references +without a trailing semicolon (such as `&copy`), these are not +recognized here, because it makes the grammar too ambiguous: + +```````````````````````````````` example +&copy +. +

&copy

+```````````````````````````````` + + +Strings that are not on the list of HTML5 named entities are not +recognized as entity references either: + +```````````````````````````````` example +&MadeUpEntity; +. +

&MadeUpEntity;

+```````````````````````````````` + + +Entity and numeric character references are recognized in any +context besides code spans or code blocks, including +URLs, [link titles], and [fenced code block][] [info strings]: + +```````````````````````````````` example + +. + +```````````````````````````````` + + +```````````````````````````````` example +[foo](/föö "föö") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /föö "föö" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +``` föö +foo +``` +. +
foo
+
+```````````````````````````````` + + +Entity and numeric character references are treated as literal +text in code spans and code blocks: + +```````````````````````````````` example +`f&ouml;&ouml;` +. +

f&ouml;&ouml;

+```````````````````````````````` + + +```````````````````````````````` example + föfö +. +
f&ouml;f&ouml;
+
+```````````````````````````````` + + +## Code spans + +A [backtick string](@) +is a string of one or more backtick characters (`` ` ``) that is neither +preceded nor followed by a backtick. + +A [code span](@) begins with a backtick string and ends with +a backtick string of equal length. The contents of the code span are +the characters between the two backtick strings, with leading and +trailing spaces and [line endings] removed, and +[whitespace] collapsed to single spaces. + +This is a simple code span: + +```````````````````````````````` example +`foo` +. +

foo

+```````````````````````````````` + + +Here two backticks are used, because the code contains a backtick. +This example also illustrates stripping of leading and trailing spaces: + +```````````````````````````````` example +`` foo ` bar `` +. +

foo ` bar

+```````````````````````````````` + + +This example shows the motivation for stripping leading and trailing +spaces: + +```````````````````````````````` example +` `` ` +. +

``

+```````````````````````````````` + + +[Line endings] are treated like spaces: + +```````````````````````````````` example +`` +foo +`` +. +

foo

+```````````````````````````````` + + +Interior spaces and [line endings] are collapsed into +single spaces, just as they would be by a browser: + +```````````````````````````````` example +`foo bar + baz` +. +

foo bar baz

+```````````````````````````````` + + +Not all [Unicode whitespace] (for instance, non-breaking space) is +collapsed, however: + +```````````````````````````````` example +`a b` +. +

a b

+```````````````````````````````` + + +Q: Why not just leave the spaces, since browsers will collapse them +anyway? A: Because we might be targeting a non-HTML format, and we +shouldn't rely on HTML-specific rendering assumptions. + +(Existing implementations differ in their treatment of internal +spaces and [line endings]. Some, including `Markdown.pl` and +`showdown`, convert an internal [line ending] into a +`<br />` tag. But this makes things difficult for those who like to +hard-wrap their paragraphs, since a line break in the midst of a code +span will cause an unintended line break in the output. Others just +leave internal spaces as they are, which is fine if only HTML is being +targeted.) + +```````````````````````````````` example +`foo `` bar` +. +

foo `` bar

+```````````````````````````````` + + +Note that backslash escapes do not work in code spans. All backslashes +are treated literally: + +```````````````````````````````` example +`foo\`bar` +. +

foo\bar`

+```````````````````````````````` + + +Backslash escapes are never needed, because one can always choose a +string of *n* backtick characters as delimiters, where the code does +not contain any strings of exactly *n* backtick characters. + +Code span backticks have higher precedence than any other inline +constructs except HTML tags and autolinks. Thus, for example, this is +not parsed as emphasized text, since the second `*` is part of a code +span: + +```````````````````````````````` example +*foo`*` +. +

*foo*

+```````````````````````````````` + + +And this is not parsed as a link: + +```````````````````````````````` example +[not a `link](/foo`) +. +

[not a link](/foo)

+```````````````````````````````` + + +Code spans, HTML tags, and autolinks have the same precedence. +Thus, this is code: + +```````````````````````````````` example +`` +. +

<a href="">`

+```````````````````````````````` + + +But this is an HTML tag: + +```````````````````````````````` example +
` +. +

`

+```````````````````````````````` + + +And this is code: + +```````````````````````````````` example +`` +. +

<http://foo.bar.baz>`

+```````````````````````````````` + + +But this is an autolink: + +```````````````````````````````` example +` +. +

http://foo.bar.`baz`

+```````````````````````````````` + + +When a backtick string is not closed by a matching backtick string, +we just have literal backticks: + +```````````````````````````````` example +```foo`` +. +

```foo``

+```````````````````````````````` + + +```````````````````````````````` example +`foo +. +

`foo

+```````````````````````````````` + +The following case also illustrates the need for opening and +closing backtick strings to be equal in length: + +```````````````````````````````` example +`foo``bar`` +. +

`foobar

+```````````````````````````````` + + +## Emphasis and strong emphasis + +John Gruber's original [Markdown syntax +description](http://daringfireball.net/projects/markdown/syntax#em) says: + +> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of +> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML +> `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML `<strong>` +> tag. + +This is enough for most users, but these rules leave much undecided, +especially when it comes to nested emphasis. The original +`Markdown.pl` test suite makes it clear that triple `***` and +`___` delimiters can be used for strong emphasis, and most +implementations have also allowed the following patterns: + +``` markdown +***strong emph*** +***strong** in emph* +***emph* in strong** +**in strong *emph*** +*in emph **strong*** +``` + +The following patterns are less widely supported, but the intent +is clear and they are useful (especially in contexts like bibliography +entries): + +``` markdown +*emph *with emph* in it* +**strong **with strong** in it** +``` + +Many implementations have also restricted intraword emphasis to +the `*` forms, to avoid unwanted emphasis in words containing +internal underscores. (It is best practice to put these in code +spans, but users often do not.) + +``` markdown +internal emphasis: foo*bar*baz +no emphasis: foo_bar_baz +``` + +The rules given below capture all of these patterns, while allowing +for efficient parsing strategies that do not backtrack. + +First, some definitions. A [delimiter run](@) is either +a sequence of one or more `*` characters that is not preceded or +followed by a non-backslash-escaped `*` character, or a sequence +of one or more `_` characters that is not preceded or followed by +a non-backslash-escaped `_` character. 
+ +A [left-flanking delimiter run](@) is +a [delimiter run] that is (a) not followed by [Unicode whitespace], +and (b) not followed by a [punctuation character], or +preceded by [Unicode whitespace] or a [punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. + +A [right-flanking delimiter run](@) is +a [delimiter run] that is (a) not preceded by [Unicode whitespace], +and (b) not preceded by a [punctuation character], or +followed by [Unicode whitespace] or a [punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. + +Here are some examples of delimiter runs. + + - left-flanking but not right-flanking: + + ``` + ***abc + _abc + **"abc" + _"abc" + ``` + + - right-flanking but not left-flanking: + + ``` + abc*** + abc_ + "abc"** + "abc"_ + ``` + + - Both left and right-flanking: + + ``` + abc***def + "abc"_"def" + ``` + + - Neither left nor right-flanking: + + ``` + abc *** def + a _ b + ``` + +(The idea of distinguishing left-flanking and right-flanking +delimiter runs based on the character before and the character +after comes from Roopesh Chander's +[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). +vfmd uses the terminology "emphasis indicator string" instead of "delimiter +run," and its rules for distinguishing left- and right-flanking runs +are a bit more complex than the ones given here.) + +The following rules define emphasis and strong emphasis: + +1. A single `*` character [can open emphasis](@) + iff (if and only if) it is part of a [left-flanking delimiter run]. + +2. A single `_` character [can open emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by punctuation. + +3. 
A single `*` character [can close emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +4. A single `_` character [can close emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by punctuation. + +5. A double `**` [can open strong emphasis](@) + iff it is part of a [left-flanking delimiter run]. + +6. A double `__` [can open strong emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by punctuation. + +7. A double `**` [can close strong emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +8. A double `__` [can close strong emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by punctuation. + +9. Emphasis begins with a delimiter that [can open emphasis] and ends + with a delimiter that [can close emphasis], and that uses the same + character (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. If one of the delimiters can both + open and close emphasis, then the sum of the lengths of the + delimiter runs containing the opening and closing delimiters + must not be a multiple of 3. + +10. Strong emphasis begins with a delimiter that + [can open strong emphasis] and ends with a delimiter that + [can close strong emphasis], and that uses the same character + (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. If one of the delimiters can both open + and close strong emphasis, then the sum of the lengths of + the delimiter runs containing the opening and closing + delimiters must not be a multiple of 3. + +11. 
A literal `*` character cannot occur at the beginning or end of + `*`-delimited emphasis or `**`-delimited strong emphasis, unless it + is backslash-escaped. + +12. A literal `_` character cannot occur at the beginning or end of + `_`-delimited emphasis or `__`-delimited strong emphasis, unless it + is backslash-escaped. + +Where rules 1--12 above are compatible with multiple parsings, +the following principles resolve ambiguity: + +13. The number of nestings should be minimized. Thus, for example, + an interpretation `<strong>...</strong>` is always preferred to + `<em><em>...</em></em>`. + +14. An interpretation `<strong><em>...</em></strong>` is always + preferred to `<em><strong>...</strong></em>`. + +15. When two potential emphasis or strong emphasis spans overlap, + so that the second begins before the first ends and ends after + the first ends, the first takes precedence. Thus, for example, + `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather + than `*foo <em>bar* baz</em>`. + +16. When there are two potential emphasis or strong emphasis spans + with the same closing delimiter, the shorter one (the one that + opens later) takes precedence. Thus, for example, + `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>` + rather than `<strong>foo **bar baz</strong>`. + +17. Inline code spans, links, images, and HTML tags group more tightly + than emphasis. So, when there is a choice between an interpretation + that contains one of these elements and one that does not, the + former always wins. Thus, for example, `*[foo*](bar)` is + parsed as `*<a href="bar">foo*</a>` rather than as + `<em>[foo</em>](bar)`. + +These rules can be illustrated through a series of examples. + +Rule 1: + +```````````````````````````````` example +*foo bar* +. +

foo bar

+```````````````````````````````` + + +This is not emphasis, because the opening `*` is followed by +whitespace, and hence not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a * foo bar* +. +

a * foo bar*

+```````````````````````````````` + + +This is not emphasis, because the opening `*` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a*"foo"* +. +

a*"foo"*

+```````````````````````````````` + + +Unicode nonbreaking spaces count as whitespace, too: + +```````````````````````````````` example +* a * +. +

* a *

+```````````````````````````````` + + +Intraword emphasis with `*` is permitted: + +```````````````````````````````` example +foo*bar* +. +

foobar

+```````````````````````````````` + + +```````````````````````````````` example +5*6*78 +. +

5678

+```````````````````````````````` + + +Rule 2: + +```````````````````````````````` example +_foo bar_ +. +

foo bar

+```````````````````````````````` + + +This is not emphasis, because the opening `_` is followed by +whitespace: + +```````````````````````````````` example +_ foo bar_ +. +

_ foo bar_

+```````````````````````````````` + + +This is not emphasis, because the opening `_` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a_"foo"_ +. +

a_"foo"_

+```````````````````````````````` + + +Emphasis with `_` is not allowed inside words: + +```````````````````````````````` example +foo_bar_ +. +

foo_bar_

+```````````````````````````````` + + +```````````````````````````````` example +5_6_78 +. +

5_6_78

+```````````````````````````````` + + +```````````````````````````````` example +пристаням_стремятся_ +. +

пристаням_стремятся_

+```````````````````````````````` + + +Here `_` does not generate emphasis, because the first delimiter run +is right-flanking and the second left-flanking: + +```````````````````````````````` example +aa_"bb"_cc +. +

aa_"bb"_cc

+```````````````````````````````` + + +This is emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-_(bar)_ +. +

foo-(bar)

+```````````````````````````````` + + +Rule 3: + +This is not emphasis, because the closing delimiter does +not match the opening delimiter: + +```````````````````````````````` example +_foo* +. +

_foo*

+```````````````````````````````` + + +This is not emphasis, because the closing `*` is preceded by +whitespace: + +```````````````````````````````` example +*foo bar * +. +

*foo bar *

+```````````````````````````````` + + +A newline also counts as whitespace: + +```````````````````````````````` example +*foo bar +* +. +

*foo bar +*

+```````````````````````````````` + + +This is not emphasis, because the second `*` is +preceded by punctuation and followed by an alphanumeric +(hence it is not part of a [right-flanking delimiter run]): + +```````````````````````````````` example +*(*foo) +. +

*(*foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +*(*foo*)* +. +

(foo)

+```````````````````````````````` + + +Intraword emphasis with `*` is allowed: + +```````````````````````````````` example +*foo*bar +. +

foobar

+```````````````````````````````` + + + +Rule 4: + +This is not emphasis, because the closing `_` is preceded by +whitespace: + +```````````````````````````````` example +_foo bar _ +. +

_foo bar _

+```````````````````````````````` + + +This is not emphasis, because the second `_` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +_(_foo) +. +

_(_foo)

+```````````````````````````````` + + +This is emphasis within emphasis: + +```````````````````````````````` example +_(_foo_)_ +. +

(foo)

+```````````````````````````````` + + +Intraword emphasis is disallowed for `_`: + +```````````````````````````````` example +_foo_bar +. +

_foo_bar

+```````````````````````````````` + + +```````````````````````````````` example +_пристаням_стремятся +. +

_пристаням_стремятся

+```````````````````````````````` + + +```````````````````````````````` example +_foo_bar_baz_ +. +

foo_bar_baz

+```````````````````````````````` + + +This is emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +_(bar)_. +. +

(bar).

+```````````````````````````````` + + +Rule 5: + +```````````````````````````````` example +**foo bar** +. +

foo bar

+```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +** foo bar** +. +

** foo bar**

+```````````````````````````````` + + +This is not strong emphasis, because the opening `**` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a**"foo"** +. +

a**"foo"**

+```````````````````````````````` + + +Intraword strong emphasis with `**` is permitted: + +```````````````````````````````` example +foo**bar** +. +

foobar

+```````````````````````````````` + + +Rule 6: + +```````````````````````````````` example +__foo bar__ +. +

foo bar

+```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +__ foo bar__ +. +

__ foo bar__

+```````````````````````````````` + + +A newline counts as whitespace: +```````````````````````````````` example +__ +foo bar__ +. +

__ +foo bar__

+```````````````````````````````` + + +This is not strong emphasis, because the opening `__` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a__"foo"__ +. +

a__"foo"__

+```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +foo__bar__ +. +

foo__bar__

+```````````````````````````````` + + +```````````````````````````````` example +5__6__78 +. +

5__6__78

+```````````````````````````````` + + +```````````````````````````````` example +пристаням__стремятся__ +. +

пристаням__стремятся__

+```````````````````````````````` + + +```````````````````````````````` example +__foo, __bar__, baz__ +. +

foo, bar, baz

+```````````````````````````````` + + +This is strong emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-__(bar)__ +. +

foo-(bar)

+```````````````````````````````` + + + +Rule 7: + +This is not strong emphasis, because the closing delimiter is preceded +by whitespace: + +```````````````````````````````` example +**foo bar ** +. +

**foo bar **

+```````````````````````````````` + + +(Nor can it be interpreted as an emphasized `*foo bar *`, because of +Rule 11.) + +This is not strong emphasis, because the second `**` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +**(**foo) +. +

**(**foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with these examples: + +```````````````````````````````` example +*(**foo**)* +. +

(foo)

+```````````````````````````````` + + +```````````````````````````````` example +**Gomphocarpus (*Gomphocarpus physocarpus*, syn. +*Asclepias physocarpa*)** +. +

Gomphocarpus (Gomphocarpus physocarpus, syn. +Asclepias physocarpa)

+```````````````````````````````` + + +```````````````````````````````` example +**foo "*bar*" foo** +. +

foo "bar" foo

+```````````````````````````````` + + +Intraword emphasis: + +```````````````````````````````` example +**foo**bar +. +

foobar

+```````````````````````````````` + + +Rule 8: + +This is not strong emphasis, because the closing delimiter is +preceded by whitespace: + +```````````````````````````````` example +__foo bar __ +. +

__foo bar __

+```````````````````````````````` + + +This is not strong emphasis, because the second `__` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +__(__foo) +. +

__(__foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +_(__foo__)_ +. +

(foo)

+```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +__foo__bar +. +

__foo__bar

+```````````````````````````````` + + +```````````````````````````````` example +__пристаням__стремятся +. +

__пристаням__стремятся

+```````````````````````````````` + + +```````````````````````````````` example +__foo__bar__baz__ +. +

foo__bar__baz

+```````````````````````````````` + + +This is strong emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +__(bar)__. +. +

(bar).

+```````````````````````````````` + + +Rule 9: + +Any nonempty sequence of inline elements can be the contents of an +emphasized span. + +```````````````````````````````` example +*foo [bar](/url)* +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo +bar* +. +

foo +bar

+```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside emphasis: + +```````````````````````````````` example +_foo __bar__ baz_ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +_foo _bar_ baz_ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +__foo_ bar_ +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo *bar** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo **bar** baz* +. +

foo bar baz

+```````````````````````````````` + +```````````````````````````````` example +*foo**bar**baz* +. +

foobarbaz

+```````````````````````````````` + +Note that in the preceding case, the interpretation + +``` markdown +<p><em>foo</em><em>bar<em></em>baz</em></p> +``` + + +is precluded by the condition that a delimiter that +can both open and close (like the `*` after `foo`) +cannot form emphasis if the sum of the lengths of +the delimiter runs containing the opening and +closing delimiters is a multiple of 3. + +The same condition ensures that the following +cases are all strong emphasis nested inside +emphasis, even when the interior spaces are +omitted: + + +```````````````````````````````` example +***foo** bar* +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo **bar*** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo**bar*** +. +

foobar

+```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +*foo **bar *baz* bim** bop* +. +

foo bar baz bim bop

+```````````````````````````````` + + +```````````````````````````````` example +*foo [*bar*](/url)* +. +

foo bar

+```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +** is not an empty emphasis +. +

** is not an empty emphasis

+```````````````````````````````` + + +```````````````````````````````` example +**** is not an empty strong emphasis +. +

**** is not an empty strong emphasis

+```````````````````````````````` + + + +Rule 10: + +Any nonempty sequence of inline elements can be the contents of a +strongly emphasized span. + +```````````````````````````````` example +**foo [bar](/url)** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo +bar** +. +

foo +bar

+```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside strong emphasis: + +```````````````````````````````` example +__foo _bar_ baz__ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +__foo __bar__ baz__ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +____foo__ bar__ +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo **bar**** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo *bar* baz** +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +**foo*bar*baz** +. +

foobarbaz

+```````````````````````````````` + + +```````````````````````````````` example +***foo* bar** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo *bar*** +. +

foo bar

+```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +**foo *bar **baz** +bim* bop** +. +

foo bar baz +bim bop

+```````````````````````````````` + + +```````````````````````````````` example +**foo [*bar*](/url)** +. +

foo bar

+```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +__ is not an empty emphasis +. +

__ is not an empty emphasis

+```````````````````````````````` + + +```````````````````````````````` example +____ is not an empty strong emphasis +. +

____ is not an empty strong emphasis

+```````````````````````````````` + + + +Rule 11: + +```````````````````````````````` example +foo *** +. +

foo ***

+```````````````````````````````` + + +```````````````````````````````` example +foo *\** +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo *_* +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo ***** +. +

foo *****

+```````````````````````````````` + + +```````````````````````````````` example +foo **\*** +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo **_** +. +

foo _

+```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 11 determines +that the excess literal `*` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +**foo* +. +

*foo

+```````````````````````````````` + + +```````````````````````````````` example +*foo** +. +

foo*

+```````````````````````````````` + + +```````````````````````````````` example +***foo** +. +

*foo

+```````````````````````````````` + + +```````````````````````````````` example +****foo* +. +

***foo

+```````````````````````````````` + + +```````````````````````````````` example +**foo*** +. +

foo*

+```````````````````````````````` + + +```````````````````````````````` example +*foo**** +. +

foo***

+```````````````````````````````` + + + +Rule 12: + +```````````````````````````````` example +foo ___ +. +

foo ___

+```````````````````````````````` + + +```````````````````````````````` example +foo _\__ +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo _*_ +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo _____ +. +

foo _____

+```````````````````````````````` + + +```````````````````````````````` example +foo __\___ +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo __*__ +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +__foo_ +. +

_foo

+```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 12 determines +that the excess literal `_` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +_foo__ +. +

foo_

+```````````````````````````````` + + +```````````````````````````````` example +___foo__ +. +

_foo

+```````````````````````````````` + + +```````````````````````````````` example +____foo_ +. +

___foo

+```````````````````````````````` + + +```````````````````````````````` example +__foo___ +. +

foo_

+```````````````````````````````` + + +```````````````````````````````` example +_foo____ +. +

foo___

+```````````````````````````````` + + +Rule 13 implies that if you want emphasis nested directly inside +emphasis, you must use different delimiters: + +```````````````````````````````` example +**foo** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +*_foo_* +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +__foo__ +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +_*foo*_ +. +

foo

+```````````````````````````````` + + +However, strong emphasis within strong emphasis is possible without +switching delimiters: + +```````````````````````````````` example +****foo**** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +____foo____ +. +

foo

+```````````````````````````````` + + + +Rule 13 can be applied to arbitrarily long sequences of +delimiters: + +```````````````````````````````` example +******foo****** +. +

foo

+```````````````````````````````` + + +Rule 14: + +```````````````````````````````` example +***foo*** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +_____foo_____ +. +

foo

+```````````````````````````````` + + +Rule 15: + +```````````````````````````````` example +*foo _bar* baz_ +. +

foo _bar baz_

+```````````````````````````````` + + +```````````````````````````````` example +*foo __bar *baz bim__ bam* +. +

foo bar *baz bim bam

+```````````````````````````````` + + +Rule 16: + +```````````````````````````````` example +**foo **bar baz** +. +

**foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +*foo *bar baz* +. +

*foo bar baz

+```````````````````````````````` + + +Rule 17: + +```````````````````````````````` example +*[bar*](/url) +. +

*bar*

+```````````````````````````````` + + +```````````````````````````````` example +_foo [bar_](/url) +. +

_foo bar_

+```````````````````````````````` + + +```````````````````````````````` example +* +. +

*

+```````````````````````````````` + + +```````````````````````````````` example +** +. +

**

+```````````````````````````````` + + +```````````````````````````````` example +__ +. +

__

+```````````````````````````````` + + +```````````````````````````````` example +*a `*`* +. +

a *

+```````````````````````````````` + + +```````````````````````````````` example +_a `_`_ +. +

a _

+```````````````````````````````` + + +```````````````````````````````` example +**a +. +

**ahttp://foo.bar/?q=**

+```````````````````````````````` + + +```````````````````````````````` example +__a +. +

__ahttp://foo.bar/?q=__

+```````````````````````````````` + + + +## Links + +A link contains [link text] (the visible text), a [link destination] +(the URI that is the link destination), and optionally a [link title]. +There are two basic kinds of links in Markdown. In [inline links] the +destination and title are given immediately after the link text. In +[reference links] the destination and title are defined elsewhere in +the document. + +A [link text](@) consists of a sequence of zero or more +inline elements enclosed by square brackets (`[` and `]`). The +following rules apply: + +- Links may not contain other links, at any level of nesting. If + multiple otherwise valid link definitions appear nested inside each + other, the inner-most definition is used. + +- Brackets are allowed in the [link text] only if (a) they + are backslash-escaped or (b) they appear as a matched pair of brackets, + with an open bracket `[`, a sequence of zero or more inlines, and + a close bracket `]`. + +- Backtick [code spans], [autolinks], and raw [HTML tags] bind more tightly + than the brackets in link text. Thus, for example, + `` [foo`]` `` could not be a link text, since the second `]` + is part of a code span. + +- The brackets in link text bind more tightly than markers for + [emphasis and strong emphasis]. Thus, for example, `*[foo*](url)` is a link. + +A [link destination](@) consists of either + +- a sequence of zero or more characters between an opening `<` and a + closing `>` that contains no spaces, line breaks, or unescaped + `<` or `>` characters, or + +- a nonempty sequence of characters that does not include + ASCII space or control characters, and includes parentheses + only if (a) they are backslash-escaped or (b) they are part of + a balanced pair of unescaped parentheses. (Implementations + may impose limits on parentheses nesting to avoid performance + issues, but at least three levels of nesting should be supported.) 
+ +A [link title](@) consists of either + +- a sequence of zero or more characters between straight double-quote + characters (`"`), including a `"` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between straight single-quote + characters (`'`), including a `'` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between matching parentheses + (`(...)`), including a `)` character only if it is backslash-escaped. + +Although [link titles] may span multiple lines, they may not contain +a [blank line]. + +An [inline link](@) consists of a [link text] followed immediately +by a left parenthesis `(`, optional [whitespace], an optional +[link destination], an optional [link title] separated from the link +destination by [whitespace], optional [whitespace], and a right +parenthesis `)`. The link's text consists of the inlines contained +in the [link text] (excluding the enclosing square brackets). +The link's URI consists of the link destination, excluding enclosing +`<...>` if present, with backslash-escapes in effect as described +above. The link's title consists of the link title, excluding its +enclosing delimiters, with backslash-escapes in effect as described +above. + +Here is a simple inline link: + +```````````````````````````````` example +[link](/uri "title") +. +

link

+```````````````````````````````` + + +The title may be omitted: + +```````````````````````````````` example +[link](/uri) +. +

link

+```````````````````````````````` + + +Both the title and the destination may be omitted: + +```````````````````````````````` example +[link]() +. +

link

+```````````````````````````````` + + +```````````````````````````````` example +[link](<>) +. +

link

+```````````````````````````````` + + +The destination cannot contain spaces or line breaks, +even if enclosed in pointy brackets: + +```````````````````````````````` example +[link](/my uri) +. +

[link](/my uri)

+```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

[link](</my uri>)

+```````````````````````````````` + + +```````````````````````````````` example +[link](foo +bar) +. +

[link](foo +bar)

+```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

[link]()

+```````````````````````````````` + +Parentheses inside the link destination may be escaped: + +```````````````````````````````` example +[link](\(foo\)) +. +

link

+```````````````````````````````` + +Any number of parentheses are allowed without escaping, as long as they are +balanced: + +```````````````````````````````` example +[link](foo(and(bar))) +. +

link

+```````````````````````````````` + +However, if you have unbalanced parentheses, you need to escape or use the +`<...>` form: + +```````````````````````````````` example +[link](foo\(and\(bar\)) +. +

link

+```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

link

+```````````````````````````````` + + +Parentheses and other symbols can also be escaped, as usual +in Markdown: + +```````````````````````````````` example +[link](foo\)\:) +. +

link

+```````````````````````````````` + + +A link can contain fragment identifiers and queries: + +```````````````````````````````` example +[link](#fragment) + +[link](http://example.com#fragment) + +[link](http://example.com?foo=3#frag) +. +

link

+

link

+

link

+```````````````````````````````` + + +Note that a backslash before a non-escapable character is +just a backslash: + +```````````````````````````````` example +[link](foo\bar) +. +

link

+```````````````````````````````` + + +URL-escaping should be left alone inside the destination, as all +URL-escaped characters are also valid URL characters. Entity and +numerical character references in the destination will be parsed +into the corresponding Unicode code points, as usual. These may +be optionally URL-escaped when written as HTML, but this spec +does not enforce any particular policy for rendering URLs in +HTML or other formats. Renderers may make different decisions +about how to escape or normalize URLs in the output. + +```````````````````````````````` example +[link](foo%20bä) +. +

link

+```````````````````````````````` + + +Note that, because titles can often be parsed as destinations, +if you try to omit the destination and keep the title, you'll +get unexpected results: + +```````````````````````````````` example +[link]("title") +. +

link

+```````````````````````````````` + + +Titles may be in single quotes, double quotes, or parentheses: + +```````````````````````````````` example +[link](/url "title") +[link](/url 'title') +[link](/url (title)) +. +

link +link +link

+```````````````````````````````` + + +Backslash escapes and entity and numeric character references +may be used in titles: + +```````````````````````````````` example +[link](/url "title \""") +. +

link

+```````````````````````````````` + + +Titles must be separated from the link using a [whitespace]. +Other [Unicode whitespace] like non-breaking space doesn't work. + +```````````````````````````````` example +[link](/url "title") +. +

link

+```````````````````````````````` + + +Nested balanced quotes are not allowed without escaping: + +```````````````````````````````` example +[link](/url "title "and" title") +. +

[link](/url "title "and" title")

+```````````````````````````````` + + +But it is easy to work around this by using a different quote type: + +```````````````````````````````` example +[link](/url 'title "and" title') +. +

link

+```````````````````````````````` + + +(Note: `Markdown.pl` did allow double quotes inside a double-quoted +title, and its test suite included a test demonstrating this. +But it is hard to see a good rationale for the extra complexity this +brings, since there are already many ways---backslash escaping, +entity and numeric character references, or using a different +quote type for the enclosing title---to write titles containing +double quotes. `Markdown.pl`'s handling of titles has a number +of other strange features. For example, it allows single-quoted +titles in inline links, but not reference links. And, in +reference links but not inline links, it allows a title to begin +with `"` and end with `)`. `Markdown.pl` 1.0.1 even allows +titles with no closing quotation mark, though 1.0.2b8 does not. +It seems preferable to adopt a simple, rational rule that works +the same way in inline links and link reference definitions.) + +[Whitespace] is allowed around the destination and title: + +```````````````````````````````` example +[link]( /uri + "title" ) +. +

link

+```````````````````````````````` + + +But it is not allowed between the link text and the +following parenthesis: + +```````````````````````````````` example +[link] (/uri) +. +

[link] (/uri)

+```````````````````````````````` + + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]](/uri) +. +

link [foo [bar]]

+```````````````````````````````` + + +```````````````````````````````` example +[link] bar](/uri) +. +

[link] bar](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[link [bar](/uri) +. +

[link bar

+```````````````````````````````` + + +```````````````````````````````` example +[link \[bar](/uri) +. +

link [bar

+```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*](/uri) +. +

link foo bar #

+```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)](/uri) +. +

moon

+```````````````````````````````` + + +However, links may not contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)](/uri) +. +

[foo bar](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[foo *[bar [baz](/uri)](/uri)*](/uri) +. +

[foo [bar baz](/uri)](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +![[[foo](uri1)](uri2)](uri3) +. +

[foo](uri2)

+```````````````````````````````` + + +These cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*](/uri) +. +

*foo*

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar](baz*) +. +

foo *bar

+```````````````````````````````` + + +Note that brackets that *aren't* part of links do not take +precedence: + +```````````````````````````````` example +*foo [bar* baz] +. +

foo [bar baz]

+```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo +. +

[foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo`](/uri)` +. +

[foo](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[foo +. +

[foohttp://example.com/?search=](uri)

+```````````````````````````````` + + +There are three kinds of [reference link](@)s: +[full](#full-reference-link), [collapsed](#collapsed-reference-link), +and [shortcut](#shortcut-reference-link). + +A [full reference link](@) +consists of a [link text] immediately followed by a [link label] +that [matches] a [link reference definition] elsewhere in the document. + +A [link label](@) begins with a left bracket (`[`) and ends +with the first right bracket (`]`) that is not backslash-escaped. +Between these brackets there must be at least one [non-whitespace character]. +Unescaped square bracket characters are not allowed inside the +opening and closing square brackets of [link labels]. A link +label can have at most 999 characters inside the square +brackets. + +One label [matches](@) +another just in case their normalized forms are equal. To normalize a +label, strip off the opening and closing brackets, +perform the *Unicode case fold*, strip leading and trailing +[whitespace] and collapse consecutive internal +[whitespace] to a single space. If there are multiple +matching reference link definitions, the one that comes first in the +document is used. (It is desirable in such cases to emit a warning.) + +The contents of the first link label are parsed as inlines, which are +used as the link's text. The link's URI and title are provided by the +matching [link reference definition]. + +Here is a simple example: + +```````````````````````````````` example +[foo][bar] + +[bar]: /url "title" +. +

foo

+```````````````````````````````` + + +The rules for the [link text] are the same as with +[inline links]. Thus: + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]][ref] + +[ref]: /uri +. +

link [foo [bar]]

+```````````````````````````````` + + +```````````````````````````````` example +[link \[bar][ref] + +[ref]: /uri +. +

link [bar

+```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*][ref] + +[ref]: /uri +. +

link foo bar #

+```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)][ref] + +[ref]: /uri +. +

moon

+```````````````````````````````` + + +However, links may not contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)][ref] + +[ref]: /uri +. +

[foo bar]ref

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar [baz][ref]*][ref] + +[ref]: /uri +. +

[foo bar baz]ref

+```````````````````````````````` + + +(In the examples above, we have two [shortcut reference links] +instead of one [full reference link].) + +The following cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*][ref] + +[ref]: /uri +. +

*foo*

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar][ref] + +[ref]: /uri +. +

foo *bar

+```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo + +[ref]: /uri +. +

[foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo`][ref]` + +[ref]: /uri +. +

[foo][ref]

+```````````````````````````````` + + +```````````````````````````````` example +[foo + +[ref]: /uri +. +

[foohttp://example.com/?search=][ref]

+```````````````````````````````` + + +Matching is case-insensitive: + +```````````````````````````````` example +[foo][BaR] + +[bar]: /url "title" +. +

foo

+```````````````````````````````` + + +Unicode case fold is used: + +```````````````````````````````` example +[Толпой][Толпой] is a Russian word. + +[ТОЛПОЙ]: /url +. +

Толпой is a Russian word.

+```````````````````````````````` + + +Consecutive internal [whitespace] is treated as one space for +purposes of determining matching: + +```````````````````````````````` example +[Foo + bar]: /url + +[Baz][Foo bar] +. +

Baz

+```````````````````````````````` + + +No [whitespace] is allowed between the [link text] and the +[link label]: + +```````````````````````````````` example +[foo] [bar] + +[bar]: /url "title" +. +

[foo] bar

+```````````````````````````````` + + +```````````````````````````````` example +[foo] +[bar] + +[bar]: /url "title" +. +

[foo] +bar

+```````````````````````````````` + + +This is a departure from John Gruber's original Markdown syntax +description, which explicitly allows whitespace between the link +text and the link label. It brings reference links in line with +[inline links], which (according to both original Markdown and +this spec) cannot have whitespace after the link text. More +importantly, it prevents inadvertent capture of consecutive +[shortcut reference links]. If whitespace is allowed between the +link text and the link label, then in the following we will have +a single reference link, not two shortcut reference links, as +intended: + +``` markdown +[foo] +[bar] + +[foo]: /url1 +[bar]: /url2 +``` + +(Note that [shortcut reference links] were introduced by Gruber +himself in a beta version of `Markdown.pl`, but never included +in the official syntax description. Without shortcut reference +links, it is harmless to allow space between the link text and +link label; but once shortcut references are introduced, it is +too dangerous to allow this, as it frequently leads to +unintended results.) + +When there are multiple matching [link reference definitions], +the first is used: + +```````````````````````````````` example +[foo]: /url1 + +[foo]: /url2 + +[bar][foo] +. +

bar

+```````````````````````````````` + + +Note that matching is performed on normalized strings, not parsed +inline content. So the following does not match, even though the +labels define equivalent inline content: + +```````````````````````````````` example +[bar][foo\!] + +[foo!]: /url +. +

[bar][foo!]

+```````````````````````````````` + + +[Link labels] cannot contain brackets, unless they are +backslash-escaped: + +```````````````````````````````` example +[foo][ref[] + +[ref[]: /uri +. +

[foo][ref[]

+

[ref[]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[foo][ref[bar]] + +[ref[bar]]: /uri +. +

[foo][ref[bar]]

+

[ref[bar]]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[[[foo]]] + +[[[foo]]]: /url +. +

[[[foo]]]

+

[[[foo]]]: /url

+```````````````````````````````` + + +```````````````````````````````` example +[foo][ref\[] + +[ref\[]: /uri +. +

foo

+```````````````````````````````` + + +Note that in this example `]` is not backslash-escaped: + +```````````````````````````````` example +[bar\\]: /uri + +[bar\\] +. +

bar\

+```````````````````````````````` + + +A [link label] must contain at least one [non-whitespace character]: + +```````````````````````````````` example +[] + +[]: /uri +. +

[]

+

[]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[ + ] + +[ + ]: /uri +. +

[ +]

+

[ +]: /uri

+```````````````````````````````` + + +A [collapsed reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document, followed by the string `[]`. +The contents of the first link label are parsed as inlines, +which are used as the link's text. The link's URI and title are +provided by the matching reference link definition. Thus, +`[foo][]` is equivalent to `[foo][foo]`. + +```````````````````````````````` example +[foo][] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar][] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo][] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + + +As with full reference links, [whitespace] is not +allowed between the two sets of brackets: + +```````````````````````````````` example +[foo] +[] + +[foo]: /url "title" +. +

foo +[]

+```````````````````````````````` + + +A [shortcut reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document and is not followed by `[]` or a link label. +The contents of the first link label are parsed as inlines, +which are used as the link's text. The link's URI and title +are provided by the matching link reference definition. +Thus, `[foo]` is equivalent to `[foo][]`. + +```````````````````````````````` example +[foo] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +[[*foo* bar]] + +[*foo* bar]: /url "title" +. +

[foo bar]

+```````````````````````````````` + + +```````````````````````````````` example +[[bar [foo] + +[foo]: /url +. +

[[bar foo

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +A space after the link text should be preserved: + +```````````````````````````````` example +[foo] bar + +[foo]: /url +. +

foo bar

+```````````````````````````````` + + +If you just want bracketed text, you can backslash-escape the +opening bracket to avoid links: + +```````````````````````````````` example +\[foo] + +[foo]: /url "title" +. +

[foo]

+```````````````````````````````` + + +Note that this is a link, because a link label ends with the first +following closing bracket: + +```````````````````````````````` example +[foo*]: /url + +*[foo*] +. +

*foo*

+```````````````````````````````` + + +Full and collapsed references take precedence over shortcut +references: + +```````````````````````````````` example +[foo][bar] + +[foo]: /url1 +[bar]: /url2 +. +

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo][] + +[foo]: /url1 +. +

foo

+```````````````````````````````` + +Inline links also take precedence: + +```````````````````````````````` example +[foo]() + +[foo]: /url1 +. +

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo](not a link) + +[foo]: /url1 +. +

foo(not a link)

+```````````````````````````````` + +In the following case `[bar][baz]` is parsed as a reference, +`[foo]` as normal text: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url +. +

[foo]bar

+```````````````````````````````` + + +Here, though, `[foo][bar]` is parsed as a reference, since +`[bar]` is defined: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[bar]: /url2 +. +

foobaz

+```````````````````````````````` + + +Here `[foo]` is not parsed as a shortcut reference, because it +is followed by a link label (even though `[bar]` is not defined): + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[foo]: /url2 +. +

[foo]bar

+```````````````````````````````` + + + +## Images + +Syntax for images is like the syntax for links, with one +difference. Instead of [link text], we have an +[image description](@). The rules for this are the +same as for [link text], except that (a) an +image description starts with `![` rather than `[`, and +(b) an image description may contain links. +An image description has inline elements +as its contents. When an image is rendered to HTML, +this is standardly used as the image's `alt` attribute. + +```````````````````````````````` example +![foo](/url "title") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*] + +[foo *bar*]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo ![bar](/url)](/url2) +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo [bar](/url)](/url2) +. +

foo bar

+```````````````````````````````` + + +Though this spec is concerned with parsing, not rendering, it is +recommended that in rendering to HTML, only the plain string content +of the [image description] be used. Note that in +the above example, the alt attribute's value is `foo bar`, not `foo +[bar](/url)` or `foo bar`. Only the plain string +content is rendered, without formatting. + +```````````````````````````````` example +![foo *bar*][] + +[foo *bar*]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*][foobar] + +[FOOBAR]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo](train.jpg) +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +My ![foo bar](/path/to/train.jpg "title" ) +. +

My foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo]() +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![](/url) +. +

+```````````````````````````````` + + +Reference-style: + +```````````````````````````````` example +![foo][bar] + +[bar]: /url +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![foo][bar] + +[BAR]: /url +. +

foo

+```````````````````````````````` + + +Collapsed: + +```````````````````````````````` example +![foo][] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar][] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +The labels are case-insensitive: + +```````````````````````````````` example +![Foo][] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +As with reference links, [whitespace] is not allowed +between the two sets of brackets: + +```````````````````````````````` example +![foo] +[] + +[foo]: /url "title" +. +

foo +[]

+```````````````````````````````` + + +Shortcut: + +```````````````````````````````` example +![foo] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +Note that link labels cannot contain unescaped brackets: + +```````````````````````````````` example +![[foo]] + +[[foo]]: /url "title" +. +

![[foo]]

+

[[foo]]: /url "title"

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +![Foo] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +If you just want a literal `!` followed by bracketed text, you can +backslash-escape the opening `[`: + +```````````````````````````````` example +!\[foo] + +[foo]: /url "title" +. +

![foo]

+```````````````````````````````` + + +If you want a link after a literal `!`, backslash-escape the +`!`: + +```````````````````````````````` example +\![foo] + +[foo]: /url "title" +. +

!foo

+```````````````````````````````` + + +## Autolinks + +[Autolink](@)s are absolute URIs and email addresses inside +`<` and `>`. They are parsed as links, with the URL or email address +as the link label. + +A [URI autolink](@) consists of `<`, followed by an +[absolute URI] not containing `<`, followed by `>`. It is parsed as +a link to the URI, with the URI as the link's label. + +An [absolute URI](@), +for these purposes, consists of a [scheme] followed by a colon (`:`) +followed by zero or more characters other than ASCII +[whitespace] and control characters, `<`, and `>`. If +the URI includes these characters, they must be percent-encoded +(e.g. `%20` for a space). + +For purposes of this spec, a [scheme](@) is any sequence +of 2--32 characters beginning with an ASCII letter and followed +by any combination of ASCII letters, digits, or the symbols plus +("+"), period ("."), or hyphen ("-"). + +Here are some valid autolinks: + +```````````````````````````````` example +<http://foo.bar.baz> +. +<p><a href="http://foo.bar.baz">http://foo.bar.baz</a></p>
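+```````````````````````````````` + + +```````````````````````````````` example +<http://foo.bar.baz/test?q=hello&id=22&boolean> +. +<p><a href="http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>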
+```````````````````````````````` + + +```````````````````````````````` example +<irc://foo.bar:2233/baz> +. +<p><a href="irc://foo.bar:2233/baz">irc://foo.bar:2233/baz</a></p>
+```````````````````````````````` + + +Uppercase is also fine: + +```````````````````````````````` example +<MAILTO:FOO@BAR.BAZ> +. +<p><a href="MAILTO:FOO@BAR.BAZ">MAILTO:FOO@BAR.BAZ</a></p>
+```````````````````````````````` + + +Note that many strings that count as [absolute URIs] for +purposes of this spec are not valid URIs, because their +schemes are not registered or because of other problems +with their syntax: + +```````````````````````````````` example +<a+b+c:d> +. +<p><a href="a+b+c:d">a+b+c:d</a></p>
+```````````````````````````````` + + +```````````````````````````````` example +<made-up-scheme://foo,bar> +. +<p><a href="made-up-scheme://foo,bar">made-up-scheme://foo,bar</a></p>
+```````````````````````````````` + + +```````````````````````````````` example +<http://../> +. +<p><a href="http://../">http://../</a></p>
+```````````````````````````````` + + +```````````````````````````````` example +<localhost:5001/foo> +. +<p><a href="localhost:5001/foo">localhost:5001/foo</a></p>
+```````````````````````````````` + + +Spaces are not allowed in autolinks: + +```````````````````````````````` example +<http://foo.bar/baz bim> +. +<p>&lt;http://foo.bar/baz bim&gt;</p>
+```````````````````````````````` + + +Backslash-escapes do not work inside autolinks: + +```````````````````````````````` example +<http://example.com/\[\> +. +<p><a href="http://example.com/%5C%5B%5C">http://example.com/\[\</a></p>
+```````````````````````````````` + + +An [email autolink](@) +consists of `<`, followed by an [email address], +followed by `>`. The link's label is the email address, +and the URL is `mailto:` followed by the email address. + +An [email address](@), +for these purposes, is anything that matches +the [non-normative regex from the HTML5 +spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)): + + /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? + (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ + +Examples of email autolinks: + +```````````````````````````````` example + +. +

foo@bar.example.com

+```````````````````````````````` + + +```````````````````````````````` example + +. +

foo+special@Bar.baz-bar0.com

+```````````````````````````````` + + +Backslash-escapes do not work inside email autolinks: + +```````````````````````````````` example + +. +

<foo+@bar.example.com>

+```````````````````````````````` + + +These are not autolinks: + +```````````````````````````````` example +<> +. +

<>

+```````````````````````````````` + + +```````````````````````````````` example +< http://foo.bar > +. +

< http://foo.bar >

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<m:abc>

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<foo.bar.baz>

+```````````````````````````````` + + +```````````````````````````````` example +http://example.com +. +

http://example.com

+```````````````````````````````` + + +```````````````````````````````` example +foo@bar.example.com +. +

foo@bar.example.com

+```````````````````````````````` + + +## Raw HTML + +Text between `<` and `>` that looks like an HTML tag is parsed as a +raw HTML tag and will be rendered in HTML without escaping. +Tag and attribute names are not limited to current HTML tags, +so custom tags (and even, say, DocBook tags) may be used. + +Here is the grammar for tags: + +A [tag name](@) consists of an ASCII letter +followed by zero or more ASCII letters, digits, or +hyphens (`-`). + +An [attribute](@) consists of [whitespace], +an [attribute name], and an optional +[attribute value specification]. + +An [attribute name](@) +consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII +letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML +specification restricted to ASCII. HTML5 is laxer.) + +An [attribute value specification](@) +consists of optional [whitespace], +a `=` character, optional [whitespace], and an [attribute +value]. + +An [attribute value](@) +consists of an [unquoted attribute value], +a [single-quoted attribute value], or a [double-quoted attribute value]. + +An [unquoted attribute value](@) +is a nonempty string of characters not +including spaces, `"`, `'`, `=`, `<`, `>`, or `` ` ``. + +A [single-quoted attribute value](@) +consists of `'`, zero or more +characters not including `'`, and a final `'`. + +A [double-quoted attribute value](@) +consists of `"`, zero or more +characters not including `"`, and a final `"`. + +An [open tag](@) consists of a `<` character, a [tag name], +zero or more [attributes], optional [whitespace], an optional `/` +character, and a `>` character. + +A [closing tag](@) consists of the string `</`, a +[tag name], optional [whitespace], and the character `>`. + +An [HTML comment](@) consists of `<!--` + *text* + `-->`, +where *text* does not start with `>` or `->`, does not end with `-`, +and does not contain `--`. (See the +[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) + +A [processing instruction](@) +consists of the string `<?`, a string +of characters not including the string `?>`, and the string +`?>`. 
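+ +A [declaration](@) consists of the +string `<!`, a name consisting of one or more uppercase ASCII letters, +[whitespace], a string of characters not including the +character `>`, and the character `>`. + +A [CDATA section](@) consists of +the string `<![CDATA[`, a string of characters not including the string +`]]>`, and the string `]]>`. + +An [HTML tag](@) consists of an [open tag], a [closing tag], +an [HTML comment], a [processing instruction], a [declaration], +or a [CDATA section]. + +Here are some simple open tags: + +```````````````````````````````` example + +. +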

+```````````````````````````````` + + +Empty elements: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +[Whitespace] is allowed: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +With attributes: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Custom tag names can be used: + +```````````````````````````````` example +Foo +. +

Foo

+```````````````````````````````` + + +Illegal tag names, not parsed as HTML: + +```````````````````````````````` example +<33> <__> +. +

<33> <__>

+```````````````````````````````` + + +Illegal attribute names: + +```````````````````````````````` example +
+. +

<a h*#ref="hi">

+```````````````````````````````` + + +Illegal attribute values: + +```````````````````````````````` example +
+. +

</a href="foo">

+```````````````````````````````` + + +Comments: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +foo +. +

foo <!-- not a comment -- two hyphens -->

+```````````````````````````````` + + +Not comments: + +```````````````````````````````` example +foo foo --> + +foo +. +

foo <!--> foo -->

+

foo <!-- foo--->

+```````````````````````````````` + + +Processing instructions: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +Declarations: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +CDATA sections: + +```````````````````````````````` example +foo &<]]> +. +

foo &<]]>

+```````````````````````````````` + + +Entity and numeric character references are preserved in HTML +attributes: + +```````````````````````````````` example +foo
+. +

foo

+```````````````````````````````` + + +Backslash escapes do not work in HTML attributes: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<a href=""">

+```````````````````````````````` + + +## Hard line breaks + +A line break (not in a code span or HTML tag) that is preceded +by two or more spaces and does not occur at the end of a block +is parsed as a [hard line break](@) (rendered +in HTML as a `<br />` tag): + +```````````````````````````````` example +foo +baz +. +

foo
+baz

+```````````````````````````````` + + +For a more visible alternative, a backslash before the +[line ending] may be used instead of two spaces: + +```````````````````````````````` example +foo\ +baz +. +

foo
+baz

+```````````````````````````````` + + +More than two spaces can be used: + +```````````````````````````````` example +foo +baz +. +

foo
+baz

+```````````````````````````````` + + +Leading spaces at the beginning of the next line are ignored: + +```````````````````````````````` example +foo + bar +. +

foo
+bar

+```````````````````````````````` + + +```````````````````````````````` example +foo\ + bar +. +

foo
+bar

+```````````````````````````````` + + +Line breaks can occur inside emphasis, links, and other constructs +that allow inline content: + +```````````````````````````````` example +*foo +bar* +. +

foo
+bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo\ +bar* +. +

foo
+bar

+```````````````````````````````` + + +Line breaks do not occur inside code spans + +```````````````````````````````` example +`code +span` +. +

code span

+```````````````````````````````` + + +```````````````````````````````` example +`code\ +span` +. +

code\ span

+```````````````````````````````` + + +or HTML tags: + +```````````````````````````````` example +
+. +

+```````````````````````````````` + + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Hard line breaks are for separating inline content within a block. +Neither syntax for hard line breaks works at the end of a paragraph or +other block element: + +```````````````````````````````` example +foo\ +. +

foo\

+```````````````````````````````` + + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +### foo\ +. +

foo\

+```````````````````````````````` + + +```````````````````````````````` example +### foo +. +

foo

+```````````````````````````````` + + +## Soft line breaks + +A regular line break (not in a code span or HTML tag) that is not +preceded by two or more spaces or a backslash is parsed as a +[softbreak](@). (A softbreak may be rendered in HTML either as a +[line ending] or as a space. The result will be the same in +browsers. In the examples here, a [line ending] will be used.) + +```````````````````````````````` example +foo +baz +. +

foo +baz

+```````````````````````````````` + + +Spaces at the end of the line and beginning of the next line are +removed: + +```````````````````````````````` example +foo + baz +. +

foo +baz

+```````````````````````````````` + + +A conforming parser may render a soft line break in HTML either as a +line break or as a space. + +A renderer may also provide an option to render soft line breaks +as hard line breaks. + +## Textual content + +Any characters not given an interpretation by the above rules will +be parsed as plain textual content. + +```````````````````````````````` example +hello $.;'there +. +

hello $.;'there

+```````````````````````````````` + + +```````````````````````````````` example +Foo χρῆν +. +

Foo χρῆν

+```````````````````````````````` + + +Internal spaces are preserved verbatim: + +```````````````````````````````` example +Multiple spaces +. +

Multiple spaces

+```````````````````````````````` + + + + +# Appendix: A parsing strategy + +In this appendix we describe some features of the parsing strategy +used in the CommonMark reference implementations. + +## Overview + +Parsing has two phases: + +1. In the first phase, lines of input are consumed and the block +structure of the document---its division into paragraphs, block quotes, +list items, and so on---is constructed. Text is assigned to these +blocks but not parsed. Link reference definitions are parsed and a +map of links is constructed. + +2. In the second phase, the raw text contents of paragraphs and headings +are parsed into sequences of Markdown inline elements (strings, +code spans, links, emphasis, and so on), using the map of link +references constructed in phase 1. + +At each point in processing, the document is represented as a tree of +**blocks**. The root of the tree is a `document` block. The `document` +may have any number of other blocks as **children**. These children +may, in turn, have other blocks as children. The last child of a block +is normally considered **open**, meaning that subsequent lines of input +can alter its contents. (Blocks that are not open are **closed**.) +Here, for example, is a possible document tree, with the open blocks +marked by arrows: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + +## Phase 1: block structure + +Each line that is processed has an effect on this tree. The line is +analyzed and, depending on its contents, the document may be altered +in one or more of the following ways: + +1. One or more open blocks may be closed. +2. One or more new blocks may be created as children of the + last open block. +3. Text may be added to the last (deepest) open block remaining + on the tree. 
+ +Once a line has been incorporated into the tree in this way, +it can be discarded, so input can be read in a stream. + +For each line, we follow this procedure: + +1. First we iterate through the open blocks, starting with the +root document, and descending through last children down to the last +open block. Each block imposes a condition that the line must satisfy +if the block is to remain open. For example, a block quote requires a +`>` character. A paragraph requires a non-blank line. +In this phase we may match all or just some of the open +blocks. But we cannot close unmatched blocks yet, because we may have a +[lazy continuation line]. + +2. Next, after consuming the continuation markers for existing +blocks, we look for new block starts (e.g. `>` for a block quote). +If we encounter a new block start, we close any blocks unmatched +in step 1 before creating the new block as a child of the last +matched block. + +3. Finally, we look at the remainder of the line (after block +markers like `>`, list markers, and indentation have been consumed). +This is text that can be incorporated into the last open +block (a paragraph, code block, heading, or raw HTML). + +Setext headings are formed when we see a line of a paragraph +that is a [setext heading underline]. + +Reference link definitions are detected when a paragraph is closed; +the accumulated text lines are parsed to see if they begin with +one or more reference link definitions. Any remainder becomes a +normal paragraph. + +We can see how this works by considering how the tree above is +generated by four lines of Markdown: + +``` markdown +> Lorem ipsum dolor +sit amet. 
+> - Qui *quodsi iracundia* +> - aliquando id +``` + +At the outset, our document model is just + +``` tree +-> document +``` + +The first line of our text, + +``` markdown +> Lorem ipsum dolor +``` + +causes a `block_quote` block to be created as a child of our +open `document` block, and a `paragraph` block as a child of +the `block_quote`. Then the text is added to the last open +block, the `paragraph`: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor" +``` + +The next line, + +``` markdown +sit amet. +``` + +is a "lazy continuation" of the open `paragraph`, so it gets added +to the paragraph's text: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor\nsit amet." +``` + +The third line, + +``` markdown +> - Qui *quodsi iracundia* +``` + +causes the `paragraph` block to be closed, and a new `list` block +opened as a child of the `block_quote`. A `list_item` is also +added as a child of the `list`, and a `paragraph` as a child of +the `list_item`. The text is then added to the new `paragraph`: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + -> list_item + -> paragraph + "Qui *quodsi iracundia*" +``` + +The fourth line, + +``` markdown +> - aliquando id +``` + +causes the `list_item` (and its child the `paragraph`) to be closed, +and a new `list_item` opened up as child of the `list`. A `paragraph` +is added as a child of the new `list_item`, to contain the text. +We thus obtain the final tree: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + +## Phase 2: inline structure + +Once all of the input has been parsed, all open blocks are closed. 
+ +We then "walk the tree," visiting every node, and parse raw +string contents of paragraphs and headings as inlines. At this +point we have seen all the link reference definitions, so we can +resolve reference links as we go. + +``` tree +document + block_quote + paragraph + str "Lorem ipsum dolor" + softbreak + str "sit amet." + list (type=bullet tight=true bullet_char=-) + list_item + paragraph + str "Qui " + emph + str "quodsi iracundia" + list_item + paragraph + str "aliquando id" +``` + +Notice how the [line ending] in the first paragraph has +been parsed as a `softbreak`, and the asterisks in the first list item +have become an `emph`. + +### An algorithm for parsing nested emphasis and links + +By far the trickiest part of inline parsing is handling emphasis, +strong emphasis, links, and images. This is done using the following +algorithm. + +When we're parsing inlines and we hit either + +- a run of `*` or `_` characters, or +- a `[` or `![` + +we insert a text node with these symbols as its literal content, and we +add a pointer to this text node to the [delimiter stack](@). + +The [delimiter stack] is a doubly linked list. Each +element contains a pointer to a text node, plus information about + +- the type of delimiter (`[`, `![`, `*`, `_`) +- the number of delimiters, +- whether the delimiter is "active" (all are active to start), and +- whether the delimiter is a potential opener, a potential closer, + or both (which depends on what sort of characters precede + and follow the delimiters). + +When we hit a `]` character, we call the *look for link or image* +procedure (see below). + +When we hit the end of the input, we call the *process emphasis* +procedure (see below), with `stack_bottom` = NULL. + +#### *look for link or image* + +Starting at the top of the delimiter stack, we look backwards +through the stack for an opening `[` or `![` delimiter. + +- If we don't find one, we return a literal text node `]`. 
+ +- If we do find one, but it's not *active*, we remove the inactive + delimiter from the stack, and return a literal text node `]`. + +- If we find one and it's active, then we parse ahead to see if + we have an inline link/image, reference link/image, compact reference + link/image, or shortcut reference link/image. + + + If we don't, then we remove the opening delimiter from the + delimiter stack and return a literal text node `]`. + + + If we do, then + + * We return a link or image node whose children are the inlines + after the text node pointed to by the opening delimiter. + + * We run *process emphasis* on these inlines, with the `[` opener + as `stack_bottom`. + + * We remove the opening delimiter. + + * If we have a link (and not an image), we also set all + `[` delimiters before the opening delimiter to *inactive*. (This + will prevent us from getting links within links.) + +#### *process emphasis* + +Parameter `stack_bottom` sets a lower bound to how far we +descend in the [delimiter stack]. If it is NULL, we can +go all the way to the bottom. Otherwise, we stop before +visiting `stack_bottom`. + +Let `current_position` point to the element on the [delimiter stack] +just above `stack_bottom` (or the first element if `stack_bottom` +is NULL). + +We keep track of the `openers_bottom` for each delimiter +type (`*`, `_`). Initialize this to `stack_bottom`. + +Then we repeat the following until we run out of potential +closers: + +- Move `current_position` forward in the delimiter stack (if needed) + until we find the first potential closer with delimiter `*` or `_`. + (This will be the potential closer closest + to the beginning of the input -- the first one in parse order.) + +- Now, look back in the stack (staying above `stack_bottom` and + the `openers_bottom` for this delimiter type) for the + first matching potential opener ("matching" means same delimiter). 
+ +- If one is found: + + + Figure out whether we have emphasis or strong emphasis: + if both closer and opener spans have length >= 2, we have + strong, otherwise regular. + + + Insert an emph or strong emph node accordingly, after + the text node corresponding to the opener. + + + Remove any delimiters between the opener and closer from + the delimiter stack. + + + Remove 1 (for regular emph) or 2 (for strong emph) delimiters + from the opening and closing text nodes. If they become empty + as a result, remove them and remove the corresponding element + of the delimiter stack. If the closing node is removed, reset + `current_position` to the next element in the stack. + +- If none is found: + + + Set `openers_bottom` to the element before `current_position`. + (We know that there are no openers for this kind of closer up to and + including this point, so this puts a lower bound on future searches.) + + + If the closer at `current_position` is not a potential opener, + remove it from the delimiter stack (since we know it can't + be a closer either). + + + Advance `current_position` to the next element in the stack. + +After we're done, we remove all delimiters above `stack_bottom` from the +delimiter stack. diff --git a/web/unmark/doc.umd b/web/unmark/doc.umd new file mode 100644 index 000000000..e77bc41ee --- /dev/null +++ b/web/unmark/doc.umd @@ -0,0 +1,348 @@ +:: :- :* title+"urbit-flavored markdown docs" +:: author+"ted blackman" +:: date+~2017.8.25 +:: == +:: +;> + +# udon: urbit-flavored markdown + +## overview + +Udon is a minimal markup language for creating and rendering text documents, +with a markdown-inspired syntax. It's integrated with the hoon programming +language, allowing it to be used as standalone prose in its own file or inside +a hoon source file, in which case it will be parsed into a tree of HTML nodes +using hoon's `sail` datatype. 
+ +Udon is stricter than markdown and generally supports only one syntax for each +type of HTML node it emits. + +### headers + +Headers in udon begin with one or more `#` characters, followed by a space. The +number of leading `#`s corresponds to the resulting HTML element: `#` yields an +`<h1>`, `##` yields an `<h2>`, and so on through `<h6>`. + +Example: +``` +### Header (h3) + +##### Header (h5) +``` +produces: + +> ### Header (h3) + + ##### Header (h5) + +### lists + +A line beginning with a `-` or `+` followed by a space is interpreted as an +element of a list. `-` means unordered list (`<ul>`) and `+` means ordered list +(`<ol>`). + +Example: + +``` +- unordered 1 + text on newline shows up on same line +- unordered 2\ + text on newline after `\` puts in
      line break + +- unordered after 1 blank line + - nested + - double-nested + ++ leading '+' ++ leading '+' +- unordered '-' + + nested ordered '+' item 1 + + nested ordered '+' item 2 + ++ ordered '+' + + nested item 1 + + nested item 2 +``` + +produces: + +> - unordered 1 + text on newline shows up on same line + - unordered 2\ + text on newline after `\` puts in
      line break + + - unordered after 1 blank line + - nested + - double-nested + + + leading '+' + + leading '+' + - unordered '-' + + nested ordered '+' item 1 + + nested ordered '+' item 2 + + + ordered '+' + + nested item 1 + + nested item 2 + +### blockquotes + +A section of text beginning with `> ` and indented by two spaces yields a +`<blockquote>` element. This blockquote can itself in turn contain more udon, +including more blockquotes to render nested levels of quotation. + +Example: + +``` +> As Gregor Samsa awoke one morning from uneasy dreams + he found himself _transformed_ in his bed into a *monstrous* vermin. +``` + +produces: + +> > As Gregor Samsa awoke one morning from uneasy dreams + he found himself _transformed_ in his bed into a *monstrous* vermin. + +### code blocks + +By enclosing a block of text in `\`\`\` on their own lines +before and after the block, the text will be treated as a code block. + +Example: + +``` +> ``` + (def Y (fn [f] + ((fn [x] + (x x)) + (fn [x] + (f (fn [y] + ((x x) y))))))) + ``` +``` + +produces: + +> ``` + (def Y (fn [f] + ((fn [x] + (x x)) + (fn [x] + (f (fn [y] + ((x x) y))))))) + ``` + +### poems + +A poem is a section of text with meaningful newlines. Normally in udon, +newlines are treated as spaces and do not create a new line of text. If you +want to embed text where newlines are retained, then indent the text in +question by eight spaces. + +Example: +``` + A shape with lion body and the head of a man, + A gaze blank and pitiless as the sun, + Is moving its slow thighs, while all about it + Reel shadows of the indignant desert birds. +``` +produces: +> A shape with lion body and the head of a man, + A gaze blank and pitiless as the sun, + Is moving its slow thighs, while all about it + Reel shadows of the indignant desert birds. + +### sail expressions + +It's possible to use udon as an HTML templating language akin to +PHP, ERB, JSP, or Handlebars templates. This facility derives +in part from the support for embedding hoon code inside the markup. +There are two ways to embed hoon in udon: inline expressions and sail. +[Sail](https://urbit.org/fora/posts/~2017.7.6..21.27.00..bebb~/) +is a DSL within hoon for creating XML nodes, including HTML. 
It can +be used directly within udon to provide scripting capability and also to +provide more fine-grained control over the resulting HTML. + +Example: +``` +;= + ;p + ;strong: Don't panic! + ;br; + ;small: [reactive publishing intensifies] + == +== +``` + +produces: +> ;= + ;p + ;strong: Don't panic! + ;br; + ;small: [reactive publishing intensifies] + == + == + +_Note: +[urbit's web publishing system](https://urbit.org/docs/arvo/web-apps/) +currently does not apply `