urbit/gen/cram.hoon

593 lines
18 KiB
Plaintext
Raw Normal View History

2017-07-16 03:59:57 +03:00
::
:::: hoon/marc/gen
::
2017-07-18 04:07:47 +03:00
:: %say generator taking a single positional argument, the path pax
::
:- %say
2017-07-24 02:00:45 +03:00
|= {^ {pax/path $~} $~}
2017-07-18 04:07:47 +03:00
:: the result is tagged %noun; its value is (test pax), evaluated
:: against the cores that follow
::
:- %noun
2017-07-24 02:00:45 +03:00
=< (test pax)
2017-07-16 03:59:57 +03:00
:: type definitions shared by the parser core below
::
=> |%
2017-07-18 04:07:47 +03:00
++ item (pair mite (list flow)) :: xml node generator
2017-07-16 03:59:57 +03:00
++ colm @ud :: column
2017-07-21 06:19:22 +03:00
++ flow manx :: node or generator
2017-07-18 04:07:47 +03:00
++ mite :: context
$? $down :: outer embed
$list :: unordered list
$lime :: list item
$lord :: ordered list
2017-07-19 06:19:27 +03:00
$poem :: verse
2017-07-18 04:07:47 +03:00
$bloc :: blockquote
$code :: preformatted code
2017-07-21 06:19:22 +03:00
$head :: heading
2017-07-19 06:19:27 +03:00
$expr :: dynamic expression
2017-07-21 06:19:22 +03:00
== ::
2017-07-16 03:59:57 +03:00
++ trig :: line style
$: col/@ud :: start column
$= sty :: style
2017-07-20 07:48:00 +03:00
$? $done :: terminator (\ line)
2017-07-16 03:59:57 +03:00
$none :: end of input
$lint :: + line item (ordered)
$lite :: - line item (unordered)
$head :: # heading
$text :: anything else
== == ::
2017-07-21 06:19:22 +03:00
++ graf :: input fragment
2017-07-22 09:56:21 +03:00
$% {$bold p/tape} :: bold
{$talc p/tape} :: italics
{$code p/tape} :: code literal
{$text p/tape} :: text symbol
{$link p/(list manx) q/tape} :: link: p content, q URL
2017-07-21 06:19:22 +03:00
==
2017-07-16 03:59:57 +03:00
--
2017-07-20 07:48:00 +03:00
|% ::
2017-07-24 02:00:45 +03:00
++ test :: test text parsing
:: read the file at pax, parse it with cram starting at line 1,
:: column 1, and produce either the rendered xml or an error message
::
|= pax/path
^- tape
:: src: text file as (list cord)
:: txt: source as tape, each line followed by a newline (byte 10)
:: vex: parsing result
::
=/ src .^(wain %cx pax)
=* txt (zing (turn src |=(@t (weld (rip 3 +<) `tape`~[`@`10]))))
=/ vex (cram [1 1] txt)
:: print result as error or xml text
:: an empty q.vex means failure; p.vex then holds the error position
::
?~ q.vex
"syntax error: line {(scow %ud p.p.vex)}, column {(scow %ud q.p.vex)}"
(poxo p.u.q.vex)
:: ::
++ cram :: parse unmark
2017-07-16 03:59:57 +03:00
:: naz: starting position (line and column)
:: los: input text
::
|= {naz/hair los/tape}
2017-07-22 09:56:21 +03:00
^- (like flow)
2017-07-16 03:59:57 +03:00
::
2017-07-18 04:07:47 +03:00
:: err: error position
2017-07-16 03:59:57 +03:00
:: col: current control column
:: hac: stack of items under construction
:: cur: current item under construction
2017-07-20 07:48:00 +03:00
:: lub: current block being read in
2017-07-16 03:59:57 +03:00
::
2017-07-18 04:07:47 +03:00
=| err/(unit hair)
=/ col q.naz
2017-07-16 03:59:57 +03:00
=| hac/(list item)
2017-07-20 07:48:00 +03:00
=/ cur/item [%down ~]
2017-07-16 03:59:57 +03:00
=| lub/(unit (pair hair (list tape)))
2017-07-18 04:07:47 +03:00
:: run the line loop, then resolve the resulting core with its $ arm
::
=< $:line
2017-07-16 03:59:57 +03:00
|%
:: ::
2017-07-20 07:48:00 +03:00
++ $ :: resolve
2017-07-22 09:56:21 +03:00
:: turn the final parser state into a parse result
::
^- (like flow)
2017-07-20 07:48:00 +03:00
:: if error position is set, produce error
2017-07-16 03:59:57 +03:00
::
2017-07-20 07:48:00 +03:00
?. =(~ err) [+.err ~]
:: all data was consumed
2017-07-16 03:59:57 +03:00
:: the product is paired with the current position and remaining input
::
2017-07-20 07:48:00 +03:00
=- [naz `[- [naz los]]]
2017-07-22 09:56:21 +03:00
|- ^- flow
2017-07-20 07:48:00 +03:00
:: fold all the way to top
2017-07-16 03:59:57 +03:00
:: once the stack is empty, the current item is the whole document
::
2017-07-22 09:56:21 +03:00
?~ hac fine
2017-07-20 07:48:00 +03:00
$(..^$ fold)
2017-07-16 03:59:57 +03:00
:: ::
++ back :: column retreat
2017-07-20 07:48:00 +03:00
:: luc: column to retreat to
:: closes contexts until the control column equals luc, or records an
:: error in err if the retreat does not match the indentation steps
::
|= luc/@ud
2017-07-18 04:07:47 +03:00
^+ +>
2017-07-20 07:48:00 +03:00
?: =(luc col) +>
:: nex: next backward step that terminates this context
::
=/ nex/@ud
?- p.cur
$down 0
2017-07-21 06:19:22 +03:00
$head 0
2017-07-20 07:48:00 +03:00
$expr 2
$list 0
$lime 2
$lord 0
$poem 8
$code 4
$bloc 6
==
:: the step would overshoot the target column
::
?: (gth nex (sub col luc))
:: indenting pattern violation
::
..^$(err `[p.naz luc])
:: close the current item, move the column back by nex, and repeat
::
$(..^$ fold, col (sub col nex))
2017-07-16 03:59:57 +03:00
:: ::
2017-07-20 07:48:00 +03:00
++  fine                                                ::  item to flow
  ::  wrap the children of the current item in the tag for its context
  ::
  ::  q.cur is accumulated newest-first (both made and fold prepend to
  ::  it), so it is flopped here to restore document order.
  ::  contexts with no tag in the table below ($head, $poem, $expr)
  ::  crash.
  ::
  ^-  flow
  =-  [[- ~] (flop q.cur)]
  ?+  p.cur  !!
    $down  %body
    $list  %ul
    $lord  %ol
    $lime  %li
    $bloc  %bq
    $code  %pre
  ==
:: ::
++  fold  ^+  .                                         ::  complete and pop
  ::  with an empty stack there is nothing to pop into
  ::
  ?~  hac  .
  ::  the finished item becomes the newest child of the item on top of
  ::  the stack, which in turn becomes the current item
  ::
  .(hac t.hac, cur [p.i.hac fine q.i.hac])
:: ::
++ snap :: capture raw line
2017-07-16 03:59:57 +03:00
:: produces [line core]: the text up to the next newline with trailing
:: spaces removed, and the core advanced to the start of the next line
:: nap: bytes of the line collected so far, in reverse order
::
=| nap/tape
2017-07-20 07:48:00 +03:00
|- ^+ [nap +>]
:: no unterminated lines
:: input ending before a newline sets err to the current position
::
?~ los [~ +>(err `naz)]
?: =(`@`10 i.los)
:: consume newline
:: position moves to column 1 of the following line
::
:_ +>(los t.los, naz [+(p.naz) 1])
:: trim trailing spaces
:: nap is reversed, so trailing spaces are at its head
::
|- ^- tape
?: ?=({$' ' *} nap)
$(nap t.nap)
(flop nap)
2017-07-21 06:19:22 +03:00
:: save byte and repeat
::
2017-07-20 07:48:00 +03:00
$(los t.los, q.naz +(q.naz), nap [i.los nap])
2017-07-16 03:59:57 +03:00
:: ::
2017-07-18 04:07:47 +03:00
:: run snap and keep only the advanced core, dropping the captured text
::
++ skip +:snap :: discard line
2017-07-19 06:19:27 +03:00
++  look                                                ::  inspect line
  ::  classify the upcoming line without consuming it
  ::
  ::  produces ~ for a blank line, otherwise the column of the first
  ::  non-space character together with the line style.
  ::
  ^-  (unit trig)
  ::  end of input
  ::
  ?~  los
    `[q.naz %none]
  ::  newline before any content: blank line
  ::
  ?:  =(`@`10 i.los)
    ~
  ::  step over a leading space, advancing the column
  ::
  ?:  =(' ' i.los)
    look(los t.los, q.naz +(q.naz))
  ::  the first non-space character selects the style
  ::
  :+  ~  q.naz
  ::  \ terminator
  ::
  ?:  =('\\' i.los)
    %done
  ::  # heading
  ::
  ?:  =('#' i.los)
    %head
  ::  - and + list items need a following space
  ::
  ?:  ?=({$'-' $' ' *} los)
    %lite
  ?:  ?=({$'+' $' ' *} los)
    %lint
  %text
:: ::
++  cape                                                ::  xml-escape
  ::  replace the five xml-special characters with entities
  ::
  |=  tex/tape
  ^-  tape
  ?~  tex  ~
  ::  res: escaped remainder of the tape
  ::
  =/  res/tape  $(tex t.tex)
  ?+  i.tex  [i.tex res]
    $34  (weld "&quot;" res)
    $38  (weld "&amp;" res)
    $39  (weld "&#39;" res)
    $60  (weld "&lt;" res)
    $62  (weld "&gt;" res)
  ==
:: ::
2017-07-22 09:56:21 +03:00
++  clue                                                ::  tape to xml
  ::  a text node: a tag named %$ whose only attribute, also named %$,
  ::  carries the text; it has no children
  ::
  |=  tex/tape
  ^-  manx
  =/  att/mart  [[%$ tex] ~]
  [[%$ att] ~]
:: ::
++ cash :: escaped fence
:: tem: rule for the closing delimiter
:: parses zero or more characters up to tem, then consumes and
:: discards tem itself
::
|* tem/rule
;~ sfix
%- star
:: either a printable character that is neither a backslash nor the
:: delimiter, or a backslash followed by one of those two, in which
:: case the leading backslash is dropped
::
=+ ;~(pose bas tem)
;~(pose ;~(less - prn) ;~(pfix bas -))
::
tem
==
:: ::
++ calm :: complete to space
:: sef: rule to wrap
:: accepts the result of sef only when the remaining input is empty or
:: begins with a space or a newline; otherwise fails at sef's position
::
|* sef/rule
|= tub/nail
=/ vex (sef tub)
:: sef itself failed
::
?~ q.vex vex
?: ?| ?=($~ q.q.u.q.vex)
=(' ' i.q.q.u.q.vex)
=(`@`10 i.q.q.u.q.vex)
==
vex
[p=p.vex q=~]
:: ::
++ cool :: reparsed fence
:: first extract a fenced span with fex, then parse the whole of that
:: span with sab; the product is sab's, the continuation is fex's
::
|* $: :: fex: fence delimiter
:: sab: main rule
::
fex/rule
sab/rule
==
|= {naz/hair los/tape}
^+ *sab
:: vex: fenced span
::
=/ vex/(like tape) (fex naz los)
?~ q.vex vex
:: hav: reparse full fenced text
:: the span is parsed from the original starting position naz
::
=/ hav ((full sab) [naz p.u.q.vex])
:: escapes may make error position drift
::
?~ q.hav hav
:: the complete span with the main product
::
:- p.vex
`[p.u.q.hav q.u.q.vex]
:: ::
++ echo :: hoon literal
:: recognize a wide-form hoon expression and produce its source text
:: rather than the parsed expression
::
|= {naz/hair los/tape}
^- (like tape)
:: vex: result of parsing wide twig
::
=/ vex (wide:vast naz los)
:: use result of expression parser
::
?~ q.vex vex
=- [p.vex `[- q.u.q.vex]]
:: but replace payload with bytes consumed
:: copy input until what is left equals the parser's remaining input
::
|- ^- tape
?: =(q.q.u.q.vex los) ~
?~ los ~
[i.los $(los +.los)]
:: ::
++  word                                                ::  flow unit
  ::  parse one inline fragment, tagged as a graf
  ::
  ::  alternatives are tried in order; the last one accepts any run of
  ::  printable non-space characters, including an empty run.
  ::
  ;~  pose
  ::  *bold literal*
  ::
    (stag %bold ;~(pfix tar (cash tar)))
  ::  _italic literal_
  ::
  ::  opening and closing delimiter are both an underscore
  ::
    (stag %talc ;~(pfix cab (cash cab)))
  ::  =expression
  ::
    (stag %code ;~(pfix tis echo))
  ::  ++arm
  ::
    (stag %code ;~(plug lus lus low (star ;~(pose nud low hep))))
  ::  [arbitrary *content*](url)
  ::
  ::  the bracketed content is itself parsed as inline flow
  ::
    %+  stag  %link
    ;~  plug
      ;~(pfix sel (cool (cash ser) down))
      ;~(pfix gay ;~(pfix pel (cash per)))
    ==
  ::  lowercase word, ending on word boundary
  ::
    (stag %text (calm (plus low)))
  ::  expression, ending on word boundary
  ::
    (stag %code (calm echo))
  ::  any word-shaped junk
  ::
    (stag %text (star ;~(less ace prn)))
  ==
:: ::
++  down                                                ::  parse inline flow
  ::  parse whitespace-separated words and convert them to xml nodes:
  ::  consecutive text words are joined with single spaces into one
  ::  text node, and every other fragment becomes its own tag
  ::
  %+  knee  *(list manx)  |.  ~+
  %+  cook
    ::  collect raw flow into xml tags
    ::
    |=  gaf/(list graf)
    ^-  (list manx)
    ::  fip: collected words, newest first
    ::  max: collected tags, newest first
    ::
    =|  fip/(list tape)
    =|  max/(list manx)
    =<  (flop max:main)
    |%                                                  ::
    ++  fill  ^+  .                                     ::  unify text block
      ::  txt: unconsumed text
      ::
      =/  txt/tape
        =|  txt/tape
        |-  ^+  txt
        ?~  fip  txt
        %=  $
          fip  t.fip
          txt  ?:  =(~ txt)
                 i.fip
               (weld i.fip `tape`[' ' txt])
        ==
      ::  nothing collected, core unchanged
      ::
      ?:  =(~ txt)  +
      %=  +
        max  :_(max (clue txt))
        fip  ~
      ==                                                ::
    ++  main  ^+  .                                     ::  flow to
      ?~  gaf  fill
      ::  plain text is only collected; it is emitted by fill
      ::
      ?:  ?=($text -.i.gaf)
        main(gaf t.gaf, fip [p.i.gaf fip])
      ::  nex: first word in flow
      ::  mor: rest of flow
      ::
      =>  :-  [nex=i.gaf mor=t.gaf]
          ::  consume accumulated text
          ::
          fill
      ::  convert and accumulate fragment
      ::
      =-  main(gaf mor, max [- max])
      ^-  manx
      ?-  -.nex
        $bold  [[%b ~] (clue (cape p.nex)) ~]
        $talc  [[%i ~] (clue (cape p.nex)) ~]
        $code  [[%code ~] (clue (cape p.nex)) ~]
        $link  [[%a [%href (cape q.nex)] ~] p.nex]
      ==
    --
  (most whit word)
:: ::
2017-07-24 02:00:45 +03:00
++  para                                                ::  paragraph
  ::  parse inline flow and wrap it in a single %p tag
  ::
  %+  cook
    |=  kid/(list manx)
    ^-  (list manx)
    [[[%p ~] kid] ~]
  down
:: ::
2017-07-22 09:56:21 +03:00
++  whit                                                ::  whitespace
  ::  one or more spaces or newlines, producing a single space
  ::
  %+  cold  ' '
  (plus ;~(pose ace (just `@`10)))
:: ::
++  head                                                ::  parse heading
  ::  parse a run of # followed by whitespace and inline flow, and
  ::  produce one heading tag whose id attribute is derived from the
  ::  heading text
  ::
  %+  cook
    |=  $:  ::  a: list of #
            ::  b: tag flow of header line
            ::
            a/tape
            b/(list manx)
        ==
    ^-  (list manx)
    ::  hag: header tag, h1 through h6
    ::
    ::  the level is the number of #, capped at 6
    ::
    =/  hag  (cat 3 'h' (add '0' =+((lent a) ?:((gth - 6) 6 -))))
    ::  sid: header text flattened as id
    ::
    =/  sid  ^-  tape
      ::  assemble, normalize and kebab-case
      ::
      ::  lowercase letters and digits are kept, uppercase letters are
      ::  lowered, and every other character becomes a hyphen
      ::
      =-  %-  zing
          %+  turn  `(list tape)`(flop -)
          |=  tape  ^-  tape
          %+  turn  `tape`+<
          |=  @tD
          ^-  @tD
          ?:  ?|  &((gte +< 'a') (lte +< 'z'))
                  &((gte +< '0') (lte +< '9'))
              ==
            +<
          ?:  &((gte +< 'A') (lte +< 'Z'))
            (add 32 +<)
          '-'
      ::  collect all text in header flow
      ::
      =|  ges/(list tape)
      |-  ^-  (list tape)
      ?~  b  ges
      %=    $
          b
        t.b
      ::
          ges
        ?:  ?=({{$$ {$$ *} $~} $~} i.b)
          ::  capture text
          ::
          [v.i.a.g.i.b ges]
        ::  descend into children
        ::
        $(b c.i.b)
      ==
    ::  header as tag with id attribute
    ::
    [[[hag [%id sid] ~] b] ~]
  ;~  plug
    ;~(sfix (star (just '#')) whit)
    down
  ==
2017-07-21 06:19:22 +03:00
:: ::
2017-07-20 07:48:00 +03:00
++  made                                                ::  compose block
  ::  convert the buffered block lub into xml, prepend it to the
  ::  children of the current item, and clear the buffer
  ::
  ::  lub holds its lines newest first, and q.cur holds its children
  ::  newest first.  col is a 1-based column, so the block indent to
  ::  strip from a raw line is (dec col) characters.
  ::
  ^+  .
  ::  empty block, no action
  ::
  ?~  lub  .
  ::  if block is preformatted code
  ::
  ?:  ?=($code p.cur)
    ::  add blank line between blocks
    ::
    =.  q.cur
      ?~  q.cur  q.cur
      :_(q.cur (clue `@`10 ~))
    %=    .
        lub  ~
        q.cur
      %+  weld
        %+  turn
          q.u.lub
        |=  tape  ^-  flow
        ::  each line is text data with its newline
        ::
        (clue (weld (slag (dec col) +<) `tape`[`@`10 ~]))
      q.cur
    ==
  ::  if block is verse
  ::
  ?:  ?=($poem p.cur)
    ::  add break between stanzas
    ::
    =.  q.cur  ?~(q.cur q.cur [[[%br ~] ~] q.cur])
    %=    .
        lub  ~
        q.cur
      %+  weld
        %+  turn
          q.u.lub
        |=  tape  ^-  flow
        ::  each line is a paragraph
        ::
        :-  [%p ~]
        :_  ~
        (clue (weld (slag (dec col) +<) `tape`[`@`10 ~]))
      q.cur
    ==
  ::  yex: block recomposed, with newlines
  ::
  =/  yex/tape
    (zing (turn (flop q.u.lub) |=(tape (weld +< `tape`[`@`10 ~]))))
  ::  XX expressions commented out
  ::
  ?<  ?=($expr p.cur)
  ::  vex: parse of paragraph
  ::
  =/  vex/(like (list manx))
    ::  either a one-line header or a paragraph
    ::
    %.([p.u.lub yex] ?:(?=($head p.cur) head para))
  ::  if error, propagate correctly
  ::
  ?~  q.vex  ..$(err `p.vex)
  ::  save good result, clear buffer
  ::
  ::  NOTE(review): after a heading is composed p.cur is still %head,
  ::  and fine has no tag for %head; confirm how headings should close
  ::
  ..$(lub ~, q.cur (weld p.u.q.vex q.cur))
2017-07-16 03:59:57 +03:00
:: ::
2017-07-18 04:07:47 +03:00
++  line  ^+  .                                         ::  body line loop
  ::  classify the next line with look, then either extend the block
  ::  being read (lub) or start a new block chosen by column offset
  ::  and prefix style; loops until end of input or the first error
  ::
  ::  abort after first error
  ::
  ?:  !=(~ err)  .
  ::  pic: profile of this line
  ::
  =/  pic  look
  ::  if line is blank
  ::
  ?~  pic
    ::  break section
    ::
    line:made:skip
  ::  line is not blank
  ::
  =>  .(pic u.pic)
  ::  if end of input, complete
  ::
  ::  NOTE(review): made is not called here, so a block still buffered
  ::  in lub at end of input is not composed; confirm this is intended
  ::
  ?:  ?=($none sty.pic)
    ..$(q.naz col.pic)
  ::  if end marker behind current column
  ::
  ?:  &(?=($done sty.pic) (lth col.pic col))
    ::  retract and complete
    ::
    (back(q.naz (add 2 col.pic)) col.pic)
  ::  bal: inspection copy of lub, current section
  ::
  =/  bal  lub
  ::  if within section
  ::
  ?^  bal
    ::  detect bad block structure
    ::
    ?:  ?|  ::  only one line in a heading
            ::
            =(%head p.cur)
            ?:  ?=(?($code $poem $expr) p.cur)
              ::  literals need to end with a blank line
              ::
              (lth col.pic col)
            ::  text flows must continue aligned
            ::
            |(!=(%text sty.pic) !=(col.pic col))
        ==
      ..$(err `[p.naz col.pic])
    ::  accept line and continue
    ::
    =^  nap  ..$  snap
    line(lub bal(q.u [nap q.u.bal]))
  ::  if column has retreated, adjust stack
  ::
  ::  back must run only on a retreat: it subtracts its target from
  ::  col, and the offset below subtracts col from col.pic
  ::
  =.  ..$  ?.  (lth col.pic col)  ..$
           (back col.pic)
  ::  back reports a bad retreat through err and leaves col ahead of
  ::  col.pic; stop before computing the offset
  ::
  ?.  =(~ err)  ..$
  ::  dif: columns advanced
  ::  erp: error position
  ::
  =/  dif  (sub col.pic col)
  =/  erp  [p.naz col.pic]
  =.  col  col.pic
  ::  nap: take first line
  ::
  =^  nap  ..$  snap
  ::  execute appropriate paragraph form
  ::
  =<  line:abet:apex
  |%
  ::                                                    ::
  ++  abet                                              ::  accept line
    ::  start a new buffered block holding the captured line
    ::
    ..$(lub `[naz nap ~])
  ::                                                    ::
  ++  apex  ^+  .                                       ::  by column offset
    ?+  dif  fail
      $0  apse
      $2  expr
      $4  code
      $6  bloc
      $8  poem
    ==
  ::                                                    ::
  ++  apse  ^+  .                                       ::  by prefix style
    ?-  sty.pic
      $done  !!
      $head  head
      $lite  lite
      $lint  lint
      $text  text
    ==
  ::                                                    ::
  ++  bloc  apse:(push %bloc)                           ::  blockquote line
  ++  fail  .(err `erp)                                 ::  set error position
  ++  push  |=(mite %_(+> hac [cur hac], cur [+< ~]))   ::  push context
  ++  expr  (push %expr)                                ::  hoon expression
  ++  code  (push %code)                                ::  code literal
  ++  poem  (push %poem)                                ::  verse literal
  ++  head  (push %head)                                ::  heading
  ++  lent                                              ::  list entry
    ::  ord: & for an ordered list, | for an unordered one
    ::
    |=  ord/?
    ^+  +>
    ::  erase list marker
    ::
    ::  the marker and its space are replaced by spaces
    ::
    =.  nap  =+(+(col) (runt [- ' '] (slag - nap)))
    ::  indent by 2
    ::
    =.  col  (add 2 col)
    ::  can't switch list types
    ::
    ?:  =(?:(ord %list %lord) p.cur)  fail
    ::  push list item
    ::
    %.  %lime
    =<  push
    ::  push list context, unless we're in list
    ::
    =+  ?:(ord %lord %list)
    ?:  =(- p.cur)  ..push  (push -)
  ::
  ++  lint  (lent &)                                    ::  numbered list
  ++  lite  (lent |)                                    ::  unnumbered list
  ++  text                                              ::  plain text
    ^+  .
    ::  only in lists, fold
    ::
    ::  NOTE(review): as written this replaces the $ limb with the
    ::  product of fold; confirm the intended target
    ::
    ?.  ?=(?($list $lord) p.cur)  .
    .($ fold)
  --
2017-07-16 03:59:57 +03:00
--
2017-07-18 04:07:47 +03:00
--