urbit/docs/pub/doc/hoon/library/2em.md

759 lines
18 KiB
Markdown
Raw Normal View History

2015-02-18 06:03:21 +03:00
section 2eM, regular-expressions
================================
### ++pars
:: Parse the regular-expression text in tape a into a rege tree.
:: Produces ~ when apex:rags cannot account for the whole tape,
:: otherwise [~ rege].
++ pars
|= [a=tape] :: parse tape to rege
^- (unit rege)
:: full wraps apex:rags; the nail handed to it starts at [1 1] with
:: a as the text still to be read.
=+ foo=((full apex:rags) [[1 1] a])
:: q.foo is empty when the parse failed.
?~ q.foo
~
[~ p.u.q.foo]
::
Parses the regular expression in tape `a` into a `rege`, producing `~` if `a` is not a valid regular expression.
~zod/try=> (pars "samo")
[ ~
[ %pair
p=[%lite p=~~s]
q=[%pair p=[%lite p=~~a] q=[%pair p=[%lite p=~~m] q=[%lite p=~~o]]]
]
]
~zod/try=> (pars "so[,.0-9]")
[ ~
[ %pair
p=[%lite p=~~s]
q=[%pair p=[%lite p=~~o] q=[%brac p=288.036.862.105.223.168]]
]
]
~zod/try=> `@ub`288.036.862.105.223.168
0b11.1111.1111.0101.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000
~zod/try=> `@ub`(lsh 0 `@`'9' 1)
0b10.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000
~zod/try=> `@ub`(roll (turn ",.0123456789" |=(a=@ (lsh 0 a 1))) con)
0b11.1111.1111.0101.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000
~zod/try=> (pars "sop.*")
[ ~
[ %pair
p=[%lite p=~~s]
q=[%pair p=[%lite p=~~o] q=[%pair p=[%lite p=~~p] q=[%mant p=%dote]]]
]
]
~zod/try=> (pars "(hel)?")
[ ~
[ %eith
p
[ %capt
p=[%pair p=[%lite p=~~h] q=[%pair p=[%lite p=~~e] q=[%lite p=~~l]]]
q=0
]
q=%empt
]
]
~zod/try=> (pars "(hel)??")
[ ~
[ %eith
p=%empt
q
[ %capt
p=[%pair p=[%lite p=~~h] q=[%pair p=[%lite p=~~e] q=[%lite p=~~l]]]
q=0
]
]
]
~zod/try=> (pars "a\{1,20}")
[~ [%betw p=[%lite p=~~a] q=1 r=20]]
### ++rags
:: Core containing the regular-expression parser. The |% opened here
:: holds the single-character helper rules used by the arms after it.
++ rags :: rege parsers
=> |%
Regex parser arms
### ++nor
:: A character in the range 1..127 that is not one of the characters
:: ^ $ ( ) | * ? + . [ or backslash listed in the mask.
++ nor ;~(less (mask "^$()|*?+.[\\") (shim 1 127)) :: non-control char
XX document
### ++les
:: Anything asp accepts, provided bas does not accept it first.
++ les ;~(less bas asp) :: not backslash
XX document
### ++lep
:: Anything asp accepts other than ^ [ ] and backslash.
++ lep ;~(less (mask "^[]\\") asp) :: charset non-control
XX document
### ++asp
:: A single character whose code lies between 32 and 126.
++ asp (shim 32 126) :: printable ascii
XX document
### ++alb
:: Anything asp accepts, provided ser does not accept it first
:: (ser is used elsewhere in this file to consume ']').
++ alb ;~(less ser asp) :: charset literal char
XX document
### ++mis
:: Anything asp accepts, provided aln does not accept it first.
++ mis ;~(less aln asp) :: non alphanumeric
--
|%
XX document
### ++apex
:: Entry rule for a whole expression (alternation level).
:: Alternatives, tried in order:
::   1. a mall, which bend joins with a following bar-prefixed apex
::      into [%eith mall apex] when that continuation is present;
::   2. a leading bar followed by apex, giving [%eith %empt apex];
::   3. nothing at all, giving %empt.
++ apex :: top level
%+ knee *rege |. ~+
;~ pose
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar apex))
(stag %eith ;~(plug (easy %empt) ;~(pfix bar apex)))
(easy %empt)
==
::
XX document
### ++mall
:: Concatenation level: one bets, which bend joins with a following
:: mall into [%pair bets mall] when more input can be parsed.
++ mall
%+ knee *rege |. ~+
;~((bend |=(a=[rege rege] (some [%pair a]))) bets mall)
::
XX document
### ++bets
:: Quantifier level: parse one atom with chun, then look for a
:: repetition suffix and wrap the atom (a) accordingly:
::   '??'          -> [%eith %empt a]
::   '*?'          -> [%manl a]
::   '+?'          -> [%plll a]
::   wut           -> [%eith a %empt]
::   tar           -> [%mant a]
::   lus           -> [%plls a]
::   rang then wut -> [%betl a m n]
::   rang          -> [%betw a m n]
::   kel n ',}?'   -> [%binl a n]
::   kel n '}?'    -> [%bant a n]
::   kel n ker     -> [%bant a n]
::   kel n ',}'    -> [%bint a n]
::   no suffix     -> a unchanged
:: The two-character suffixes come first so that pose tries them
:: before the one-character forms they begin with.
++ bets
%+ knee *rege |. ~+
|= tub=nail
=+ vex=(chun tub)
:: No atom here: hand chun's failure straight back.
?~ q.vex
vex
=+ a=p.u.q.vex
%- ;~ pose
(cold [%eith %empt a] (jest '??'))
(cold [%manl a] (jest '*?'))
(cold [%plll a] (jest '+?'))
(cold [%eith a %empt] wut)
(cold [%mant a] tar)
(cold [%plls a] lus)
(stag %betl ;~(plug (easy a) ;~(sfix rang wut)))
(stag %betw ;~(plug (easy a) rang))
(stag %binl ;~(plug (easy a) (ifix [kel (jest ',}?')] dim:ag)))
(stag %bant ;~(plug (easy a) (ifix [kel (jest '}?')] dim:ag)))
(stag %bant ;~(plug (easy a) (ifix [kel ker] dim:ag)))
(stag %bint ;~(plug (easy a) (ifix [kel (jest ',}')] dim:ag)))
(easy a)
==
:: The suffix rule is applied to the input left over after the atom.
q.u.q.vex
::
XX document
### ++ranc
:: Build a bitmask with bits a through b (inclusive) set, by OR-ing
:: (bex a) with the mask for a+1..b. Produces 0 when a exceeds b.
++ ranc
|= [a=@ b=@]
^- @
?:((gth a b) 0 (con (bex a) $(a +(a))))
::
XX document
### ++flap
:: Invert a character-set bitmask: XOR a with (2^256)-1, flipping
:: each of the low 256 bits.
++ flap |=(a=@ (mix a (dec (bex 256))))
::
XX document
### ++rang
:: Parse a bounded repetition of the form kel m com n ker into
:: [m n]. sear rejects the parse unless m is at most n.
++ rang
%+ sear |=([a=@ b=@] ?:((lte a b) (some [a b]) ~))
(ifix [kel ker] ;~(plug dim:ag ;~(pfix com dim:ag)))
::
XX document
### ++chun
:: Atom level: a single unquantified element. Alternatives, in order:
::   buc -> %ende, ket -> %sart, dot -> %dote;
::   '\Q' followed by cape: the quoted characters are folded by reel
::     into nested [%pair [%lite char] rest] nodes;
::   a guard gate that fails when the input starts with a brace
::     repetition (kel, number, then ker / ',}' / com number ker),
::     and otherwise defers to chad;
::   pel apex per -> [%capt a 0];
::   '(?P<' alphanumerics gar apex per -> [%capt a 0] (the name that
::     was parsed is not kept);
::   '(?:' apex per -> the inner rege, with no %capt wrapper;
::   sel followed by seac -> [%brac mask].
:: Every %capt is created with index 0 here.
++ chun
%+ knee *rege |. ~+
;~ pose
(cold %ende buc)
(cold %sart ket)
(cold %dote dot)
%+ cook |=(a=(list char) (reel a |=([p=char q=rege] [%pair [%lite p] q])))
;~(pfix (jest '\\Q') cape)
|= tub=nail
=+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker)))
=+ bar=(foo tub)
?~(q.bar (chad tub) (fail tub))
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] apex))
%+ cook |=([a=rege] [%capt a 0])
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] apex)
(ifix [(jest '(?:') per] apex)
(stag %brac ;~(pfix sel seac))
==
::
XX document
### ++seac
:: Contents of a bracket set, handling negation. Fails on empty
:: input. If the first character is '^', it is consumed with ket and
:: the mask produced by sead is inverted with flap; otherwise sead is
:: applied directly.
++ seac
|= tub=nail
?~ q.tub
(fail tub)
?: =(i.q.tub '^')
(;~(pfix ket (cook flap sead)) tub)
(sead tub)
::
XX document
### ++sead
:: Start of a bracket set's member list, producing a bitmask. Three
:: alternatives are tried in order:
::   1. First character is ']': consume it with ser and OR its bit
::      into whatever sade produces for the rest. If the next
::      character is '-', this is accepted only when the character
::      after that is ']'; otherwise the alternative fails.
::   2. First character is '-': the same shape, consuming with hep and
::      OR-ing in the bit for '-'. If the next character is also '-',
::      this is accepted only when the character after that is ']'.
::   3. General case: one seap element followed by sade, OR-ed.
:: Alternatives 1 and 2 also fail when there is no input, or no
:: character after the first one.
++ sead
%+ knee *@ |. ~+
;~ pose
|= tub=nail
?~ q.tub
(fail tub)
?. =(i.q.tub ']')
(fail tub)
?~ t.q.tub
(fail tub)
?: =(i.t.q.tub '-')
?~ t.t.q.tub
(fail tub)
?: =(i.t.t.q.tub ']')
(;~(pfix ser (cook |=(a=@ (con (bex ']') a)) sade)) tub)
(fail tub)
(;~(pfix ser (cook |=(a=@ (con (bex ']') a)) sade)) tub)
|= tub=nail
?~ q.tub
(fail tub)
?. =(i.q.tub '-')
(fail tub)
?~ t.q.tub
(fail tub)
?: =(i.t.q.tub '-')
?~ t.t.q.tub
(fail tub)
?: =(i.t.t.q.tub ']')
(;~(pfix hep (cook |=(a=@ (con (bex '-') a)) sade)) tub)
(fail tub)
(;~(pfix hep (cook |=(a=@ (con (bex '-') a)) sade)) tub)
(cook |=(a=[@ @] (con a)) ;~(plug seap sade))
==
::
XX document
### ++sade
:: Remainder of a bracket set, producing a bitmask:
::   '-]'  -> the bit for '-' (a trailing hyphen, then the close);
::   ser   -> 0 (nothing more to add);
::   else  -> one seap element OR-ed with a further sade.
++ sade
%+ knee *@ |. ~+
;~ pose
(cold (bex '-') (jest '-]'))
(cold 0 ser)
(cook |=([p=@ q=@] `@`(con p q)) ;~(plug seap sade))
==
::
XX document
### ++seap
:: One member of a bracket set, producing its bitmask. Alternatives:
::   unid: a backslash class shorthand;
::   '[:' ... ':]': a named class looked up by chas, inverted with
::     flap when the name is preceded by ket;
::   asp hep alb: a range, expanded with ranc; sear rejects it when
::     the first character is greater than the second;
::   a single les character, turned into its bit with bex. The gate
::     refuses when the following character is '-' and the character
::     after that exists and is not ']';
::   bas followed by escd: a backslash escape.
++ seap
%+ knee *@ |. ~+
;~ pose
unid
%+ ifix (jest '[:')^(jest ':]')
;~(pose ;~(pfix ket (cook flap chas)) chas)
%+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b))))
;~(plug asp ;~(pfix hep alb))
|= tub=nail
?~ q.tub
(fail tub)
?~ t.q.tub
((cook bex les) tub)
?: =(i.t.q.tub '-')
?~ t.t.q.tub
((cook bex les) tub)
?: =(i.t.t.q.tub ']')
((cook bex les) tub)
(fail tub)
((cook bex les) tub)
;~(pfix bas escd)
==
::
XX document
### ++cape
:: Text following '\Q', produced as a tape. Alternatives, in order:
::   '\E'            -> ~ (end of the quoted run);
::   next then cape  -> one character followed by more quoted text;
::   next            -> a final single character as a one-item tape;
::   end of input    -> ~.
++ cape
%+ knee *tape |. ~+
;~ pose
(cold ~ (jest '\\E'))
;~(plug next cape)
(cook |=(a=char (tape [a ~])) next)
(full (easy ~))
==
XX document
### ++chas
:: Named character class: parse a sym and look it up in a map from
:: class name to bitmask; sear fails the parse when the name is not a
:: key. Each key maps to the arm of the same name, except that the
:: key 'word' maps to wordc.
++ chas :: ascii character set
=- (sear ~(get by -) sym)
%- mo ^- (list ,[@tas @I])
:~ alnum/alnum alpha/alpha ascii/ascii blank/blank cntrl/cntrl
digit/digit graph/graph lower/lower print/print punct/punct
space/space upper/upper word/wordc xdigit/xdigit
==
:: Character sets
:: Each set is an atom used as a bitmask: bit n is set when the
:: character with code n belongs to the set.
:: alnum: union of lower, upper and digit.
++ alnum :(con lower upper digit)
XX document
### ++alpha
:: alpha: union of lower and upper.
++ alpha :(con lower upper)
XX document
### ++ascii
:: ascii: every code from 0 through 127.
++ ascii (ranc 0 127)
:: blank: codes 32 and 9 only.
++ blank (con (bex 32) (bex 9))
XX document
### ++cntrl
:: cntrl: codes 0 through 31, plus 127.
++ cntrl :(con (ranc 0 31) (bex 127))
XX document
### ++digit
:: digit: '0' through '9'.
++ digit (ranc '0' '9')
XX document
### ++graph
:: graph: codes 33 through 126.
++ graph (ranc 33 126)
XX document
### ++lower
:: lower: 'a' through 'z'.
++ lower (ranc 'a' 'z')
XX document
### ++print
:: print: codes 32 through 126.
++ print (ranc 32 126)
XX document
### ++punct
:: punct: union of four ranges, '!' to '/', ':' to '@', '[' to '`'
:: and '{' to '~'.
++ punct ;: con
(ranc '!' '/')
(ranc ':' '@')
(ranc '[' '`')
(ranc '{' '~')
==
XX document
### ++space
:: space: codes 9 through 13, plus ' '.
++ space :(con (ranc 9 13) (bex ' '))
XX document
### ++upper
:: upper: 'A' through 'Z'.
++ upper (ranc 'A' 'Z')
XX document
### ++white
:: white: ' ', codes 9 to 10 and codes 12 to 13. Unlike space, this
:: set leaves out code 11.
++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13))
XX document
### ++wordc
:: wordc: union of digit, lower and upper, plus '_'.
++ wordc :(con digit lower upper (bex '_'))
XX document
### ++xdigit
:: xdigit: 'a' to 'f', 'A' to 'F', and digit.
++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit)
::
XX document
### ++chad
:: A single-character element outside a bracket set:
::   nor        -> [%lite char];
::   unid       -> [%brac mask];
::   bas escp   -> whatever rege the escape denotes.
++ chad
%+ knee *rege |. ~+
;~(pose (stag %lite nor) (stag %brac unid) ;~(pfix bas escp))
::
XX document
### ++escd
:: The part of an escape after the backslash when inside a bracket
:: set (seap applies this after bas). Produces a one-bit mask:
::   a, t, n, v, f, r -> codes 7, 9, 10, 11, 12, 13;
::   0                -> code 0;
::   2 to 3 cit digits read in base 8, accepted only below 256;
::   x plus exactly two hit digits read in base 16;
::   'x{' two hit digits ker, read in base 16;
::   any character accepted by mis -> that character's own bit.
:: NOTE(review): the '0' alternative is listed before the base-8
:: alternative, so an octal escape beginning with 0 looks like it
:: would stop after the '0' - confirm this is intended.
++ escd
%+ knee *@ |. ~+
;~ pose
(cold (bex 7) (just 'a'))
(cold (bex 9) (just 't'))
(cold (bex 10) (just 'n'))
(cold (bex 11) (just 'v'))
(cold (bex 12) (just 'f'))
(cold (bex 13) (just 'r'))
(cold (bex 0) (just '0'))
(sear |=(a=@ ?:((lth a 256) (some (bex a)) ~)) (bass 8 (stun [2 3] cit)))
(cook bex ;~(pfix (just 'x') (bass 16 (stun [2 2] hit))))
(cook bex (ifix [(jest 'x{') ker] (bass 16 (stun [2 2] hit))))
(cook bex mis)
==
::
XX document
### ++escp
:: The part of an escape after the backslash when outside a bracket
:: set (chad applies this after bas). Produces a rege:
::   Q                -> %empt;
::   0, a, t, n, v, f, r -> %lite of codes 0, 7, 9, 10, 11, 12, 13;
::   2 to 3 cit digits read in base 8, accepted only below 256;
::   x plus exactly two hit digits, or 'x{' two hit digits ker;
::   C -> %dote, A -> %sart, z -> %ende, b -> %boun, B -> %bout;
::   w -> %brac of wordc, W -> %brac of the inverted wordc;
::   any character accepted by mis -> %lite of that character.
:: NOTE(review): the '0' alternative is listed before the base-8
:: alternative, so an octal escape beginning with 0 looks like it
:: would stop after the '0' - confirm this is intended.
++ escp
%+ knee *rege |. ~+
;~ pose
(cold %empt (just 'Q'))
(cold [%lite `@tD`0] (just '0'))
(cold [%lite `@tD`7] (just 'a'))
(cold [%lite `@tD`9] (just 't'))
(cold [%lite `@tD`10] (just 'n'))
(cold [%lite `@tD`11] (just 'v'))
(cold [%lite `@tD`12] (just 'f'))
(cold [%lite `@tD`13] (just 'r'))
(sear |=(a=@ ?:((lth a 256) (some [%lite a]) ~)) (bass 8 (stun [2 3] cit)))
(stag %lite ;~(pfix (just 'x') (bass 16 (stun [2 2] hit))))
(stag %lite (ifix [(jest 'x{') ker] (bass 16 (stun [2 2] hit))))
(cold %dote (just 'C'))
(cold %sart (just 'A'))
(cold %ende (just 'z'))
(cold %boun (just 'b'))
(cold %bout (just 'B'))
(stag %brac (cold wordc (just 'w')))
(stag %brac (cold (flap wordc) (just 'W')))
(stag %lite mis)
==
::
XX document
### ++unid
:: Backslash class shorthands, each producing a bitmask:
::   \d -> digit,  \D -> digit inverted with flap;
::   \s -> white,  \S -> white inverted;
::   \w -> wordc,  \W -> wordc inverted.
++ unid
%+ knee *@ |. ~+
;~ pose
(cold digit (jest '\\d'))
(cold (flap digit) (jest '\\D'))
(cold white (jest '\\s'))
(cold (flap white) (jest '\\S'))
(cold wordc (jest '\\w'))
(cold (flap wordc) (jest '\\W'))
==
--
::
XX document
### ++ra
:: Door over a parsed expression a; the arms inside read a as the
:: rege currently being numbered or matched.
++ ra :: regex engine
|_ a=rege
XX document
### ++proc
:: Assign indices to the %capt nodes of a. b is the index for the
:: next %capt met; a %capt takes b itself and its contents are
:: numbered from b+1, so an outer group is numbered before the groups
:: inside it. Two-child nodes (%eith, %pair) number p.a first and pass
:: the resulting counter on to q.a. Every other wrapper recurses into
:: p.a and is rebuilt with its remaining fields unchanged; leaves are
:: returned as they are.
:: The inner expression yields [p=next-unused-index a=renumbered];
:: the =- line then swaps the tail of that pair for the enclosing
:: core with a set to the renumbered rege (callers in this file use
:: p.poc and matc:poc on the result).
++ proc :: capture numbering
|= b=@
=- -(+ +>.$(a a))
^- [p=@ a=rege]
?- a
[%capt *] =+ foo=$(a p.a, b +(b))
[p.foo [%capt a.foo b]]
[%eith *] =+ foo=$(a p.a)
=+ bar=$(a q.a, b p.foo)
[p.bar [%eith a.foo a.bar]]
[%pair *] =+ foo=$(a p.a)
=+ bar=$(a q.a, b p.foo)
[p.bar [%pair a.foo a.bar]]
[%manl *] =+ foo=$(a p.a)
[p.foo [%manl a.foo]]
[%plll *] =+ foo=$(a p.a)
[p.foo [%plll a.foo]]
[%binl *] =+ foo=$(a p.a)
[p.foo [%binl a.foo q.a]]
[%betl *] =+ foo=$(a p.a)
[p.foo [%betl a.foo q.a r.a]]
[%mant *] =+ foo=$(a p.a)
[p.foo [%mant a.foo]]
[%plls *] =+ foo=$(a p.a)
[p.foo [%plls a.foo]]
[%bant *] =+ foo=$(a p.a)
[p.foo [%bant a.foo q.a]]
[%bint *] =+ foo=$(a p.a)
[p.foo [%bint a.foo q.a]]
[%betw *] =+ foo=$(a p.a)
[p.foo [%betw a.foo q.a r.a]]
* [b a]
==
::
XX document
### ++cont
:: Combine two capture maps into one: the entries of a followed by
:: the entries of b are loaded into an empty map with gas.
++ cont
|= [a=(map ,@u tape) b=(map ,@u tape)]
(~(gas by _(map ,@u tape)) (weld (~(tap by a)) (~(tap by b))))
::
XX document
### ++abor
:: Prepend character a to the matched text of result b, leaving the
:: capture map untouched. An empty b is passed through as is.
++ abor
|= [a=char b=(unit ,[tape (map ,@u tape)])]
^- (unit ,[tape (map ,@u tape)])
?~ b
b
[~ [[a -.u.b] +.u.b]]
::
XX document
### ++matc
:: Try to match the door's rege at the front of b, where c is the
:: complete subject text. deep is called with %empt as the
:: continuation. On success the matched text is stored under key 0
:: of the capture map, and that map is the result.
++ matc
|= [b=tape c=tape]
^- (unit (map ,@u tape))
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep b %empt c)
(bind foo |*(a=^ (~(put by +.a) 0 -.a)))
::
XX document
### ++chet
:: Filter a candidate result b by checking what comes after it.
:: c is the text the candidate was matched against and d the complete
:: subject. A failed candidate, or one whose matched text is empty, is
:: returned unchanged. Otherwise the door's current rege (callers in
:: this file set it with chet(a c) to the continuation) is run by deep
:: on c with the matched prefix dropped; if that fails the result is
:: the failure, and if it succeeds the original candidate b is kept.
++ chet
|= [b=(unit ,[tape (map ,@u tape)]) c=tape d=tape]
^- (unit ,[tape (map ,@u tape)])
?~ b
b
?~ -.u.b
b
=+ bar=(deep (slag (lent -.u.b) c) %empt d)
?~ bar
bar
b
XX document
### ++blak
:: A successful result that matched the empty tape and carries a
:: default (map ,@u tape) as its capture map.
++ blak (some ["" _(map ,@u tape)])
XX document
### ++word
:: Tests a against wordc:rags. The result is & when the intersection
:: with a's bit is 0, that is, when a is NOT in wordc.
:: NOTE(review): this is the inverse of what the name suggests; the
:: %boun and %bout cases of deep only compare two results of this
:: gate with each other, so they are unaffected by the polarity.
++ word |=(a=char =((dis wordc:rags (bex a)) 0))
XX document
### ++deep
:: Core matcher. Tries to match the door's rege a at the front of b.
::   b: the text still to be matched
::   c: the continuation, a rege describing what has to follow
::   d: the complete subject, used to tell where b sits within it
:: Produces ~ on failure, or the matched text with a capture map.
:: The position arithmetic in %boun and %bout assumes b is a suffix
:: of d.
++ deep
|= [b=tape c=rege d=tape]
^- (unit ,[tape (map ,@u tape)])
?- a
:: %dote: any one character; fails on empty input.
%dote ?~(b ~ (some [[i.b ~] _(map ,@u tape)]))
:: %ende: empty match, only when no input is left.
%ende ?~(b blak ~)
:: %sart: empty match, only when b is still the whole subject.
%sart ?:(=(b d) blak ~)
:: %empt: always an empty match.
%empt blak
:: %boun: luc is the word test on the character just before b (& when
:: b is the whole subject), cuc the test on the first character of b
:: (& when b is empty). Empty match when the two differ.
%boun =+ ^= luc
?: =(b d)
&
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
(word -.foo)
=+ cuc=?~(b & (word -.b))
?:(!=(luc cuc) blak ~)
:: %bout: same two tests as %boun; empty match when they agree.
%bout =+ ^= luc
?: =(b d)
&
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
(word -.foo)
=+ cuc=?~(b & (word -.b))
?:(=(luc cuc) blak ~)
:: %capt: match the group body, then require the continuation c to
:: match the text after it. On success the body's text is the result
:: and is also recorded in the map under the group's index q.a.
[%capt *] =+ foo=$(a p.a)
?~ foo
foo
=+ ft=u.foo
=+ bar=$(a c, b (slag (lent -.ft) b), c %empt)
?~ bar
bar
[~ [-.ft (~(put by +.ft) q.a -.ft)]]
:: %lite: one character equal to p.a.
[%lite *] ?~(b ~ ?:(=(i.b p.a) (some [[i.b ~] _(map ,@u tape)]) ~))
:: %brac: one character whose bit is set in the mask p.a.
[%brac *] ?~ b
~
?. =((dis (bex `@`i.b) p.a) 0)
(some [[i.b ~] _(map ,@u tape)])
~
:: %eith: both alternatives are run and each is filtered by chet
:: against the continuation c. If only one survives it is the result;
:: if both do, the longer match wins and p.a wins a tie.
[%eith *] =+ foo=(chet(a c) $(a p.a) b d)
=+ bar=(chet(a c) $(a q.a) b d)
?~ foo
bar
?~ bar
foo
=+ ft=u.foo
=+ bt=u.bar
?: (gte (lent -.ft) (lent -.bt))
foo
bar
:: %pair: match p.a with [%pair q.a c] as its continuation, then q.a
:: on what follows; texts are concatenated and maps merged with cont.
[%pair *] =+ foo=$(a p.a, c [%pair q.a c])
?~ foo
foo
=+ ft=u.foo
=+ bar=$(a q.a, b (slag (lent -.ft) b))
?~ bar
bar
=+ bt=u.bar
[~ [(weld -.ft -.bt) (cont +.ft +.bt)]]
:: %manl: if p.a fails or matches only the empty tape, the result is
:: an empty match; otherwise retry as an %eith that lists %empt
:: before one more repetition.
[%manl *] =+ foo=$(a p.a)
?~ foo
blak
?~ -.u.foo
blak
$(a [%eith %empt [%pair p.a [%eith %empt a]]])
:: %mant: as %manl, but the %eith lists the repetition before %empt.
[%mant *] =+ foo=$(a p.a)
?~ foo
blak
=+ ft=u.foo
?~ -.ft
blak
$(a [%eith [%pair p.a [%eith a %empt]] %empt])
:: %plls and %plll: one p.a followed by %mant or %manl of p.a.
[%plls *] $(a [%pair p.a [%mant p.a]])
[%plll *] $(a [%pair p.a [%manl p.a]])
:: %binl: q.a required copies of p.a, peeled off one at a time, then
:: %manl once the count reaches 0.
[%binl *] =+ min=?:(=(q.a 0) 0 (dec q.a))
?: =(q.a 0)
$(a [%manl p.a])
$(a [%pair p.a [%binl p.a min]])
:: %bant: q.a copies of p.a, peeled off one at a time; an empty match
:: once the count reaches 0.
[%bant *] ?: =(0 q.a)
blak
$(a [%pair p.a [%bant p.a (dec q.a)]])
:: %bint: as %binl, but ending in %mant.
[%bint *] =+ min=?:(=(q.a 0) 0 (dec q.a))
?: =(q.a 0)
$(a [%mant p.a])
$(a [%pair p.a [%bint p.a min]])
:: %betw: empty match when the upper bound r.a is 0. While the lower
:: bound q.a is above 0 a copy of p.a is required and both bounds
:: drop by one; after that each further copy is an %eith with %empt
:: listed second.
[%betw *] ?: =(0 r.a)
blak
?: =(q.a 0)
$(a [%eith [%pair p.a [%betw p.a 0 (dec r.a)]] %empt])
$(a [%pair p.a [%betw p.a (dec q.a) (dec r.a)]])
:: %betl: as %betw, but the optional copies list %empt first.
[%betl *] ?: =(0 r.a)
blak
?: =(q.a 0)
$(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]])
$(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]])
==
--
::
XX document
### ++rexp
:: Search tape b for the regular expression written in tape a.
:: Result:
::   ~          a could not be parsed by pars;
::   [~ ~]      a parsed, but no position in b produced a match;
::   [~ ~ map]  the match found at the earliest position tried, as a
::              map from group index to text, key 0 being the whole
::              match.
++ rexp :: Regex match
~/ %rexp
|= [a=tape b=tape]
^- (unit (unit (map ,@u tape)))
:: bar walks the indices a, a-1, .. 1 and stores "" under any that
:: the map does not already contain.
=+ ^= bar
|= [a=@ b=(map ,@u tape)]
?: =(a 0)
b
=+ c=(~(get by b) a)
?~ c
$(a (dec a), b (~(put by b) a ""))
$(a (dec a))
=+ par=(pars a)
?~ par ~
:: Number the groups starting from 1; p.poc is then one past the
:: last index handed out.
=+ poc=(~(proc ra u.par) 1)
:: c is the suffix of b currently being tried; a failed attempt
:: drops one character and retries, until c itself is empty.
=+ c=b
|-
=+ foo=(matc:poc c b)
?~ foo
?~ c
[~ ~]
$(c t.c)
[~ [~ (bar (dec p.poc) u.foo)]]
::
XX document
### ++repg
:: Replace matches of the regular expression written in tape a, found
:: in tape b, with tape c. Produces ~ when a cannot be parsed by pars,
:: otherwise the rewritten tape wrapped in a unit.
++ repg :: Global regex replace
~/ %repg
|= [a=tape b=tape c=tape]
^- (unit tape)
=+ par=(pars a)
?~ par ~
=+ poc=(~(proc ra u.par) 1)
:: d is the suffix of b that still has to be processed.
=+ d=b
:- ~
|-
^- tape
=+ foo=(matc:poc d b)
:: No match here: copy one character through and move on, or stop
:: when nothing is left.
?~ foo
?~ d
~
[i.d $(d t.d)]
:: ft is the text of the whole match (key 0).
=+ ft=(need (~(get by u.foo) 0))
?~ d
c
:: Emit the replacement, then continue after the matched text.
:: NOTE(review): when ft is empty, (slag 0 d) leaves d unchanged and
:: the loop is re-entered with the same state - confirm that an
:: expression able to match the empty tape cannot recurse forever.
(weld c $(d `tape`(slag (lent ft) `tape`d)))
::::::::::::::::::::::::::::::::::::::::::::::::::::::::::