urbit/pub/docs/dev/hoon/library/2em.md

759 lines
18 KiB
Markdown
Raw Normal View History

2015-02-18 06:03:21 +03:00
section 2eM, regular-expressions
================================
2015-08-12 00:32:57 +03:00
### `++pars`
2015-02-18 06:03:21 +03:00
::  Parse a regular-expression tape into a rege.
::  Runs apex:rags, wrapped in `full`, over the tape `a` starting at
::  position [1 1].  Produces ~ when the parse yields no result,
::  otherwise a unit holding the parsed rege.
::  (`full` presumably requires the whole tape to be consumed -- confirm.)
++ pars
|= [a=tape] :: parse tape to rege
^- (unit rege)
=+ foo=((full apex:rags) [[1 1] a])
?~ q.foo
~
[~ p.u.q.foo]
::
Parses a regular expression `tape` into a `rege`, producing `~` if the parse fails.
~zod/try=> (pars "samo")
[ ~
[ %pair
p=[%lite p=~~s]
q=[%pair p=[%lite p=~~a] q=[%pair p=[%lite p=~~m] q=[%lite p=~~o]]]
]
]
~zod/try=> (pars "so[,.0-9]")
[ ~
[ %pair
p=[%lite p=~~s]
q=[%pair p=[%lite p=~~o] q=[%brac p=288.036.862.105.223.168]]
]
]
~zod/try=> `@ub`288.036.862.105.223.168
0b11.1111.1111.0101.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000
~zod/try=> `@ub`(lsh 0 `@`'9' 1)
0b10.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000
~zod/try=> `@ub`(roll (turn ",.0123456789" |=(a=@ (lsh 0 a 1))) con)
0b11.1111.1111.0101.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000.0000
~zod/try=> (pars "sop.*")
[ ~
[ %pair
p=[%lite p=~~s]
q=[%pair p=[%lite p=~~o] q=[%pair p=[%lite p=~~p] q=[%mant p=%dote]]]
]
]
~zod/try=> (pars "(hel)?")
[ ~
[ %eith
p
[ %capt
p=[%pair p=[%lite p=~~h] q=[%pair p=[%lite p=~~e] q=[%lite p=~~l]]]
q=0
]
q=%empt
]
]
~zod/try=> (pars "(hel)??")
[ ~
[ %eith
p=%empt
q
[ %capt
p=[%pair p=[%lite p=~~h] q=[%pair p=[%lite p=~~e] q=[%lite p=~~l]]]
q=0
]
]
]
~zod/try=> (pars "a\{1,20}")
[~ [%betw p=[%lite p=~~a] q=1 r=20]]
2015-08-12 00:32:57 +03:00
### `++rags`
2015-02-18 06:03:21 +03:00
++ rags :: rege parsers
=> |%
Regex parser arms
2015-08-12 00:32:57 +03:00
### `++nor`
2015-02-18 06:03:21 +03:00
::  One character with code 1 through 127 that is not one of the regex
::  metacharacters ^ $ ( ) | * ? + . [ or backslash.
::  NOTE(review): the trailing comment says "non-control char", but the
::  range 1-127 as written also admits ASCII control codes -- confirm intent.
++ nor ;~(less (mask "^$()|*?+.[\\") (shim 1 127)) :: non-control char
XX document
2015-08-12 00:32:57 +03:00
### `++les`
2015-02-18 06:03:21 +03:00
::  One `asp` character (see ++asp) that is not matched by `bas`
::  (backslash, per the trailing comment).
++ les ;~(less bas asp) :: not backslash
XX document
2015-08-12 00:32:57 +03:00
### `++lep`
2015-02-18 06:03:21 +03:00
::  One `asp` character (see ++asp) that is not ^ [ ] or backslash.
++ lep ;~(less (mask "^[]\\") asp) :: charset non-control
XX document
2015-08-12 00:32:57 +03:00
### `++asp`
2015-02-18 06:03:21 +03:00
::  One character in the code range 32 through 126.
++ asp (shim 32 126) :: printable ascii
Parser rule that accepts one printable ASCII character (code points 32 through 126).
2015-08-12 00:32:57 +03:00
### `++alb`
2015-02-18 06:03:21 +03:00
::  One `asp` character (see ++asp) that is not matched by `ser`
::  (presumably the ']' rule -- confirm).  Used by ++seap as the upper
::  end of an `x-y` range.
++ alb ;~(less ser asp) :: charset literal char
XX document
2015-08-12 00:32:57 +03:00
### `++mis`
2015-02-18 06:03:21 +03:00
::  One `asp` character (see ++asp) that is not matched by `aln`
::  (alphanumeric, per the trailing comment).  Used by ++escd and ++escp
::  as the fallback "escaped literal" case.
++ mis ;~(less aln asp) :: non alphanumeric
--
|%
XX document
2015-08-12 00:32:57 +03:00
### `++apex`
2015-02-18 06:03:21 +03:00
::  Top-level regex rule.  Tries three alternatives in order:
::    1. a ++mall sequence, optionally followed by `bar` and another
::       ++apex; when both are present they are combined as [%eith left right]
::    2. `bar` followed by ++apex, producing [%eith %empt right]
::    3. nothing at all, producing %empt
::  (`bar` is presumably the '|' rule, and `bend` presumably keeps the
::  left result alone when the optional right side fails -- confirm.)
++ apex :: top level
%+ knee *rege |. ~+
;~ pose
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar apex))
(stag %eith ;~(plug (easy %empt) ;~(pfix bar apex)))
(easy %empt)
==
::
XX document
2015-08-12 00:32:57 +03:00
### `++mall`
2015-02-18 06:03:21 +03:00
::  A sequence of ++bets items.  A ++bets result followed by a further
::  ++mall result is combined as [%pair first rest], so a sequence nests
::  to the right (as the `(pars "samo")` example output shows).
++ mall
%+ knee *rege |. ~+
;~((bend |=(a=[rege rege] (some [%pair a]))) bets mall)
::
XX document
2015-08-12 00:32:57 +03:00
### `++bets`
2015-02-18 06:03:21 +03:00
::  One ++chun item followed by an optional quantifier suffix.
::  If ++chun fails, its failure is returned unchanged.  Otherwise the
::  parsed rege `a` is wrapped according to the first suffix rule that
::  matches the remaining input (q.u.q.vex):
::    '??'            [%eith %empt a]
::    '*?'            [%manl a]
::    '+?'            [%plll a]
::    wut             [%eith a %empt]
::    tar             [%mant a]
::    lus             [%plls a]
::    rang then wut   [%betl a lo hi]
::    rang            [%betw a lo hi]
::    kel n ',}?'     [%binl a n]
::    kel n '}?'      [%bant a n]
::    kel n ker       [%bant a n]
::    kel n ',}'      [%bint a n]
::    (nothing)       a
::  The two-character lazy forms are listed before their one-character
::  counterparts so that they are tried first.
++ bets
%+ knee *rege |. ~+
|= tub=nail
=+ vex=(chun tub)
?~ q.vex
vex
=+ a=p.u.q.vex
%- ;~ pose
(cold [%eith %empt a] (jest '??'))
(cold [%manl a] (jest '*?'))
(cold [%plll a] (jest '+?'))
(cold [%eith a %empt] wut)
(cold [%mant a] tar)
(cold [%plls a] lus)
(stag %betl ;~(plug (easy a) ;~(sfix rang wut)))
(stag %betw ;~(plug (easy a) rang))
(stag %binl ;~(plug (easy a) (ifix [kel (jest ',}?')] dim:ag)))
(stag %bant ;~(plug (easy a) (ifix [kel (jest '}?')] dim:ag)))
(stag %bant ;~(plug (easy a) (ifix [kel ker] dim:ag)))
(stag %bint ;~(plug (easy a) (ifix [kel (jest ',}')] dim:ag)))
(easy a)
==
q.u.q.vex
::
XX document
2015-08-12 00:32:57 +03:00
### `++ranc`
2015-02-18 06:03:21 +03:00
::  Build a bitmask atom with every bit from position `a` through
::  position `b` (inclusive) set.  Recurses with `a` incremented, ORing
::  in (bex a) each step; produces 0 once `a` exceeds `b`.
++ ranc
|= [a=@ b=@]
^- @
?:((gth a b) 0 (con (bex a) $(a +(a))))
::
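Produces an atom used as a bitmask in which every bit from position `a` through position `b` (inclusive) is set; produces `0` when `a` is greater than `b`.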
2015-08-12 00:32:57 +03:00
### `++flap`
2015-02-18 06:03:21 +03:00
::  Complement a character-set bitmask: XOR `a` with (dec (bex 256)),
::  i.e. flip each of the low 256 bits.
++ flap |=(a=@ (mix a (dec (bex 256))))
::
Complements a character-set bitmask by XORing it with `(dec (bex 256))`, which flips each of its low 256 bits.
2015-08-12 00:32:57 +03:00
### `++rang`
2015-02-18 06:03:21 +03:00
::  Parse a bounded repetition range: two `dim:ag` numbers separated by
::  `com` and enclosed in `kel` ... `ker` (the `a\{1,20}` example parses
::  to q=1 r=20 through this rule).  Produces [a b]; the parse is
::  rejected when `a` is greater than `b`.
++ rang
%+ sear |=([a=@ b=@] ?:((lte a b) (some [a b]) ~))
(ifix [kel ker] ;~(plug dim:ag ;~(pfix com dim:ag)))
::
XX document
2015-08-12 00:32:57 +03:00
### `++chun`
2015-02-18 06:03:21 +03:00
::  One regex atom (the unit that a quantifier in ++bets applies to).
::  Alternatives, tried in order:
::    buc                      %ende
::    ket                      %sart
::    dot                      %dote
::    '\Q' then ++cape         the quoted characters folded into nested
::                             [%pair [%lite char] rest] nodes
::    inline gate              fails if the input begins with something
::                             shaped like a brace quantifier (kel, a
::                             number, then ker / ',}' / ',' number ker);
::                             otherwise defers to ++chad
::    pel ++apex per           capture group, [%capt a 0]
::    '(?P<' name gar ... per  named group; the name is parsed but
::                             discarded, result is [%capt a 0]
::    '(?:' ... per            non-capturing group; result is the inner rege
::    sel then ++seac          character set, [%brac mask]
::  Capture indices are all 0 here; ++proc:ra assigns real numbers later.
++ chun
%+ knee *rege |. ~+
;~ pose
(cold %ende buc)
(cold %sart ket)
(cold %dote dot)
%+ cook |=(a=(list char) (reel a |=([p=char q=rege] [%pair [%lite p] q])))
;~(pfix (jest '\\Q') cape)
|= tub=nail
=+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker)))
=+ bar=(foo tub)
?~(q.bar (chad tub) (fail tub))
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] apex))
%+ cook |=([a=rege] [%capt a 0])
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] apex)
(ifix [(jest '(?:') per] apex)
(stag %brac ;~(pfix sel seac))
==
::
XX document
2015-08-12 00:32:57 +03:00
### `++seac`
2015-02-18 06:03:21 +03:00
::  Body of a character set (called by ++chun after the opening `sel`).
::  Fails on empty input.  If the next character is '^', consumes it
::  and complements the ++sead bitmask with ++flap; otherwise produces
::  the ++sead bitmask unchanged.
++ seac
|= tub=nail
?~ q.tub
(fail tub)
?: =(i.q.tub '^')
(;~(pfix ket (cook flap sead)) tub)
(sead tub)
::
XX document
2015-08-12 00:32:57 +03:00
### `++sead`
2015-02-18 06:03:21 +03:00
::  Start of a character set, where a leading ']' or '-' is a literal.
::  Produces the set as a bitmask atom.  Alternatives, in order:
::    1. input starts with ']': consume it and OR (bex ']') into the
::       ++sade result.  If the following character is '-', this is
::       only accepted when the character after that is ']'; it fails
::       otherwise, and also fails when there is no further input.
::    2. the same logic for a leading '-' (ORing in (bex '-')).
::    3. general case: one ++seap element ORed with the ++sade remainder.
++ sead
%+ knee *@ |. ~+
;~ pose
|= tub=nail
?~ q.tub
(fail tub)
?. =(i.q.tub ']')
(fail tub)
?~ t.q.tub
(fail tub)
?: =(i.t.q.tub '-')
?~ t.t.q.tub
(fail tub)
?: =(i.t.t.q.tub ']')
(;~(pfix ser (cook |=(a=@ (con (bex ']') a)) sade)) tub)
(fail tub)
(;~(pfix ser (cook |=(a=@ (con (bex ']') a)) sade)) tub)
|= tub=nail
?~ q.tub
(fail tub)
?. =(i.q.tub '-')
(fail tub)
?~ t.q.tub
(fail tub)
?: =(i.t.q.tub '-')
?~ t.t.q.tub
(fail tub)
?: =(i.t.t.q.tub ']')
(;~(pfix hep (cook |=(a=@ (con (bex '-') a)) sade)) tub)
(fail tub)
(;~(pfix hep (cook |=(a=@ (con (bex '-') a)) sade)) tub)
(cook |=(a=[@ @] (con a)) ;~(plug seap sade))
==
::
XX document
2015-08-12 00:32:57 +03:00
### `++sade`
2015-02-18 06:03:21 +03:00
::  Remainder of a character set, up to and including its terminator.
::  Produces a bitmask atom.  Alternatives, in order:
::    '-]'   a trailing literal '-': produces (bex '-')
::    ser    end of the set: produces 0
::    else   one ++seap element ORed with the rest of the set
++ sade
%+ knee *@ |. ~+
;~ pose
(cold (bex '-') (jest '-]'))
(cold 0 ser)
(cook |=([p=@ q=@] `@`(con p q)) ;~(plug seap sade))
==
::
XX document
2015-08-12 00:32:57 +03:00
### `++seap`
2015-02-18 06:03:21 +03:00
::  One element of a character set, as a bitmask atom.
::  Alternatives, in order:
::    1. a ++unid shorthand class
::    2. a named class between '[:' and ':]' looked up by ++chas,
::       complemented with ++flap when prefixed by `ket`
::    3. a range: an ++asp character, `hep`, then an ++alb character,
::       expanded by ++ranc; rejected when the first exceeds the second
::    4. a single ++les character, guarded by lookahead: it fails when
::       the next character is '-' and the one after that exists and is
::       not ']' (that shape is a range and belongs to case 3)
::    5. `bas` followed by an ++escd escape
++ seap
%+ knee *@ |. ~+
;~ pose
unid
%+ ifix (jest '[:')^(jest ':]')
;~(pose ;~(pfix ket (cook flap chas)) chas)
%+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b))))
;~(plug asp ;~(pfix hep alb))
|= tub=nail
?~ q.tub
(fail tub)
?~ t.q.tub
((cook bex les) tub)
?: =(i.t.q.tub '-')
?~ t.t.q.tub
((cook bex les) tub)
?: =(i.t.t.q.tub ']')
((cook bex les) tub)
(fail tub)
((cook bex les) tub)
;~(pfix bas escd)
==
::
XX document
2015-08-12 00:32:57 +03:00
### `++cape`
2015-02-18 06:03:21 +03:00
::  Body of a '\Q' literal section (called by ++chun), as a tape.
::  Alternatives, in order:
::    '\E'            ends the section, producing ~
::    next + ++cape   one character followed by the rest of the section
::    next            a single remaining character, as a one-char tape
::    (full (easy ~)) empty remainder at end of input
++ cape
%+ knee *tape |. ~+
;~ pose
(cold ~ (jest '\\E'))
;~(plug next cape)
(cook |=(a=char (tape [a ~])) next)
(full (easy ~))
==
XX document
2015-08-12 00:32:57 +03:00
### `++chas`
2015-02-18 06:03:21 +03:00
::  Named character class lookup.  Builds a map from class name to the
::  bitmask arm of the same name, then parses a `sym` and looks it up in
::  that map; the parse fails when the name is not a key.
::  Note that the key `word` maps to the ++wordc arm.
++ chas :: ascii character set
=- (sear ~(get by -) sym)
%- mo ^- (list ,[@tas @I])
:~ alnum/alnum alpha/alpha ascii/ascii blank/blank cntrl/cntrl
digit/digit graph/graph lower/lower print/print punct/punct
space/space upper/upper word/wordc xdigit/xdigit
==
:: Character sets
::  Bitmask: union of ++lower, ++upper and ++digit.
++ alnum :(con lower upper digit)
XX document
### `++alpha`
::  Bitmask: union of ++lower and ++upper.
++ alpha :(con lower upper)
XX document
### `++ascii`
::  Bitmask: codes 0 through 127.
++ ascii (ranc 0 127)
::  Bitmask: code 32 (space) and code 9 (tab).
++ blank (con (bex 32) (bex 9))
XX document
2015-08-12 00:32:57 +03:00
### `++cntrl`
2015-02-18 06:03:21 +03:00
::  Bitmask: codes 0 through 31, plus code 127.
++ cntrl :(con (ranc 0 31) (bex 127))
XX document
2015-08-12 00:32:57 +03:00
### `++digit`
2015-02-18 06:03:21 +03:00
::  Bitmask: characters '0' through '9'.
++ digit (ranc '0' '9')
XX document
2015-08-12 00:32:57 +03:00
### `++graph`
2015-02-18 06:03:21 +03:00
::  Bitmask: codes 33 through 126 (the ++print range without code 32).
++ graph (ranc 33 126)
XX document
2015-08-12 00:32:57 +03:00
### `++lower`
2015-02-18 06:03:21 +03:00
::  Bitmask: characters 'a' through 'z'.
++ lower (ranc 'a' 'z')
XX document
2015-08-12 00:32:57 +03:00
### `++print`
2015-02-18 06:03:21 +03:00
::  Bitmask: codes 32 through 126.
++ print (ranc 32 126)
XX document
2015-08-12 00:32:57 +03:00
### `++punct`
2015-02-18 06:03:21 +03:00
::  Bitmask: union of the four ranges '!'-'/', ':'-'@', '['-'`' and
::  '{'-'~', i.e. the ++graph characters that fall outside the digit
::  and letter ranges.
++ punct ;: con
(ranc '!' '/')
(ranc ':' '@')
(ranc '[' '`')
(ranc '{' '~')
==
XX document
2015-08-12 00:32:57 +03:00
### `++space`
2015-02-18 06:03:21 +03:00
::  Bitmask: codes 9 through 13, plus the space character.
++ space :(con (ranc 9 13) (bex ' '))
XX document
2015-08-12 00:32:57 +03:00
### `++upper`
2015-02-18 06:03:21 +03:00
::  Bitmask: characters 'A' through 'Z'.
++ upper (ranc 'A' 'Z')
XX document
2015-08-12 00:32:57 +03:00
### `++white`
2015-02-18 06:03:21 +03:00
::  Bitmask: the space character plus codes 9-10 and 12-13.
::  Unlike ++space, this omits code 11.  Used by ++unid for \s and \S.
++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13))
XX document
2015-08-12 00:32:57 +03:00
### `++wordc`
2015-02-18 06:03:21 +03:00
::  Bitmask: ++digit, ++lower, ++upper, plus the underscore character.
++ wordc :(con digit lower upper (bex '_'))
XX document
2015-08-12 00:32:57 +03:00
### `++xdigit`
2015-02-18 06:03:21 +03:00
::  Bitmask: 'a'-'f', 'A'-'F', plus ++digit.
++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit)
::
XX document
2015-08-12 00:32:57 +03:00
### `++chad`
2015-02-18 06:03:21 +03:00
::  A single-character regex atom.  Alternatives, in order:
::    ++nor                 a literal, [%lite char]
::    ++unid                a shorthand class, [%brac mask]
::    `bas` then ++escp     a backslash escape
++ chad
%+ knee *rege |. ~+
;~(pose (stag %lite nor) (stag %brac unid) ;~(pfix bas escp))
::
XX document
2015-08-12 00:32:57 +03:00
### `++escd`
2015-02-18 06:03:21 +03:00
::  Escape body inside a character set (++seap consumes the backslash
::  first).  Produces a one-bit mask, (bex code).  Alternatives, in order:
::    a t n v f r    codes 7, 9, 10, 11, 12, 13
::    0              code 0
::    octal          `(stun [2 3] cit)` read in base 8 (presumably two
::                   or three octal digits -- confirm); rejected when the
::                   value is 256 or more
::    x + hex        `(stun [2 2] hit)` read in base 16
::    x{ hex }       the same hex form between 'x{' and `ker`
::    ++mis          any other non-alphanumeric printable character,
::                   taken literally
::  NOTE(review): the '0' alternative is listed before the octal one, so
::  an octal escape that begins with 0 looks like it is consumed as code 0
::  with the remaining digits left in the input -- confirm.
++ escd
%+ knee *@ |. ~+
;~ pose
(cold (bex 7) (just 'a'))
(cold (bex 9) (just 't'))
(cold (bex 10) (just 'n'))
(cold (bex 11) (just 'v'))
(cold (bex 12) (just 'f'))
(cold (bex 13) (just 'r'))
(cold (bex 0) (just '0'))
(sear |=(a=@ ?:((lth a 256) (some (bex a)) ~)) (bass 8 (stun [2 3] cit)))
(cook bex ;~(pfix (just 'x') (bass 16 (stun [2 2] hit))))
(cook bex (ifix [(jest 'x{') ker] (bass 16 (stun [2 2] hit))))
(cook bex mis)
==
::
XX document
2015-08-12 00:32:57 +03:00
### `++escp`
2015-02-18 06:03:21 +03:00
::  Escape body outside a character set (++chad consumes the backslash
::  first).  Produces a rege.  Alternatives, in order:
::    Q              %empt
::    0 a t n v f r  [%lite code] for codes 0, 7, 9, 10, 11, 12, 13
::    octal          [%lite value]; rejected when the value is 256 or more
::    x + hex        [%lite value]
::    x{ hex }       [%lite value]
::    C              %dote
::    A              %sart
::    z              %ende
::    b              %boun
::    B              %bout
::    w              [%brac wordc]
::    W              [%brac (flap wordc)]
::    ++mis          [%lite char] for any other non-alphanumeric
::                   printable character
::  NOTE(review): as in ++escd, '0' is tried before the octal form.
::  NOTE(review): ++chad tries ++unid (which handles \w and \W) before
::  reaching this arm, so the 'w'/'W' alternatives here look unreachable
::  via ++chad -- confirm.
++ escp
%+ knee *rege |. ~+
;~ pose
(cold %empt (just 'Q'))
(cold [%lite `@tD`0] (just '0'))
(cold [%lite `@tD`7] (just 'a'))
(cold [%lite `@tD`9] (just 't'))
(cold [%lite `@tD`10] (just 'n'))
(cold [%lite `@tD`11] (just 'v'))
(cold [%lite `@tD`12] (just 'f'))
(cold [%lite `@tD`13] (just 'r'))
(sear |=(a=@ ?:((lth a 256) (some [%lite a]) ~)) (bass 8 (stun [2 3] cit)))
(stag %lite ;~(pfix (just 'x') (bass 16 (stun [2 2] hit))))
(stag %lite (ifix [(jest 'x{') ker] (bass 16 (stun [2 2] hit))))
(cold %dote (just 'C'))
(cold %sart (just 'A'))
(cold %ende (just 'z'))
(cold %boun (just 'b'))
(cold %bout (just 'B'))
(stag %brac (cold wordc (just 'w')))
(stag %brac (cold (flap wordc) (just 'W')))
(stag %lite mis)
==
::
XX document
2015-08-12 00:32:57 +03:00
### `++unid`
2015-02-18 06:03:21 +03:00
::  Shorthand character classes, as bitmask atoms:
::    \d  ++digit      \D  complement of ++digit
::    \s  ++white      \S  complement of ++white
::    \w  ++wordc      \W  complement of ++wordc
::  Complements are taken with ++flap.
++ unid
%+ knee *@ |. ~+
;~ pose
(cold digit (jest '\\d'))
(cold (flap digit) (jest '\\D'))
(cold white (jest '\\s'))
(cold (flap white) (jest '\\S'))
(cold wordc (jest '\\w'))
(cold (flap wordc) (jest '\\W'))
==
--
::
XX document
2015-08-12 00:32:57 +03:00
### `++ra`
2015-02-18 06:03:21 +03:00
++ ra :: regex engine
|_ a=rege
XX document
2015-08-12 00:32:57 +03:00
### `++proc`
2015-02-18 06:03:21 +03:00
::  Assign capture-group numbers.  `b` is the next free capture index.
::  The inner computation walks the rege `a` and produces [p a], where
::  `p` is the next free index after the walk and `a` is the rege with
::  every %capt node's index filled in:
::    %capt         takes index `b`; its body is numbered from +(b)
::    %eith, %pair  number the left side, then the right side starting
::                  from the left side's resulting index
::    quantifiers   number the body and rebuild the node with its
::                  original bounds
::    anything else is returned unchanged with `b`
::  The leading `=-` line then replaces the tail of that result with the
::  enclosing core re-sampled on the renumbered rege; ++rexp and ++repg
::  use the result as `p.poc` and `matc:poc`.
++ proc :: capture numbering
|= b=@
=- -(+ +>.$(a a))
^- [p=@ a=rege]
?- a
[%capt *] =+ foo=$(a p.a, b +(b))
[p.foo [%capt a.foo b]]
[%eith *] =+ foo=$(a p.a)
=+ bar=$(a q.a, b p.foo)
[p.bar [%eith a.foo a.bar]]
[%pair *] =+ foo=$(a p.a)
=+ bar=$(a q.a, b p.foo)
[p.bar [%pair a.foo a.bar]]
[%manl *] =+ foo=$(a p.a)
[p.foo [%manl a.foo]]
[%plll *] =+ foo=$(a p.a)
[p.foo [%plll a.foo]]
[%binl *] =+ foo=$(a p.a)
[p.foo [%binl a.foo q.a]]
[%betl *] =+ foo=$(a p.a)
[p.foo [%betl a.foo q.a r.a]]
[%mant *] =+ foo=$(a p.a)
[p.foo [%mant a.foo]]
[%plls *] =+ foo=$(a p.a)
[p.foo [%plls a.foo]]
[%bant *] =+ foo=$(a p.a)
[p.foo [%bant a.foo q.a]]
[%bint *] =+ foo=$(a p.a)
[p.foo [%bint a.foo q.a]]
[%betw *] =+ foo=$(a p.a)
[p.foo [%betw a.foo q.a r.a]]
* [b a]
==
::
XX document
2015-08-12 00:32:57 +03:00
### `++cont`
2015-02-18 06:03:21 +03:00
::  Combine two capture maps: the key-value lists of `a` and `b` are
::  concatenated (a's entries first) and inserted into a fresh map.
::  Which value survives when both maps share a key depends on the
::  insertion order used by `gas` -- confirm before relying on it.
++ cont
|= [a=(map ,@u tape) b=(map ,@u tape)]
(~(gas by _(map ,@u tape)) (weld (~(tap by a)) (~(tap by b))))
::
XX document
2015-08-12 00:32:57 +03:00
### `++abor`
2015-02-18 06:03:21 +03:00
::  Prepend character `a` to the matched text of match result `b`.
::  A failed match (~) is passed through unchanged; the capture map is
::  left as it is.
++ abor
|= [a=char b=(unit ,[tape (map ,@u tape)])]
^- (unit ,[tape (map ,@u tape)])
?~ b
b
[~ [[a -.u.b] +.u.b]]
::
XX document
2015-08-12 00:32:57 +03:00
### `++matc`
2015-02-18 06:03:21 +03:00
::  Match the core's rege at the head of tape `b`; `c` is passed to
::  ++deep as its third argument (the full input, used for anchors and
::  boundaries).  The continuation handed to ++deep is %empt.
::  On success, produces the capture map with the whole matched text
::  stored under key 0; on failure, produces ~.
++ matc
|= [b=tape c=tape]
^- (unit (map ,@u tape))
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep b %empt c)
(bind foo |*(a=^ (~(put by +.a) 0 -.a)))
::
XX document
2015-08-12 00:32:57 +03:00
### `++chet`
2015-02-18 06:03:21 +03:00
::  Validate a candidate match `b` against what must follow it.
::  `c` is the input the candidate was matched against and `d` is the
::  full input.  A failed match (~) or an empty match is returned as is.
::  Otherwise the core's rege (++deep calls this arm with the core's
::  sample set to its continuation) is run on `c` with the matched
::  prefix dropped; if that fails the result is ~, else `b` is returned.
++ chet
|= [b=(unit ,[tape (map ,@u tape)]) c=tape d=tape]
^- (unit ,[tape (map ,@u tape)])
?~ b
b
?~ -.u.b
b
=+ bar=(deep (slag (lent -.u.b) c) %empt d)
?~ bar
bar
b
XX document
2015-08-12 00:32:57 +03:00
### `++blak`
2015-02-18 06:03:21 +03:00
::  The empty successful match: empty matched text paired with the
::  default (presumably empty) capture map.
++ blak (some ["" _(map ,@u tape)])
The empty successful match: a unit containing the empty tape paired with the default capture map.
2015-08-12 00:32:57 +03:00
### `++word`
2015-02-18 06:03:21 +03:00
::  Test character `a` against wordc:rags.
::  NOTE: despite the name, this produces yes when `a` is NOT a word
::  character -- it checks that the AND of the word-character mask with
::  (bex a) is 0.  ++deep relies on this sense, using `&` for positions
::  at the start or end of the input.
++ word |=(a=char =((dis wordc:rags (bex a)) 0))
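Tests a character against `wordc:rags`. Note that it produces `&` (yes) when the character is *not* a word character, because it checks that the intersection of the word-character set with the character's bit is `0`.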
2015-08-12 00:32:57 +03:00
### `++deep`
2015-02-18 06:03:21 +03:00
::  Core matcher.  Matches the core's rege `a` at the head of tape `b`.
::    b  the remaining input
::    c  the continuation rege, i.e. what must match after `a`
::    d  the full original input (compared with `b` to detect the start
::       of input, and used to look at the character before `b`)
::  Produces ~ on failure, or a unit of [matched-text capture-map].
::  Quantifier nodes are handled by rewriting them into %pair / %eith
::  combinations and recursing.
++ deep
|= [b=tape c=rege d=tape]
^- (unit ,[tape (map ,@u tape)])
?- a
::  any single character
%dote ?~(b ~ (some [[i.b ~] _(map ,@u tape)]))
::  end of input
%ende ?~(b blak ~)
::  start of input: the remaining input is the whole input
%sart ?:(=(b d) blak ~)
%empt blak
::  word boundary: luc describes the character before `b`, cuc the
::  character at `b`; both use ++word (yes means non-word), with `&`
::  standing in at the start and end of the input
%boun =+ ^= luc
?: =(b d)
&
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
(word -.foo)
=+ cuc=?~(b & (word -.b))
?:(!=(luc cuc) blak ~)
::  non-boundary: same computation, opposite test
%bout =+ ^= luc
?: =(b d)
&
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
(word -.foo)
=+ cuc=?~(b & (word -.b))
?:(=(luc cuc) blak ~)
::  capture group: match the body, check that the continuation matches
::  what follows, then record the matched text under index q.a
[%capt *] =+ foo=$(a p.a)
?~ foo
foo
=+ ft=u.foo
=+ bar=$(a c, b (slag (lent -.ft) b), c %empt)
?~ bar
bar
[~ [-.ft (~(put by +.ft) q.a -.ft)]]
[%lite *] ?~(b ~ ?:(=(i.b p.a) (some [[i.b ~] _(map ,@u tape)]) ~))
::  character set: succeeds when the bit for i.b is set in mask p.a
[%brac *] ?~ b
~
?. =((dis (bex `@`i.b) p.a) 0)
(some [[i.b ~] _(map ,@u tape)])
~
::  alternation: each side is validated against the continuation with
::  ++chet; when both survive, the longer match wins, and the left side
::  wins a tie
[%eith *] =+ foo=(chet(a c) $(a p.a) b d)
=+ bar=(chet(a c) $(a q.a) b d)
?~ foo
bar
?~ bar
foo
=+ ft=u.foo
=+ bt=u.bar
?: (gte (lent -.ft) (lent -.bt))
foo
bar
::  sequence: match p.a with q.a pushed onto the continuation, then
::  match q.a on the rest and join the texts and capture maps
[%pair *] =+ foo=$(a p.a, c [%pair q.a c])
?~ foo
foo
=+ ft=u.foo
=+ bar=$(a q.a, b (slag (lent -.ft) b))
?~ bar
bar
=+ bt=u.bar
[~ [(weld -.ft -.bt) (cont +.ft +.bt)]]
::  the remaining cases are the quantifiers produced by ++bets:rags;
::  %manl and %mant yield the empty match when the body fails or
::  matches empty text, which stops the recursion
[%manl *] =+ foo=$(a p.a)
?~ foo
blak
?~ -.u.foo
blak
$(a [%eith %empt [%pair p.a [%eith %empt a]]])
[%mant *] =+ foo=$(a p.a)
?~ foo
blak
=+ ft=u.foo
?~ -.ft
blak
$(a [%eith [%pair p.a [%eith a %empt]] %empt])
[%plls *] $(a [%pair p.a [%mant p.a]])
[%plll *] $(a [%pair p.a [%manl p.a]])
[%binl *] =+ min=?:(=(q.a 0) 0 (dec q.a))
?: =(q.a 0)
$(a [%manl p.a])
$(a [%pair p.a [%binl p.a min]])
[%bant *] ?: =(0 q.a)
blak
$(a [%pair p.a [%bant p.a (dec q.a)]])
[%bint *] =+ min=?:(=(q.a 0) 0 (dec q.a))
?: =(q.a 0)
$(a [%mant p.a])
$(a [%pair p.a [%bint p.a min]])
[%betw *] ?: =(0 r.a)
blak
?: =(q.a 0)
$(a [%eith [%pair p.a [%betw p.a 0 (dec r.a)]] %empt])
$(a [%pair p.a [%betw p.a (dec q.a) (dec r.a)]])
[%betl *] ?: =(0 r.a)
blak
?: =(q.a 0)
$(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]])
$(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]])
==
--
::
XX document
2015-08-12 00:32:57 +03:00
### `++rexp`
2015-02-18 06:03:21 +03:00
::  Search tape `b` for the first match of regex `a`.
::  Produces:
::    ~            the regex `a` did not parse
::    [~ ~]        the regex parsed but matched nowhere in `b`
::    [~ ~ map]    a match; the map holds the whole match under key 0
::                 and each capture group under its index
::  The regex is parsed with ++pars and numbered with ++proc:ra starting
::  at 1.  Matching is attempted at `b`, then at each successive tail of
::  `b`, including the empty tail.  The helper `bar` fills in "" for
::  every capture index from (dec p.poc) down to 1 that is missing from
::  the result.
++ rexp :: Regex match
~/ %rexp
|= [a=tape b=tape]
^- (unit (unit (map ,@u tape)))
=+ ^= bar
|= [a=@ b=(map ,@u tape)]
?: =(a 0)
b
=+ c=(~(get by b) a)
?~ c
$(a (dec a), b (~(put by b) a ""))
$(a (dec a))
=+ par=(pars a)
?~ par ~
=+ poc=(~(proc ra u.par) 1)
=+ c=b
|-
=+ foo=(matc:poc c b)
?~ foo
?~ c
[~ ~]
$(c t.c)
[~ [~ (bar (dec p.poc) u.foo)]]
::
XX document
2015-08-12 00:32:57 +03:00
### `++repg`
2015-02-18 06:03:21 +03:00
::  Replace every match of regex `a` in tape `b` with tape `c`.
::  Produces ~ when `a` does not parse, otherwise a unit of the
::  rewritten tape.  Walks the input `d` (initially `b`): where there is
::  no match at the current position, the current character is copied
::  and the walk advances by one; where there is a match, `c` is emitted
::  and the walk skips the matched text (key 0 of the result).
::  NOTE(review): when the match at the current position is empty and
::  `d` is not, the recursion continues on `(slag 0 d)`, which is `d`
::  itself -- this looks like it would not terminate (for example with a
::  pattern that can match the empty string) -- confirm.
++ repg :: Global regex replace
~/ %repg
|= [a=tape b=tape c=tape]
^- (unit tape)
=+ par=(pars a)
?~ par ~
=+ poc=(~(proc ra u.par) 1)
=+ d=b
:- ~
|-
^- tape
=+ foo=(matc:poc d b)
?~ foo
?~ d
~
[i.d $(d t.d)]
=+ ft=(need (~(get by u.foo) 0))
?~ d
c
(weld c $(d `tape`(slag (lent ft) `tape`d)))
::::::::::::::::::::::::::::::::::::::::::::::::::::::::::