mirror of
https://github.com/urbit/shrub.git
synced 2025-01-07 05:26:56 +03:00
- regex
This commit is contained in:
parent
cfa3183640
commit
01becd1f7c
437
arvo/hoon.hoon
437
arvo/hoon.hoon
@ -639,6 +639,7 @@
|
||||
::
|
||||
++ need :: demand
|
||||
|* a=(unit)
|
||||
~| %need
|
||||
?~ a !!
|
||||
u.a
|
||||
::
|
||||
@ -3411,11 +3412,13 @@
|
||||
==
|
||||
::
|
||||
%u
|
||||
?: ?=(%c hay)
|
||||
%+ welp ['0' 'c' (reap (pad:fa q.p.lot) '1')]
|
||||
(c-co (enc:fa q.p.lot))
|
||||
=- (weld p.gam ?:(=(0 q.p.lot) `tape`['0' ~] q.gam))
|
||||
^= gam ^- [p=tape q=tape]
|
||||
?+ hay [~ ((ox-co [10 3] |=(a=@ ~(d ne a))) q.p.lot)]
|
||||
%b [['0' 'b' ~] ((ox-co [2 4] |=(a=@ ~(d ne a))) q.p.lot)]
|
||||
%c [['0' 'c' (reap (pad:fa q.p.lot) '1')] (c-co (enc:fa q.p.lot))]
|
||||
%i [['0' 'i' ~] ((d-co 1) q.p.lot)]
|
||||
%x [['0' 'x' ~] ((ox-co [16 4] |=(a=@ ~(x ne a))) q.p.lot)]
|
||||
%v [['0' 'v' ~] ((ox-co [32 5] |=(a=@ ~(x ne a))) q.p.lot)]
|
||||
@ -3696,437 +3699,7 @@
|
||||
::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||
:: section 2eM, regular-expressions ::
|
||||
::
|
||||
++ pars
|
||||
|= [a=tape] :: parse tape to rege
|
||||
^- (unit rege)
|
||||
=+ foo=((full apex:rags) [[1 1] a])
|
||||
?~ q.foo
|
||||
~
|
||||
[~ p.u.q.foo]
|
||||
::
|
||||
++ rags :: rege parsers
|
||||
=> |%
|
||||
++ nor ;~(less (mask "^$()|*?+.[\\") (shim 1 127)) :: non-control char
|
||||
++ les ;~(less bas asp) :: not backslash
|
||||
++ lep ;~(less (mask "-^[]\\") asp) :: charset non-control
|
||||
++ asp (shim 32 126) :: printable ascii
|
||||
++ alb ;~(less ser asp) :: charset literal char
|
||||
++ mis ;~(less aln asp) :: non alphanumeric
|
||||
--
|
||||
|%
|
||||
++ apex :: top level
|
||||
%+ knee *rege |. ~+
|
||||
;~ pose
|
||||
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar apex))
|
||||
(stag %eith ;~(plug (easy %empt) ;~(pfix bar apex)))
|
||||
(easy %empt)
|
||||
==
|
||||
::
|
||||
++ mall
|
||||
%+ knee *rege |. ~+
|
||||
;~((bend |=(a=[rege rege] (some [%pair a]))) bets mall)
|
||||
::
|
||||
++ bets
|
||||
%+ knee *rege |. ~+
|
||||
|= tub=nail
|
||||
=+ vex=(chun tub)
|
||||
?~ q.vex
|
||||
vex
|
||||
=+ a=p.u.q.vex
|
||||
%- ;~ pose
|
||||
(cold [%eith %empt a] (jest '??'))
|
||||
(cold [%manl a] (jest '*?'))
|
||||
(cold [%plll a] (jest '+?'))
|
||||
(cold [%eith a %empt] wut)
|
||||
(cold [%mant a] tar)
|
||||
(cold [%plls a] lus)
|
||||
(stag %betl ;~(plug (easy a) ;~(sfix rang wut)))
|
||||
(stag %betw ;~(plug (easy a) rang))
|
||||
(stag %binl ;~(plug (easy a) (ifix [kel (jest ',}?')] dim:ag)))
|
||||
(stag %bant ;~(plug (easy a) (ifix [kel (jest '}?')] dim:ag)))
|
||||
(stag %bant ;~(plug (easy a) (ifix [kel ker] dim:ag)))
|
||||
(stag %bint ;~(plug (easy a) (ifix [kel (jest ',}')] dim:ag)))
|
||||
(easy a)
|
||||
==
|
||||
q.u.q.vex
|
||||
::
|
||||
++ ranc
|
||||
|= [a=@ b=@]
|
||||
^- @
|
||||
?:((gth a b) 0 (con (bex a) $(a +(a))))
|
||||
::
|
||||
++ flap |=(a=@ (mix a (dec (bex 256))))
|
||||
::
|
||||
++ rang
|
||||
%+ sear |=([a=@ b=@] ?:((lte a b) (some [a b]) ~))
|
||||
(ifix [kel ker] ;~(plug dim:ag ;~(pfix com dim:ag)))
|
||||
::
|
||||
++ chun
|
||||
%+ knee *rege |. ~+
|
||||
;~ pose
|
||||
(cold %ende buc)
|
||||
(cold %sart ket)
|
||||
(cold %dote dot)
|
||||
%+ cook |=(a=(list char) (reel a |=([p=char q=rege] [%pair [%lite p] q])))
|
||||
;~(pfix (jest '\\Q') cape)
|
||||
|= tub=nail
|
||||
=+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker)))
|
||||
=+ bar=(foo tub)
|
||||
?~(q.bar (chad tub) (fail tub))
|
||||
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] apex))
|
||||
%+ cook |=([a=rege] [%capt a 0])
|
||||
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] apex)
|
||||
(ifix [(jest '(?:') per] apex)
|
||||
(stag %brac ;~(pfix sel seac))
|
||||
==
|
||||
::
|
||||
++ seac
|
||||
|= tub=nail
|
||||
?~ q.tub
|
||||
(fail tub)
|
||||
?: =(i.q.tub '^')
|
||||
(;~(pfix ket (cook flap sead)) tub)
|
||||
(sead tub)
|
||||
::
|
||||
++ sead
|
||||
%+ knee *@ |. ~+
|
||||
;~ pose
|
||||
|= tub=nail
|
||||
?~ q.tub
|
||||
(fail tub)
|
||||
?. =(i.q.tub ']')
|
||||
(fail tub)
|
||||
?~ t.q.tub
|
||||
(fail tub)
|
||||
?: =(i.t.q.tub '-')
|
||||
?~ t.t.q.tub
|
||||
(fail tub)
|
||||
?: =(i.t.t.q.tub ']')
|
||||
(;~(pfix ser (cook |=(a=@ (con (bex ']') a)) sade)) tub)
|
||||
(fail tub)
|
||||
(;~(pfix ser (cook |=(a=@ (con (bex ']') a)) sade)) tub)
|
||||
|= tub=nail
|
||||
?~ q.tub
|
||||
(fail tub)
|
||||
?. =(i.q.tub '-')
|
||||
(fail tub)
|
||||
?~ t.q.tub
|
||||
(fail tub)
|
||||
?: =(i.t.q.tub '-')
|
||||
?~ t.t.q.tub
|
||||
(fail tub)
|
||||
?: =(i.t.t.q.tub ']')
|
||||
(;~(pfix hep (cook |=(a=@ (con (bex '-') a)) sade)) tub)
|
||||
(fail tub)
|
||||
(;~(pfix hep (cook |=(a=@ (con (bex '-') a)) sade)) tub)
|
||||
(cook |=(a=[@ @] (con a)) ;~(plug seap sade))
|
||||
==
|
||||
::
|
||||
++ sade
|
||||
%+ knee *@ |. ~+
|
||||
;~ pose
|
||||
(cold (bex '-') (jest '-]'))
|
||||
(cold 0 ser)
|
||||
(cook |=([p=@ q=@] `@`(con p q)) ;~(plug seap sade))
|
||||
==
|
||||
::
|
||||
++ seap
|
||||
%+ knee *@ |. ~+
|
||||
;~ pose
|
||||
unid
|
||||
%+ ifix (jest '[:')^(jest ':]')
|
||||
;~(pose ;~(pfix ket (cook flap chas)) chas)
|
||||
%+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b))))
|
||||
;~(plug asp ;~(pfix hep alb))
|
||||
|= tub=nail
|
||||
?~ q.tub
|
||||
(fail tub)
|
||||
?~ t.q.tub
|
||||
((cook bex les) tub)
|
||||
?. =(i.t.q.tub '-')
|
||||
((cook bex les) tub)
|
||||
?~ t.t.q.tub
|
||||
((cook bex les) tub)
|
||||
?: =(i.t.t.q.tub ']')
|
||||
((cook bex les) tub)
|
||||
(fail tub)
|
||||
;~(pfix bas escd)
|
||||
==
|
||||
::
|
||||
++ cape
|
||||
%+ knee *tape |. ~+
|
||||
;~ pose
|
||||
(cold ~ (jest '\\E'))
|
||||
;~(plug next cape)
|
||||
(cook |=(a=char (tape [a ~])) next)
|
||||
(full (easy ~))
|
||||
==
|
||||
++ chas :: ascii character set
|
||||
=- (sear ~(get by -) sym)
|
||||
%- mo ^- (list ,[@tas @I])
|
||||
:~ alnum/alnum alpha/alpha ascii/ascii blank/blank cntrl/cntrl
|
||||
digit/digit graph/graph lower/lower print/print punct/punct
|
||||
space/space upper/upper word/wordc xdigit/xdigit
|
||||
==
|
||||
:: Character sets
|
||||
++ alnum :(con lower upper digit)
|
||||
++ alpha :(con lower upper)
|
||||
++ ascii (ranc 0 127)
|
||||
++ blank (con (bex 32) (bex 9))
|
||||
++ cntrl :(con (ranc 0 31) (bex 127))
|
||||
++ digit (ranc '0' '9')
|
||||
++ graph (ranc 33 126)
|
||||
++ lower (ranc 'a' 'z')
|
||||
++ print (ranc 32 126)
|
||||
++ punct ;: con
|
||||
(ranc '!' '/')
|
||||
(ranc ':' '@')
|
||||
(ranc '[' '`')
|
||||
(ranc '{' '~')
|
||||
==
|
||||
++ space :(con (ranc 9 13) (bex ' '))
|
||||
++ upper (ranc 'A' 'Z')
|
||||
++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13))
|
||||
++ wordc :(con digit lower upper (bex '_'))
|
||||
++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit)
|
||||
::
|
||||
++ chad
|
||||
%+ knee *rege |. ~+
|
||||
;~(pose (stag %lite nor) (stag %brac unid) ;~(pfix bas escp))
|
||||
::
|
||||
++ escd
|
||||
%+ cook bex
|
||||
;~ pose
|
||||
(cold 0 (just '0'))
|
||||
(sear ~(get by (mo a/7 t/9 n/10 v/11 f/12 r/13 ~)) low)
|
||||
(sear |=(a=@ ?:((lth a 256) (some a) ~)) (bass 8 (stun [2 3] cit)))
|
||||
;~(pfix (just 'x') (bass 16 (stun [2 2] hit)))
|
||||
(ifix [(jest 'x{') ker] (bass 16 (stun [2 2] hit)))
|
||||
mis
|
||||
==
|
||||
::
|
||||
++ escp
|
||||
;~ pose
|
||||
(stag %lite escd)
|
||||
(sear ~(get by (mo b/%boun w/[%brac wordc] z/%ende ~)) low)
|
||||
=- (sear ~(get by (mo -)) hig)
|
||||
~['A'^%sart 'B'^%bout 'C'^%dote 'Q'^%empt 'W'^[%brac (flap wordc)]]
|
||||
==
|
||||
::
|
||||
++ unid
|
||||
=+ cha=~(get by (mo d/digit s/white w/wordc ~))
|
||||
;~ pfix bas
|
||||
;~ pose
|
||||
(sear cha low)
|
||||
(cook flap (sear |=(a=@ (cha (add a 32))) hig))
|
||||
== ==
|
||||
--
|
||||
::
|
||||
++ ra :: regex engine
|
||||
|_ a=rege
|
||||
++ proc :: capture numbering
|
||||
|= b=@
|
||||
=- -(+ +>.$(a a))
|
||||
^- [p=@ a=rege]
|
||||
?- a
|
||||
[%capt *] =+ foo=$(a p.a, b +(b))
|
||||
[p.foo [%capt a.foo b]]
|
||||
[%eith *] =+ foo=$(a p.a)
|
||||
=+ bar=$(a q.a, b p.foo)
|
||||
[p.bar [%eith a.foo a.bar]]
|
||||
[%pair *] =+ foo=$(a p.a)
|
||||
=+ bar=$(a q.a, b p.foo)
|
||||
[p.bar [%pair a.foo a.bar]]
|
||||
[%manl *] =+ foo=$(a p.a)
|
||||
[p.foo [%manl a.foo]]
|
||||
[%plll *] =+ foo=$(a p.a)
|
||||
[p.foo [%plll a.foo]]
|
||||
[%binl *] =+ foo=$(a p.a)
|
||||
[p.foo [%binl a.foo q.a]]
|
||||
[%betl *] =+ foo=$(a p.a)
|
||||
[p.foo [%betl a.foo q.a r.a]]
|
||||
[%mant *] =+ foo=$(a p.a)
|
||||
[p.foo [%mant a.foo]]
|
||||
[%plls *] =+ foo=$(a p.a)
|
||||
[p.foo [%plls a.foo]]
|
||||
[%bant *] =+ foo=$(a p.a)
|
||||
[p.foo [%bant a.foo q.a]]
|
||||
[%bint *] =+ foo=$(a p.a)
|
||||
[p.foo [%bint a.foo q.a]]
|
||||
[%betw *] =+ foo=$(a p.a)
|
||||
[p.foo [%betw a.foo q.a r.a]]
|
||||
* [b a]
|
||||
==
|
||||
::
|
||||
++ cont
|
||||
|= [a=(map ,@u tape) b=(map ,@u tape)]
|
||||
(~(gas by *(map ,@u tape)) (weld (~(tap by a)) (~(tap by b))))
|
||||
::
|
||||
++ abor
|
||||
|= [a=char b=(unit ,[tape (map ,@u tape)])]
|
||||
^- (unit ,[tape (map ,@u tape)])
|
||||
?~ b
|
||||
b
|
||||
[~ [[a -.u.b] +.u.b]]
|
||||
::
|
||||
++ matc
|
||||
|= [b=tape c=tape]
|
||||
^- (unit (map ,@u tape))
|
||||
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep b %empt c)
|
||||
(bind foo |*(a=^ (~(put by +.a) 0 -.a)))
|
||||
::
|
||||
++ chet
|
||||
|= [b=(unit ,[tape (map ,@u tape)]) c=tape d=tape]
|
||||
^- (unit ,[tape (map ,@u tape)])
|
||||
?~ b
|
||||
b
|
||||
?~ -.u.b
|
||||
b
|
||||
=+ bar=(deep (slag (lent -.u.b) c) %empt d)
|
||||
?~ bar
|
||||
bar
|
||||
b
|
||||
++ blak (some ["" *(map ,@u tape)])
|
||||
++ word |=(a=char =((dis wordc:rags (bex a)) 0))
|
||||
++ deep
|
||||
|= [b=tape c=rege d=tape]
|
||||
^- (unit ,[tape (map ,@u tape)])
|
||||
?- a
|
||||
%dote ?~(b ~ (some [[i.b ~] *(map ,@u tape)]))
|
||||
%ende ?~(b blak ~)
|
||||
%sart ?:(=(b d) blak ~)
|
||||
%empt blak
|
||||
%boun =+ ^= luc
|
||||
?: =(b d)
|
||||
&
|
||||
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
|
||||
(word -.foo)
|
||||
=+ cuc=?~(b & (word -.b))
|
||||
?:(!=(luc cuc) blak ~)
|
||||
%bout =+ ^= luc
|
||||
?: =(b d)
|
||||
&
|
||||
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
|
||||
(word -.foo)
|
||||
=+ cuc=?~(b & (word -.b))
|
||||
?:(=(luc cuc) blak ~)
|
||||
[%capt *] =+ foo=$(a p.a)
|
||||
?~ foo
|
||||
foo
|
||||
=+ ft=u.foo
|
||||
=+ bar=$(a c, b (slag (lent -.ft) b), c %empt)
|
||||
?~ bar
|
||||
bar
|
||||
[~ [-.ft (~(put by +.ft) q.a -.ft)]]
|
||||
[%lite *] ?~(b ~ ?:(=(i.b p.a) (some [[i.b ~] *(map ,@u tape)]) ~))
|
||||
[%brac *] ?~ b
|
||||
~
|
||||
?. =((dis (bex `@`i.b) p.a) 0)
|
||||
(some [[i.b ~] *(map ,@u tape)])
|
||||
~
|
||||
[%eith *] =+ foo=(chet(a c) $(a p.a) b d)
|
||||
=+ bar=(chet(a c) $(a q.a) b d)
|
||||
?~ foo
|
||||
bar
|
||||
?~ bar
|
||||
foo
|
||||
=+ ft=u.foo
|
||||
=+ bt=u.bar
|
||||
?: (gte (lent -.ft) (lent -.bt))
|
||||
foo
|
||||
bar
|
||||
[%pair *] =+ foo=$(a p.a, c [%pair q.a c])
|
||||
?~ foo
|
||||
foo
|
||||
=+ ft=u.foo
|
||||
=+ bar=$(a q.a, b (slag (lent -.ft) b))
|
||||
?~ bar
|
||||
bar
|
||||
=+ bt=u.bar
|
||||
[~ [(weld -.ft -.bt) (cont +.ft +.bt)]]
|
||||
[%manl *] =+ foo=$(a p.a)
|
||||
?~ foo
|
||||
blak
|
||||
?~ -.u.foo
|
||||
blak
|
||||
$(a [%eith %empt [%pair p.a [%eith %empt a]]])
|
||||
[%mant *] =+ foo=$(a p.a)
|
||||
?~ foo
|
||||
blak
|
||||
=+ ft=u.foo
|
||||
?~ -.ft
|
||||
blak
|
||||
$(a [%eith [%pair p.a [%eith a %empt]] %empt])
|
||||
[%plls *] $(a [%pair p.a [%mant p.a]])
|
||||
[%plll *] $(a [%pair p.a [%manl p.a]])
|
||||
[%binl *] =+ min=?:(=(q.a 0) 0 (dec q.a))
|
||||
?: =(q.a 0)
|
||||
$(a [%manl p.a])
|
||||
$(a [%pair p.a [%binl p.a min]])
|
||||
[%bant *] ?: =(0 q.a)
|
||||
blak
|
||||
$(a [%pair p.a [%bant p.a (dec q.a)]])
|
||||
[%bint *] =+ min=?:(=(q.a 0) 0 (dec q.a))
|
||||
?: =(q.a 0)
|
||||
$(a [%mant p.a])
|
||||
$(a [%pair p.a [%bint p.a min]])
|
||||
[%betw *] ?: =(0 r.a)
|
||||
blak
|
||||
?: =(q.a 0)
|
||||
$(a [%eith [%pair p.a [%betw p.a 0 (dec r.a)]] %empt])
|
||||
$(a [%pair p.a [%betw p.a (dec q.a) (dec r.a)]])
|
||||
[%betl *] ?: =(0 r.a)
|
||||
blak
|
||||
?: =(q.a 0)
|
||||
$(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]])
|
||||
$(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]])
|
||||
==
|
||||
--
|
||||
::
|
||||
++ rexp :: Regex match
|
||||
~/ %rexp
|
||||
|= [a=tape b=tape]
|
||||
^- (unit (unit (map ,@u tape)))
|
||||
=+ ^= bar
|
||||
|= [a=@ b=(map ,@u tape)]
|
||||
?: =(a 0)
|
||||
b
|
||||
=+ c=(~(get by b) a)
|
||||
?~ c
|
||||
$(a (dec a), b (~(put by b) a ""))
|
||||
$(a (dec a))
|
||||
=+ par=(pars a)
|
||||
?~ par ~
|
||||
=+ poc=(~(proc ra u.par) 1)
|
||||
=+ c=b
|
||||
|-
|
||||
=+ foo=(matc:poc c b)
|
||||
?~ foo
|
||||
?~ c
|
||||
[~ ~]
|
||||
$(c t.c)
|
||||
[~ [~ (bar (dec p.poc) u.foo)]]
|
||||
::
|
||||
++ repg :: Global regex replace
|
||||
~/ %repg
|
||||
|= [a=tape b=tape c=tape]
|
||||
^- (unit tape)
|
||||
=+ par=(pars a)
|
||||
?~ par ~
|
||||
=+ poc=(~(proc ra u.par) 1)
|
||||
=+ d=b
|
||||
:- ~
|
||||
|-
|
||||
^- tape
|
||||
=+ foo=(matc:poc d b)
|
||||
?~ foo
|
||||
?~ d
|
||||
~
|
||||
[i.d $(d t.d)]
|
||||
=+ ft=(need (~(get by u.foo) 0))
|
||||
?~ d
|
||||
c
|
||||
(weld c $(d `tape`(slag (lent ft) `tape`d)))
|
||||
|
||||
::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||
:: section 2eN, pseudo-cryptography ::
|
||||
::
|
||||
|
Loading…
Reference in New Issue
Block a user