From 01becd1f7c677f405d22dc6a1c52990b6ed9a423 Mon Sep 17 00:00:00 2001 From: Anton Dyudin Date: Mon, 2 Mar 2015 17:51:00 -0800 Subject: [PATCH] - regex --- arvo/hoon.hoon | 437 +------------------------------------------------ 1 file changed, 5 insertions(+), 432 deletions(-) diff --git a/arvo/hoon.hoon b/arvo/hoon.hoon index af72f8fb9..17010aab1 100644 --- a/arvo/hoon.hoon +++ b/arvo/hoon.hoon @@ -639,6 +639,7 @@ :: ++ need :: demand |* a=(unit) + ~| %need ?~ a !! u.a :: @@ -3411,11 +3412,13 @@ == :: %u + ?: ?=(%c hay) + %+ welp ['0' 'c' (reap (pad:fa q.p.lot) '1')] + (c-co (enc:fa q.p.lot)) =- (weld p.gam ?:(=(0 q.p.lot) `tape`['0' ~] q.gam)) ^= gam ^- [p=tape q=tape] ?+ hay [~ ((ox-co [10 3] |=(a=@ ~(d ne a))) q.p.lot)] %b [['0' 'b' ~] ((ox-co [2 4] |=(a=@ ~(d ne a))) q.p.lot)] - %c [['0' 'c' (reap (pad:fa q.p.lot) '1')] (c-co (enc:fa q.p.lot))] %i [['0' 'i' ~] ((d-co 1) q.p.lot)] %x [['0' 'x' ~] ((ox-co [16 4] |=(a=@ ~(x ne a))) q.p.lot)] %v [['0' 'v' ~] ((ox-co [32 5] |=(a=@ ~(x ne a))) q.p.lot)] @@ -3696,437 +3699,7 @@ :::::::::::::::::::::::::::::::::::::::::::::::::::::::::: :: section 2eM, regular-expressions :: :: -++ pars - |= [a=tape] :: parse tape to rege - ^- (unit rege) - =+ foo=((full apex:rags) [[1 1] a]) - ?~ q.foo - ~ - [~ p.u.q.foo] -:: -++ rags :: rege parsers - => |% - ++ nor ;~(less (mask "^$()|*?+.[\\") (shim 1 127)) :: non-control char - ++ les ;~(less bas asp) :: not backslash - ++ lep ;~(less (mask "-^[]\\") asp) :: charset non-control - ++ asp (shim 32 126) :: printable ascii - ++ alb ;~(less ser asp) :: charset literal char - ++ mis ;~(less aln asp) :: non alphanumeric - -- - |% - ++ apex :: top level - %+ knee *rege |. ~+ - ;~ pose - ;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar apex)) - (stag %eith ;~(plug (easy %empt) ;~(pfix bar apex))) - (easy %empt) - == - :: - ++ mall - %+ knee *rege |. ~+ - ;~((bend |=(a=[rege rege] (some [%pair a]))) bets mall) - :: - ++ bets - %+ knee *rege |. ~+ - |= tub=nail - =+ vex=(chun tub) - ?~ q.vex - vex - =+ a=p.u.q.vex - %- ;~ pose - (cold [%eith %empt a] (jest '??')) - (cold [%manl a] (jest '*?')) - (cold [%plll a] (jest '+?')) - (cold [%eith a %empt] wut) - (cold [%mant a] tar) - (cold [%plls a] lus) - (stag %betl ;~(plug (easy a) ;~(sfix rang wut))) - (stag %betw ;~(plug (easy a) rang)) - (stag %binl ;~(plug (easy a) (ifix [kel (jest ',}?')] dim:ag))) - (stag %bant ;~(plug (easy a) (ifix [kel (jest '}?')] dim:ag))) - (stag %bant ;~(plug (easy a) (ifix [kel ker] dim:ag))) - (stag %bint ;~(plug (easy a) (ifix [kel (jest ',}')] dim:ag))) - (easy a) - == - q.u.q.vex - :: - ++ ranc - |= [a=@ b=@] - ^- @ - ?:((gth a b) 0 (con (bex a) $(a +(a)))) - :: - ++ flap |=(a=@ (mix a (dec (bex 256)))) - :: - ++ rang - %+ sear |=([a=@ b=@] ?:((lte a b) (some [a b]) ~)) - (ifix [kel ker] ;~(plug dim:ag ;~(pfix com dim:ag))) - :: - ++ chun - %+ knee *rege |. ~+ - ;~ pose - (cold %ende buc) - (cold %sart ket) - (cold %dote dot) - %+ cook |=(a=(list char) (reel a |=([p=char q=rege] [%pair [%lite p] q]))) - ;~(pfix (jest '\\Q') cape) - |= tub=nail - =+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker))) - =+ bar=(foo tub) - ?~(q.bar (chad tub) (fail tub)) - (cook |=([a=rege] [%capt a 0]) (ifix [pel per] apex)) - %+ cook |=([a=rege] [%capt a 0]) - (ifix [;~(plug (jest '(?P<') (plus aln) gar) per] apex) - (ifix [(jest '(?:') per] apex) - (stag %brac ;~(pfix sel seac)) - == - :: - ++ seac - |= tub=nail - ?~ q.tub - (fail tub) - ?: =(i.q.tub '^') - (;~(pfix ket (cook flap sead)) tub) - (sead tub) - :: - ++ sead - %+ knee *@ |. ~+ - ;~ pose - |= tub=nail - ?~ q.tub - (fail tub) - ?. =(i.q.tub ']') - (fail tub) - ?~ t.q.tub - (fail tub) - ?: =(i.t.q.tub '-') - ?~ t.t.q.tub - (fail tub) - ?: =(i.t.t.q.tub ']') - (;~(pfix ser (cook |=(a=@ (con (bex ']') a)) sade)) tub) - (fail tub) - (;~(pfix ser (cook |=(a=@ (con (bex ']') a)) sade)) tub) - |= tub=nail - ?~ q.tub - (fail tub) - ?. =(i.q.tub '-') - (fail tub) - ?~ t.q.tub - (fail tub) - ?: =(i.t.q.tub '-') - ?~ t.t.q.tub - (fail tub) - ?: =(i.t.t.q.tub ']') - (;~(pfix hep (cook |=(a=@ (con (bex '-') a)) sade)) tub) - (fail tub) - (;~(pfix hep (cook |=(a=@ (con (bex '-') a)) sade)) tub) - (cook |=(a=[@ @] (con a)) ;~(plug seap sade)) - == - :: - ++ sade - %+ knee *@ |. ~+ - ;~ pose - (cold (bex '-') (jest '-]')) - (cold 0 ser) - (cook |=([p=@ q=@] `@`(con p q)) ;~(plug seap sade)) - == - :: - ++ seap - %+ knee *@ |. ~+ - ;~ pose - unid - %+ ifix (jest '[:')^(jest ':]') - ;~(pose ;~(pfix ket (cook flap chas)) chas) - %+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b)))) - ;~(plug asp ;~(pfix hep alb)) - |= tub=nail - ?~ q.tub - (fail tub) - ?~ t.q.tub - ((cook bex les) tub) - ?. =(i.t.q.tub '-') - ((cook bex les) tub) - ?~ t.t.q.tub - ((cook bex les) tub) - ?: =(i.t.t.q.tub ']') - ((cook bex les) tub) - (fail tub) - ;~(pfix bas escd) - == - :: - ++ cape - %+ knee *tape |. ~+ - ;~ pose - (cold ~ (jest '\\E')) - ;~(plug next cape) - (cook |=(a=char (tape [a ~])) next) - (full (easy ~)) - == - ++ chas :: ascii character set - =- (sear ~(get by -) sym) - %- mo ^- (list ,[@tas @I]) - :~ alnum/alnum alpha/alpha ascii/ascii blank/blank cntrl/cntrl - digit/digit graph/graph lower/lower print/print punct/punct - space/space upper/upper word/wordc xdigit/xdigit - == - :: Character sets - ++ alnum :(con lower upper digit) - ++ alpha :(con lower upper) - ++ ascii (ranc 0 127) - ++ blank (con (bex 32) (bex 9)) - ++ cntrl :(con (ranc 0 31) (bex 127)) - ++ digit (ranc '0' '9') - ++ graph (ranc 33 126) - ++ lower (ranc 'a' 'z') - ++ print (ranc 32 126) - ++ punct ;: con - (ranc '!' '/') - (ranc ':' '@') - (ranc '[' '`') - (ranc '{' '~') - == - ++ space :(con (ranc 9 13) (bex ' ')) - ++ upper (ranc 'A' 'Z') - ++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13)) - ++ wordc :(con digit lower upper (bex '_')) - ++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit) - :: - ++ chad - %+ knee *rege |. ~+ - ;~(pose (stag %lite nor) (stag %brac unid) ;~(pfix bas escp)) - :: - ++ escd - %+ cook bex - ;~ pose - (cold 0 (just '0')) - (sear ~(get by (mo a/7 t/9 n/10 v/11 f/12 r/13 ~)) low) - (sear |=(a=@ ?:((lth a 256) (some a) ~)) (bass 8 (stun [2 3] cit))) - ;~(pfix (just 'x') (bass 16 (stun [2 2] hit))) - (ifix [(jest 'x{') ker] (bass 16 (stun [2 2] hit))) - mis - == - :: - ++ escp - ;~ pose - (stag %lite escd) - (sear ~(get by (mo b/%boun w/[%brac wordc] z/%ende ~)) low) - =- (sear ~(get by (mo -)) hig) - ~['A'^%sart 'B'^%bout 'C'^%dote 'Q'^%empt 'W'^[%brac (flap wordc)]] - == - :: - ++ unid - =+ cha=~(get by (mo d/digit s/white w/wordc ~)) - ;~ pfix bas - ;~ pose - (sear cha low) - (cook flap (sear |=(a=@ (cha (add a 32))) hig)) - == == - -- -:: -++ ra :: regex engine - |_ a=rege - ++ proc :: capture numbering - |= b=@ - =- -(+ +>.$(a a)) - ^- [p=@ a=rege] - ?- a - [%capt *] =+ foo=$(a p.a, b +(b)) - [p.foo [%capt a.foo b]] - [%eith *] =+ foo=$(a p.a) - =+ bar=$(a q.a, b p.foo) - [p.bar [%eith a.foo a.bar]] - [%pair *] =+ foo=$(a p.a) - =+ bar=$(a q.a, b p.foo) - [p.bar [%pair a.foo a.bar]] - [%manl *] =+ foo=$(a p.a) - [p.foo [%manl a.foo]] - [%plll *] =+ foo=$(a p.a) - [p.foo [%plll a.foo]] - [%binl *] =+ foo=$(a p.a) - [p.foo [%binl a.foo q.a]] - [%betl *] =+ foo=$(a p.a) - [p.foo [%betl a.foo q.a r.a]] - [%mant *] =+ foo=$(a p.a) - [p.foo [%mant a.foo]] - [%plls *] =+ foo=$(a p.a) - [p.foo [%plls a.foo]] - [%bant *] =+ foo=$(a p.a) - [p.foo [%bant a.foo q.a]] - [%bint *] =+ foo=$(a p.a) - [p.foo [%bint a.foo q.a]] - [%betw *] =+ foo=$(a p.a) - [p.foo [%betw a.foo q.a r.a]] - * [b a] - == - :: - ++ cont - |= [a=(map ,@u tape) b=(map ,@u tape)] - (~(gas by *(map ,@u tape)) (weld (~(tap by a)) (~(tap by b)))) - :: - ++ abor - |= [a=char b=(unit ,[tape (map ,@u tape)])] - ^- (unit ,[tape (map ,@u tape)]) - ?~ b - b - [~ [[a -.u.b] +.u.b]] - :: - ++ matc - |= [b=tape c=tape] - ^- (unit (map ,@u tape)) - =+ foo=`(unit ,[tape (map ,@u tape)])`(deep b %empt c) - (bind foo |*(a=^ (~(put by +.a) 0 -.a))) - :: - ++ chet - |= [b=(unit ,[tape (map ,@u tape)]) c=tape d=tape] - ^- (unit ,[tape (map ,@u tape)]) - ?~ b - b - ?~ -.u.b - b - =+ bar=(deep (slag (lent -.u.b) c) %empt d) - ?~ bar - bar - b - ++ blak (some ["" *(map ,@u tape)]) - ++ word |=(a=char =((dis wordc:rags (bex a)) 0)) - ++ deep - |= [b=tape c=rege d=tape] - ^- (unit ,[tape (map ,@u tape)]) - ?- a - %dote ?~(b ~ (some [[i.b ~] *(map ,@u tape)])) - %ende ?~(b blak ~) - %sart ?:(=(b d) blak ~) - %empt blak - %boun =+ ^= luc - ?: =(b d) - & - =+ foo=(slag (dec (sub (lent d) (lent b))) d) - (word -.foo) - =+ cuc=?~(b & (word -.b)) - ?:(!=(luc cuc) blak ~) - %bout =+ ^= luc - ?: =(b d) - & - =+ foo=(slag (dec (sub (lent d) (lent b))) d) - (word -.foo) - =+ cuc=?~(b & (word -.b)) - ?:(=(luc cuc) blak ~) - [%capt *] =+ foo=$(a p.a) - ?~ foo - foo - =+ ft=u.foo - =+ bar=$(a c, b (slag (lent -.ft) b), c %empt) - ?~ bar - bar - [~ [-.ft (~(put by +.ft) q.a -.ft)]] - [%lite *] ?~(b ~ ?:(=(i.b p.a) (some [[i.b ~] *(map ,@u tape)]) ~)) - [%brac *] ?~ b - ~ - ?. =((dis (bex `@`i.b) p.a) 0) - (some [[i.b ~] *(map ,@u tape)]) - ~ - [%eith *] =+ foo=(chet(a c) $(a p.a) b d) - =+ bar=(chet(a c) $(a q.a) b d) - ?~ foo - bar - ?~ bar - foo - =+ ft=u.foo - =+ bt=u.bar - ?: (gte (lent -.ft) (lent -.bt)) - foo - bar - [%pair *] =+ foo=$(a p.a, c [%pair q.a c]) - ?~ foo - foo - =+ ft=u.foo - =+ bar=$(a q.a, b (slag (lent -.ft) b)) - ?~ bar - bar - =+ bt=u.bar - [~ [(weld -.ft -.bt) (cont +.ft +.bt)]] - [%manl *] =+ foo=$(a p.a) - ?~ foo - blak - ?~ -.u.foo - blak - $(a [%eith %empt [%pair p.a [%eith %empt a]]]) - [%mant *] =+ foo=$(a p.a) - ?~ foo - blak - =+ ft=u.foo - ?~ -.ft - blak - $(a [%eith [%pair p.a [%eith a %empt]] %empt]) - [%plls *] $(a [%pair p.a [%mant p.a]]) - [%plll *] $(a [%pair p.a [%manl p.a]]) - [%binl *] =+ min=?:(=(q.a 0) 0 (dec q.a)) - ?: =(q.a 0) - $(a [%manl p.a]) - $(a [%pair p.a [%binl p.a min]]) - [%bant *] ?: =(0 q.a) - blak - $(a [%pair p.a [%bant p.a (dec q.a)]]) - [%bint *] =+ min=?:(=(q.a 0) 0 (dec q.a)) - ?: =(q.a 0) - $(a [%mant p.a]) - $(a [%pair p.a [%bint p.a min]]) - [%betw *] ?: =(0 r.a) - blak - ?: =(q.a 0) - $(a [%eith [%pair p.a [%betw p.a 0 (dec r.a)]] %empt]) - $(a [%pair p.a [%betw p.a (dec q.a) (dec r.a)]]) - [%betl *] ?: =(0 r.a) - blak - ?: =(q.a 0) - $(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]]) - $(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]]) - == - -- -:: -++ rexp :: Regex match - ~/ %rexp - |= [a=tape b=tape] - ^- (unit (unit (map ,@u tape))) - =+ ^= bar - |= [a=@ b=(map ,@u tape)] - ?: =(a 0) - b - =+ c=(~(get by b) a) - ?~ c - $(a (dec a), b (~(put by b) a "")) - $(a (dec a)) - =+ par=(pars a) - ?~ par ~ - =+ poc=(~(proc ra u.par) 1) - =+ c=b - |- - =+ foo=(matc:poc c b) - ?~ foo - ?~ c - [~ ~] - $(c t.c) - [~ [~ (bar (dec p.poc) u.foo)]] -:: -++ repg :: Global regex replace - ~/ %repg - |= [a=tape b=tape c=tape] - ^- (unit tape) - =+ par=(pars a) - ?~ par ~ - =+ poc=(~(proc ra u.par) 1) - =+ d=b - :- ~ - |- - ^- tape - =+ foo=(matc:poc d b) - ?~ foo - ?~ d - ~ - [i.d $(d t.d)] - =+ ft=(need (~(get by u.foo) 0)) - ?~ d - c - (weld c $(d `tape`(slag (lent ft) `tape`d))) + :::::::::::::::::::::::::::::::::::::::::::::::::::::::::: :: section 2eN, pseudo-cryptography :: ::