mirror of
https://github.com/ilyakooo0/urbit.git
synced 2024-09-21 15:38:59 +03:00
rege refactoring
This commit is contained in:
parent
afaf131c72
commit
b27098ab6c
183
arvo/hoon.hoon
183
arvo/hoon.hoon
@ -132,7 +132,7 @@
|
||||
$% [%lite p=char] :: literal
|
||||
[%pair p=rege q=rege] :: ordering
|
||||
[%capt p=rege q=@u] :: capture group
|
||||
[%brac p=@] :: p is 256 bitmask
|
||||
[%brac p=@I] :: p is 256 bitmask
|
||||
[%eith p=rege q=rege] :: either
|
||||
[%mant p=rege] :: greedy 0 or more
|
||||
[%plls p=rege] :: greedy 1 or more
|
||||
@ -3677,34 +3677,26 @@
|
||||
++ pars
|
||||
|= [a=tape] :: parse tape to rege
|
||||
^- (unit rege)
|
||||
=+ foo=((full anns) [[1 1] a])
|
||||
=+ foo=((full apex:rags) [[1 1] a])
|
||||
?~ q.foo
|
||||
~
|
||||
[~ p.u.q.foo]
|
||||
::
|
||||
++ nor
|
||||
;~ pose
|
||||
(shim 1 35)
|
||||
(shim 37 39)
|
||||
(shim 44 45)
|
||||
(shim 47 62)
|
||||
(shim 64 90)
|
||||
(shim 93 93)
|
||||
(shim 95 122)
|
||||
(shim 123 123)
|
||||
(shim 125 127)
|
||||
==
|
||||
++ les ;~(pose (shim 32 91) (shim 93 126))
|
||||
++ lep ;~(pose (shim 32 45) (shim 46 90) (shim 95 126))
|
||||
++ alm (shim 32 126)
|
||||
++ alb ;~(pose (shim 32 92) (shim 94 126))
|
||||
++ mis ;~(pose (shim 32 47) (shim 58 64) (shim 91 96) (shim 123 126))
|
||||
::
|
||||
++ anns :: top level rege parse
|
||||
++ rags :: rege parsers
|
||||
=> |%
|
||||
++ nor ;~(less (mask "^$()|*?+.[\\") (shim 1 127)) :: non-control char
|
||||
++ les ;~(less bas asp) :: not backslash
|
||||
++ lep ;~(less (mask "^[]\\") asp) :: charset non-control
|
||||
++ asp (shim 32 126) :: printable ascii
|
||||
++ alb ;~(less ser asp) :: charset literal char
|
||||
++ mis ;~(less aln asp) :: non alphanumeric
|
||||
--
|
||||
|%
|
||||
++ apex :: top level
|
||||
%+ knee *rege |. ~+
|
||||
;~ pose
|
||||
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar anns))
|
||||
(stag %eith ;~(plug (easy %empt) ;~(pfix bar anns)))
|
||||
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar apex))
|
||||
(stag %eith ;~(plug (easy %empt) ;~(pfix bar apex)))
|
||||
(easy %empt)
|
||||
==
|
||||
::
|
||||
@ -3759,10 +3751,10 @@
|
||||
=+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker)))
|
||||
=+ bar=(foo tub)
|
||||
?~(q.bar (chad tub) (fail tub))
|
||||
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] anns))
|
||||
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] apex))
|
||||
%+ cook |=([a=rege] [%capt a 0])
|
||||
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] anns)
|
||||
(ifix [(jest '(?:') per] anns)
|
||||
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] apex)
|
||||
(ifix [(jest '(?:') per] apex)
|
||||
(stag %brac ;~(pfix sel seac))
|
||||
==
|
||||
::
|
||||
@ -3820,32 +3812,10 @@
|
||||
%+ knee *@ |. ~+
|
||||
;~ pose
|
||||
unid
|
||||
(cold lower (jest '[:lower:]'))
|
||||
(cold upper (jest '[:upper:]'))
|
||||
(cold digit (jest '[:digit:]'))
|
||||
(cold print (jest '[:print:]'))
|
||||
(cold graph (jest '[:graph:]'))
|
||||
(cold blank (jest '[:blank:]'))
|
||||
(cold space (jest '[:space:]'))
|
||||
(cold cntrl (jest '[:cntrl:]'))
|
||||
(cold alpha (jest '[:alpha:]'))
|
||||
(cold xdigit (jest '[:xdigit:]'))
|
||||
(cold alnum (jest '[:alnum:]'))
|
||||
(cold punct (jest '[:punct:]'))
|
||||
(cold (flap lower) (jest '[:^lower:]'))
|
||||
(cold (flap upper) (jest '[:^upper:]'))
|
||||
(cold (flap digit) (jest '[:^digit:]'))
|
||||
(cold (flap print) (jest '[:^print:]'))
|
||||
(cold (flap graph) (jest '[:^graph:]'))
|
||||
(cold (flap blank) (jest '[:^blank:]'))
|
||||
(cold (flap space) (jest '[:^space:]'))
|
||||
(cold (flap cntrl) (jest '[:^cntrl:]'))
|
||||
(cold (flap alpha) (jest '[:^alpha:]'))
|
||||
(cold (flap xdigit) (jest '[:^xdigit:]'))
|
||||
(cold (flap alnum) (jest '[:^alnum:]'))
|
||||
(cold (flap punct) (jest '[:^punct:]'))
|
||||
%+ ifix (jest '[:')^(jest ':]')
|
||||
;~(pose ;~(pfix ket (cook flap chas)) chas)
|
||||
%+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b))))
|
||||
;~(plug alm ;~(pfix hep alb))
|
||||
;~(plug asp ;~(pfix hep alb))
|
||||
|= tub=nail
|
||||
?~ q.tub
|
||||
(fail tub)
|
||||
@ -3869,25 +3839,33 @@
|
||||
(cook |=(a=char (tape [a ~])) next)
|
||||
(full (easy ~))
|
||||
==
|
||||
::
|
||||
++ lower (ranc 'a' 'z')
|
||||
++ upper (ranc 'A' 'Z')
|
||||
++ digit (ranc '0' '9')
|
||||
++ print (ranc 32 126)
|
||||
++ graph (ranc 33 126)
|
||||
++ blank (con (bex 32) (bex 9))
|
||||
++ space :(con (ranc 9 13) (bex ' '))
|
||||
++ cntrl :(con (ranc 0 31) (bex 127))
|
||||
++ alpha :(con lower upper)
|
||||
++ chas :: ascii character set
|
||||
=- (sear ~(get by -) sym)
|
||||
%- mo ^- (list ,[@tas @I])
|
||||
:~ alnum/alnum alpha/alpha ascii/ascii blank/blank cntrl/cntrl
|
||||
digit/digit graph/graph lower/lower print/print punct/punct
|
||||
space/space upper/upper word/wordc xdigit/xdigit
|
||||
==
|
||||
:: Character sets
|
||||
++ alnum :(con lower upper digit)
|
||||
++ alpha :(con lower upper)
|
||||
++ ascii (ranc 0 127)
|
||||
++ blank (con (bex 32) (bex 9))
|
||||
++ cntrl :(con (ranc 0 31) (bex 127))
|
||||
++ digit (ranc '0' '9')
|
||||
++ graph (ranc 33 126)
|
||||
++ lower (ranc 'a' 'z')
|
||||
++ print (ranc 32 126)
|
||||
++ punct ;: con
|
||||
(ranc '!' '/')
|
||||
(ranc ':' '@')
|
||||
(ranc '[' '`')
|
||||
(ranc '{' '~')
|
||||
==
|
||||
++ wordc :(con digit lower upper (bex 95))
|
||||
++ space :(con (ranc 9 13) (bex ' '))
|
||||
++ upper (ranc 'A' 'Z')
|
||||
++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13))
|
||||
++ wordc :(con digit lower upper (bex '_'))
|
||||
++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit)
|
||||
::
|
||||
++ chad
|
||||
@ -3944,37 +3922,41 @@
|
||||
(cold wordc (jest '\\w'))
|
||||
(cold (flap wordc) (jest '\\W'))
|
||||
==
|
||||
--
|
||||
::
|
||||
++ ra :: regex engine
|
||||
|_ a=rege
|
||||
++ proc :: capture numbering
|
||||
|= [a=rege b=@]
|
||||
^- [@ rege]
|
||||
|= b=@
|
||||
=- -(+ +>.$(a a))
|
||||
^- [p=@ a=rege]
|
||||
?- a
|
||||
[%capt *] =+ foo=$(a p.a, b +(b))
|
||||
[-.foo [%capt +.foo b]]
|
||||
[p.foo [%capt a.foo b]]
|
||||
[%eith *] =+ foo=$(a p.a)
|
||||
=+ bar=$(a q.a, b -.foo)
|
||||
[-.bar [%eith +.foo +.bar]]
|
||||
=+ bar=$(a q.a, b p.foo)
|
||||
[p.bar [%eith a.foo a.bar]]
|
||||
[%pair *] =+ foo=$(a p.a)
|
||||
=+ bar=$(a q.a, b -.foo)
|
||||
[-.bar [%pair +.foo +.bar]]
|
||||
=+ bar=$(a q.a, b p.foo)
|
||||
[p.bar [%pair a.foo a.bar]]
|
||||
[%manl *] =+ foo=$(a p.a)
|
||||
[-.foo [%manl +.foo]]
|
||||
[p.foo [%manl a.foo]]
|
||||
[%plll *] =+ foo=$(a p.a)
|
||||
[-.foo [%plll +.foo]]
|
||||
[p.foo [%plll a.foo]]
|
||||
[%binl *] =+ foo=$(a p.a)
|
||||
[-.foo [%binl +.foo q.a]]
|
||||
[p.foo [%binl a.foo q.a]]
|
||||
[%betl *] =+ foo=$(a p.a)
|
||||
[-.foo [%betl +.foo q.a r.a]]
|
||||
[p.foo [%betl a.foo q.a r.a]]
|
||||
[%mant *] =+ foo=$(a p.a)
|
||||
[-.foo [%mant +.foo]]
|
||||
[p.foo [%mant a.foo]]
|
||||
[%plls *] =+ foo=$(a p.a)
|
||||
[-.foo [%plls +.foo]]
|
||||
[p.foo [%plls a.foo]]
|
||||
[%bant *] =+ foo=$(a p.a)
|
||||
[-.foo [%bant +.foo q.a]]
|
||||
[p.foo [%bant a.foo q.a]]
|
||||
[%bint *] =+ foo=$(a p.a)
|
||||
[-.foo [%bint +.foo q.a]]
|
||||
[p.foo [%bint a.foo q.a]]
|
||||
[%betw *] =+ foo=$(a p.a)
|
||||
[-.foo [%betw +.foo q.a r.a]]
|
||||
[p.foo [%betw a.foo q.a r.a]]
|
||||
* [b a]
|
||||
==
|
||||
::
|
||||
@ -3990,26 +3972,26 @@
|
||||
[~ [[a -.u.b] +.u.b]]
|
||||
::
|
||||
++ matc
|
||||
|= [a=rege b=tape c=tape]
|
||||
|= [b=tape c=tape]
|
||||
^- (unit (map ,@u tape))
|
||||
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep a b %empt c)
|
||||
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep b %empt c)
|
||||
(bind foo |*(a=^ (~(put by +.a) 0 -.a)))
|
||||
::
|
||||
++ chet
|
||||
|= [a=(unit ,[tape (map ,@u tape)]) b=rege c=tape d=tape]
|
||||
|= [b=(unit ,[tape (map ,@u tape)]) c=tape d=tape]
|
||||
^- (unit ,[tape (map ,@u tape)])
|
||||
?~ a
|
||||
a
|
||||
=+ ft=u.a
|
||||
?~ -.ft
|
||||
a
|
||||
=+ bar=(deep b (slag (lent -.ft) c) %empt d)
|
||||
?~ b
|
||||
b
|
||||
?~ -.u.b
|
||||
b
|
||||
=+ bar=(deep (slag (lent -.u.b) c) %empt d)
|
||||
?~ bar
|
||||
bar
|
||||
a
|
||||
b
|
||||
++ blak (some ["" _(map ,@u tape)])
|
||||
++ word |=(a=char =((dis wordc:rags (bex a)) 0))
|
||||
++ deep
|
||||
|= [a=rege b=tape c=rege d=tape]
|
||||
|= [b=tape c=rege d=tape]
|
||||
^- (unit ,[tape (map ,@u tape)])
|
||||
?- a
|
||||
%dote ?~(b ~ (some [[i.b ~] _(map ,@u tape)]))
|
||||
@ -4020,15 +4002,15 @@
|
||||
?: =(b d)
|
||||
&
|
||||
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
|
||||
=((dis wordc (bex -.foo)) 0)
|
||||
=+ cuc=?~(b & =((dis wordc (bex -.b)) 0))
|
||||
(word -.foo)
|
||||
=+ cuc=?~(b & (word -.b))
|
||||
?:(!=(luc cuc) blak ~)
|
||||
%bout =+ ^= luc
|
||||
?: =(b d)
|
||||
&
|
||||
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
|
||||
=((dis wordc (bex -.foo)) 0)
|
||||
=+ cuc=?~(b & =((dis wordc (bex -.b)) 0))
|
||||
(word -.foo)
|
||||
=+ cuc=?~(b & (word -.b))
|
||||
?:(=(luc cuc) blak ~)
|
||||
[%capt *] =+ foo=$(a p.a)
|
||||
?~ foo
|
||||
@ -4044,8 +4026,8 @@
|
||||
?. =((dis (bex `@`i.b) p.a) 0)
|
||||
(some [[i.b ~] _(map ,@u tape)])
|
||||
~
|
||||
[%eith *] =+ foo=(chet $(a p.a) c b d)
|
||||
=+ bar=(chet $(a q.a) c b d)
|
||||
[%eith *] =+ foo=(chet(a c) $(a p.a) b d)
|
||||
=+ bar=(chet(a c) $(a q.a) b d)
|
||||
?~ foo
|
||||
bar
|
||||
?~ bar
|
||||
@ -4101,6 +4083,7 @@
|
||||
$(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]])
|
||||
$(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]])
|
||||
==
|
||||
--
|
||||
::
|
||||
++ rexp :: Regex match
|
||||
~/ %rexp
|
||||
@ -4116,15 +4099,15 @@
|
||||
$(a (dec a))
|
||||
=+ par=(pars a)
|
||||
?~ par ~
|
||||
=+ poc=(proc u.par 1)
|
||||
=+ poc=(~(proc ra u.par) 1)
|
||||
=+ c=b
|
||||
|-
|
||||
=+ foo=(matc +.poc c b)
|
||||
=+ foo=(matc:poc c b)
|
||||
?~ foo
|
||||
?~ c
|
||||
[~ ~]
|
||||
$(c t.c)
|
||||
[~ [~ (bar (dec -.poc) u.foo)]]
|
||||
[~ [~ (bar (dec p.poc) u.foo)]]
|
||||
::
|
||||
++ repg :: Global regex replace
|
||||
~/ %repg
|
||||
@ -4132,12 +4115,12 @@
|
||||
^- (unit tape)
|
||||
=+ par=(pars a)
|
||||
?~ par ~
|
||||
=+ poc=(proc u.par 1)
|
||||
=+ poc=(~(proc ra u.par) 1)
|
||||
=+ d=b
|
||||
:- ~
|
||||
|-
|
||||
^- tape
|
||||
=+ foo=(matc +.poc d b)
|
||||
=+ foo=(matc:poc d b)
|
||||
?~ foo
|
||||
?~ d
|
||||
~
|
||||
|
Loading…
Reference in New Issue
Block a user