mirror of
https://github.com/ilyakooo0/urbit.git
synced 2024-09-21 15:38:59 +03:00
rege refactoring
This commit is contained in:
parent
afaf131c72
commit
b27098ab6c
183
arvo/hoon.hoon
183
arvo/hoon.hoon
@ -132,7 +132,7 @@
|
|||||||
$% [%lite p=char] :: literal
|
$% [%lite p=char] :: literal
|
||||||
[%pair p=rege q=rege] :: ordering
|
[%pair p=rege q=rege] :: ordering
|
||||||
[%capt p=rege q=@u] :: capture group
|
[%capt p=rege q=@u] :: capture group
|
||||||
[%brac p=@] :: p is 256 bitmask
|
[%brac p=@I] :: p is 256 bitmask
|
||||||
[%eith p=rege q=rege] :: either
|
[%eith p=rege q=rege] :: either
|
||||||
[%mant p=rege] :: greedy 0 or more
|
[%mant p=rege] :: greedy 0 or more
|
||||||
[%plls p=rege] :: greedy 1 or more
|
[%plls p=rege] :: greedy 1 or more
|
||||||
@ -3677,34 +3677,26 @@
|
|||||||
++ pars
|
++ pars
|
||||||
|= [a=tape] :: parse tape to rege
|
|= [a=tape] :: parse tape to rege
|
||||||
^- (unit rege)
|
^- (unit rege)
|
||||||
=+ foo=((full anns) [[1 1] a])
|
=+ foo=((full apex:rags) [[1 1] a])
|
||||||
?~ q.foo
|
?~ q.foo
|
||||||
~
|
~
|
||||||
[~ p.u.q.foo]
|
[~ p.u.q.foo]
|
||||||
::
|
::
|
||||||
++ nor
|
++ rags :: rege parsers
|
||||||
;~ pose
|
=> |%
|
||||||
(shim 1 35)
|
++ nor ;~(less (mask "^$()|*?+.[\\") (shim 1 127)) :: non-control char
|
||||||
(shim 37 39)
|
++ les ;~(less bas asp) :: not backslash
|
||||||
(shim 44 45)
|
++ lep ;~(less (mask "^[]\\") asp) :: charset non-control
|
||||||
(shim 47 62)
|
++ asp (shim 32 126) :: printable ascii
|
||||||
(shim 64 90)
|
++ alb ;~(less ser asp) :: charset literal char
|
||||||
(shim 93 93)
|
++ mis ;~(less aln asp) :: non alphanumeric
|
||||||
(shim 95 122)
|
--
|
||||||
(shim 123 123)
|
|%
|
||||||
(shim 125 127)
|
++ apex :: top level
|
||||||
==
|
|
||||||
++ les ;~(pose (shim 32 91) (shim 93 126))
|
|
||||||
++ lep ;~(pose (shim 32 45) (shim 46 90) (shim 95 126))
|
|
||||||
++ alm (shim 32 126)
|
|
||||||
++ alb ;~(pose (shim 32 92) (shim 94 126))
|
|
||||||
++ mis ;~(pose (shim 32 47) (shim 58 64) (shim 91 96) (shim 123 126))
|
|
||||||
::
|
|
||||||
++ anns :: top level rege parse
|
|
||||||
%+ knee *rege |. ~+
|
%+ knee *rege |. ~+
|
||||||
;~ pose
|
;~ pose
|
||||||
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar anns))
|
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar apex))
|
||||||
(stag %eith ;~(plug (easy %empt) ;~(pfix bar anns)))
|
(stag %eith ;~(plug (easy %empt) ;~(pfix bar apex)))
|
||||||
(easy %empt)
|
(easy %empt)
|
||||||
==
|
==
|
||||||
::
|
::
|
||||||
@ -3759,10 +3751,10 @@
|
|||||||
=+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker)))
|
=+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker)))
|
||||||
=+ bar=(foo tub)
|
=+ bar=(foo tub)
|
||||||
?~(q.bar (chad tub) (fail tub))
|
?~(q.bar (chad tub) (fail tub))
|
||||||
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] anns))
|
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] apex))
|
||||||
%+ cook |=([a=rege] [%capt a 0])
|
%+ cook |=([a=rege] [%capt a 0])
|
||||||
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] anns)
|
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] apex)
|
||||||
(ifix [(jest '(?:') per] anns)
|
(ifix [(jest '(?:') per] apex)
|
||||||
(stag %brac ;~(pfix sel seac))
|
(stag %brac ;~(pfix sel seac))
|
||||||
==
|
==
|
||||||
::
|
::
|
||||||
@ -3820,32 +3812,10 @@
|
|||||||
%+ knee *@ |. ~+
|
%+ knee *@ |. ~+
|
||||||
;~ pose
|
;~ pose
|
||||||
unid
|
unid
|
||||||
(cold lower (jest '[:lower:]'))
|
%+ ifix (jest '[:')^(jest ':]')
|
||||||
(cold upper (jest '[:upper:]'))
|
;~(pose ;~(pfix ket (cook flap chas)) chas)
|
||||||
(cold digit (jest '[:digit:]'))
|
|
||||||
(cold print (jest '[:print:]'))
|
|
||||||
(cold graph (jest '[:graph:]'))
|
|
||||||
(cold blank (jest '[:blank:]'))
|
|
||||||
(cold space (jest '[:space:]'))
|
|
||||||
(cold cntrl (jest '[:cntrl:]'))
|
|
||||||
(cold alpha (jest '[:alpha:]'))
|
|
||||||
(cold xdigit (jest '[:xdigit:]'))
|
|
||||||
(cold alnum (jest '[:alnum:]'))
|
|
||||||
(cold punct (jest '[:punct:]'))
|
|
||||||
(cold (flap lower) (jest '[:^lower:]'))
|
|
||||||
(cold (flap upper) (jest '[:^upper:]'))
|
|
||||||
(cold (flap digit) (jest '[:^digit:]'))
|
|
||||||
(cold (flap print) (jest '[:^print:]'))
|
|
||||||
(cold (flap graph) (jest '[:^graph:]'))
|
|
||||||
(cold (flap blank) (jest '[:^blank:]'))
|
|
||||||
(cold (flap space) (jest '[:^space:]'))
|
|
||||||
(cold (flap cntrl) (jest '[:^cntrl:]'))
|
|
||||||
(cold (flap alpha) (jest '[:^alpha:]'))
|
|
||||||
(cold (flap xdigit) (jest '[:^xdigit:]'))
|
|
||||||
(cold (flap alnum) (jest '[:^alnum:]'))
|
|
||||||
(cold (flap punct) (jest '[:^punct:]'))
|
|
||||||
%+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b))))
|
%+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b))))
|
||||||
;~(plug alm ;~(pfix hep alb))
|
;~(plug asp ;~(pfix hep alb))
|
||||||
|= tub=nail
|
|= tub=nail
|
||||||
?~ q.tub
|
?~ q.tub
|
||||||
(fail tub)
|
(fail tub)
|
||||||
@ -3869,25 +3839,33 @@
|
|||||||
(cook |=(a=char (tape [a ~])) next)
|
(cook |=(a=char (tape [a ~])) next)
|
||||||
(full (easy ~))
|
(full (easy ~))
|
||||||
==
|
==
|
||||||
::
|
++ chas :: ascii character set
|
||||||
++ lower (ranc 'a' 'z')
|
=- (sear ~(get by -) sym)
|
||||||
++ upper (ranc 'A' 'Z')
|
%- mo ^- (list ,[@tas @I])
|
||||||
++ digit (ranc '0' '9')
|
:~ alnum/alnum alpha/alpha ascii/ascii blank/blank cntrl/cntrl
|
||||||
++ print (ranc 32 126)
|
digit/digit graph/graph lower/lower print/print punct/punct
|
||||||
++ graph (ranc 33 126)
|
space/space upper/upper word/wordc xdigit/xdigit
|
||||||
++ blank (con (bex 32) (bex 9))
|
==
|
||||||
++ space :(con (ranc 9 13) (bex ' '))
|
:: Character sets
|
||||||
++ cntrl :(con (ranc 0 31) (bex 127))
|
|
||||||
++ alpha :(con lower upper)
|
|
||||||
++ alnum :(con lower upper digit)
|
++ alnum :(con lower upper digit)
|
||||||
|
++ alpha :(con lower upper)
|
||||||
|
++ ascii (ranc 0 127)
|
||||||
|
++ blank (con (bex 32) (bex 9))
|
||||||
|
++ cntrl :(con (ranc 0 31) (bex 127))
|
||||||
|
++ digit (ranc '0' '9')
|
||||||
|
++ graph (ranc 33 126)
|
||||||
|
++ lower (ranc 'a' 'z')
|
||||||
|
++ print (ranc 32 126)
|
||||||
++ punct ;: con
|
++ punct ;: con
|
||||||
(ranc '!' '/')
|
(ranc '!' '/')
|
||||||
(ranc ':' '@')
|
(ranc ':' '@')
|
||||||
(ranc '[' '`')
|
(ranc '[' '`')
|
||||||
(ranc '{' '~')
|
(ranc '{' '~')
|
||||||
==
|
==
|
||||||
++ wordc :(con digit lower upper (bex 95))
|
++ space :(con (ranc 9 13) (bex ' '))
|
||||||
|
++ upper (ranc 'A' 'Z')
|
||||||
++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13))
|
++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13))
|
||||||
|
++ wordc :(con digit lower upper (bex '_'))
|
||||||
++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit)
|
++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit)
|
||||||
::
|
::
|
||||||
++ chad
|
++ chad
|
||||||
@ -3944,37 +3922,41 @@
|
|||||||
(cold wordc (jest '\\w'))
|
(cold wordc (jest '\\w'))
|
||||||
(cold (flap wordc) (jest '\\W'))
|
(cold (flap wordc) (jest '\\W'))
|
||||||
==
|
==
|
||||||
|
--
|
||||||
::
|
::
|
||||||
|
++ ra :: regex engine
|
||||||
|
|_ a=rege
|
||||||
++ proc :: capture numbering
|
++ proc :: capture numbering
|
||||||
|= [a=rege b=@]
|
|= b=@
|
||||||
^- [@ rege]
|
=- -(+ +>.$(a a))
|
||||||
|
^- [p=@ a=rege]
|
||||||
?- a
|
?- a
|
||||||
[%capt *] =+ foo=$(a p.a, b +(b))
|
[%capt *] =+ foo=$(a p.a, b +(b))
|
||||||
[-.foo [%capt +.foo b]]
|
[p.foo [%capt a.foo b]]
|
||||||
[%eith *] =+ foo=$(a p.a)
|
[%eith *] =+ foo=$(a p.a)
|
||||||
=+ bar=$(a q.a, b -.foo)
|
=+ bar=$(a q.a, b p.foo)
|
||||||
[-.bar [%eith +.foo +.bar]]
|
[p.bar [%eith a.foo a.bar]]
|
||||||
[%pair *] =+ foo=$(a p.a)
|
[%pair *] =+ foo=$(a p.a)
|
||||||
=+ bar=$(a q.a, b -.foo)
|
=+ bar=$(a q.a, b p.foo)
|
||||||
[-.bar [%pair +.foo +.bar]]
|
[p.bar [%pair a.foo a.bar]]
|
||||||
[%manl *] =+ foo=$(a p.a)
|
[%manl *] =+ foo=$(a p.a)
|
||||||
[-.foo [%manl +.foo]]
|
[p.foo [%manl a.foo]]
|
||||||
[%plll *] =+ foo=$(a p.a)
|
[%plll *] =+ foo=$(a p.a)
|
||||||
[-.foo [%plll +.foo]]
|
[p.foo [%plll a.foo]]
|
||||||
[%binl *] =+ foo=$(a p.a)
|
[%binl *] =+ foo=$(a p.a)
|
||||||
[-.foo [%binl +.foo q.a]]
|
[p.foo [%binl a.foo q.a]]
|
||||||
[%betl *] =+ foo=$(a p.a)
|
[%betl *] =+ foo=$(a p.a)
|
||||||
[-.foo [%betl +.foo q.a r.a]]
|
[p.foo [%betl a.foo q.a r.a]]
|
||||||
[%mant *] =+ foo=$(a p.a)
|
[%mant *] =+ foo=$(a p.a)
|
||||||
[-.foo [%mant +.foo]]
|
[p.foo [%mant a.foo]]
|
||||||
[%plls *] =+ foo=$(a p.a)
|
[%plls *] =+ foo=$(a p.a)
|
||||||
[-.foo [%plls +.foo]]
|
[p.foo [%plls a.foo]]
|
||||||
[%bant *] =+ foo=$(a p.a)
|
[%bant *] =+ foo=$(a p.a)
|
||||||
[-.foo [%bant +.foo q.a]]
|
[p.foo [%bant a.foo q.a]]
|
||||||
[%bint *] =+ foo=$(a p.a)
|
[%bint *] =+ foo=$(a p.a)
|
||||||
[-.foo [%bint +.foo q.a]]
|
[p.foo [%bint a.foo q.a]]
|
||||||
[%betw *] =+ foo=$(a p.a)
|
[%betw *] =+ foo=$(a p.a)
|
||||||
[-.foo [%betw +.foo q.a r.a]]
|
[p.foo [%betw a.foo q.a r.a]]
|
||||||
* [b a]
|
* [b a]
|
||||||
==
|
==
|
||||||
::
|
::
|
||||||
@ -3990,26 +3972,26 @@
|
|||||||
[~ [[a -.u.b] +.u.b]]
|
[~ [[a -.u.b] +.u.b]]
|
||||||
::
|
::
|
||||||
++ matc
|
++ matc
|
||||||
|= [a=rege b=tape c=tape]
|
|= [b=tape c=tape]
|
||||||
^- (unit (map ,@u tape))
|
^- (unit (map ,@u tape))
|
||||||
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep a b %empt c)
|
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep b %empt c)
|
||||||
(bind foo |*(a=^ (~(put by +.a) 0 -.a)))
|
(bind foo |*(a=^ (~(put by +.a) 0 -.a)))
|
||||||
::
|
::
|
||||||
++ chet
|
++ chet
|
||||||
|= [a=(unit ,[tape (map ,@u tape)]) b=rege c=tape d=tape]
|
|= [b=(unit ,[tape (map ,@u tape)]) c=tape d=tape]
|
||||||
^- (unit ,[tape (map ,@u tape)])
|
^- (unit ,[tape (map ,@u tape)])
|
||||||
?~ a
|
?~ b
|
||||||
a
|
b
|
||||||
=+ ft=u.a
|
?~ -.u.b
|
||||||
?~ -.ft
|
b
|
||||||
a
|
=+ bar=(deep (slag (lent -.u.b) c) %empt d)
|
||||||
=+ bar=(deep b (slag (lent -.ft) c) %empt d)
|
|
||||||
?~ bar
|
?~ bar
|
||||||
bar
|
bar
|
||||||
a
|
b
|
||||||
++ blak (some ["" _(map ,@u tape)])
|
++ blak (some ["" _(map ,@u tape)])
|
||||||
|
++ word |=(a=char =((dis wordc:rags (bex a)) 0))
|
||||||
++ deep
|
++ deep
|
||||||
|= [a=rege b=tape c=rege d=tape]
|
|= [b=tape c=rege d=tape]
|
||||||
^- (unit ,[tape (map ,@u tape)])
|
^- (unit ,[tape (map ,@u tape)])
|
||||||
?- a
|
?- a
|
||||||
%dote ?~(b ~ (some [[i.b ~] _(map ,@u tape)]))
|
%dote ?~(b ~ (some [[i.b ~] _(map ,@u tape)]))
|
||||||
@ -4020,15 +4002,15 @@
|
|||||||
?: =(b d)
|
?: =(b d)
|
||||||
&
|
&
|
||||||
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
|
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
|
||||||
=((dis wordc (bex -.foo)) 0)
|
(word -.foo)
|
||||||
=+ cuc=?~(b & =((dis wordc (bex -.b)) 0))
|
=+ cuc=?~(b & (word -.b))
|
||||||
?:(!=(luc cuc) blak ~)
|
?:(!=(luc cuc) blak ~)
|
||||||
%bout =+ ^= luc
|
%bout =+ ^= luc
|
||||||
?: =(b d)
|
?: =(b d)
|
||||||
&
|
&
|
||||||
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
|
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
|
||||||
=((dis wordc (bex -.foo)) 0)
|
(word -.foo)
|
||||||
=+ cuc=?~(b & =((dis wordc (bex -.b)) 0))
|
=+ cuc=?~(b & (word -.b))
|
||||||
?:(=(luc cuc) blak ~)
|
?:(=(luc cuc) blak ~)
|
||||||
[%capt *] =+ foo=$(a p.a)
|
[%capt *] =+ foo=$(a p.a)
|
||||||
?~ foo
|
?~ foo
|
||||||
@ -4044,8 +4026,8 @@
|
|||||||
?. =((dis (bex `@`i.b) p.a) 0)
|
?. =((dis (bex `@`i.b) p.a) 0)
|
||||||
(some [[i.b ~] _(map ,@u tape)])
|
(some [[i.b ~] _(map ,@u tape)])
|
||||||
~
|
~
|
||||||
[%eith *] =+ foo=(chet $(a p.a) c b d)
|
[%eith *] =+ foo=(chet(a c) $(a p.a) b d)
|
||||||
=+ bar=(chet $(a q.a) c b d)
|
=+ bar=(chet(a c) $(a q.a) b d)
|
||||||
?~ foo
|
?~ foo
|
||||||
bar
|
bar
|
||||||
?~ bar
|
?~ bar
|
||||||
@ -4101,6 +4083,7 @@
|
|||||||
$(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]])
|
$(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]])
|
||||||
$(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]])
|
$(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]])
|
||||||
==
|
==
|
||||||
|
--
|
||||||
::
|
::
|
||||||
++ rexp :: Regex match
|
++ rexp :: Regex match
|
||||||
~/ %rexp
|
~/ %rexp
|
||||||
@ -4116,15 +4099,15 @@
|
|||||||
$(a (dec a))
|
$(a (dec a))
|
||||||
=+ par=(pars a)
|
=+ par=(pars a)
|
||||||
?~ par ~
|
?~ par ~
|
||||||
=+ poc=(proc u.par 1)
|
=+ poc=(~(proc ra u.par) 1)
|
||||||
=+ c=b
|
=+ c=b
|
||||||
|-
|
|-
|
||||||
=+ foo=(matc +.poc c b)
|
=+ foo=(matc:poc c b)
|
||||||
?~ foo
|
?~ foo
|
||||||
?~ c
|
?~ c
|
||||||
[~ ~]
|
[~ ~]
|
||||||
$(c t.c)
|
$(c t.c)
|
||||||
[~ [~ (bar (dec -.poc) u.foo)]]
|
[~ [~ (bar (dec p.poc) u.foo)]]
|
||||||
::
|
::
|
||||||
++ repg :: Global regex replace
|
++ repg :: Global regex replace
|
||||||
~/ %repg
|
~/ %repg
|
||||||
@ -4132,12 +4115,12 @@
|
|||||||
^- (unit tape)
|
^- (unit tape)
|
||||||
=+ par=(pars a)
|
=+ par=(pars a)
|
||||||
?~ par ~
|
?~ par ~
|
||||||
=+ poc=(proc u.par 1)
|
=+ poc=(~(proc ra u.par) 1)
|
||||||
=+ d=b
|
=+ d=b
|
||||||
:- ~
|
:- ~
|
||||||
|-
|
|-
|
||||||
^- tape
|
^- tape
|
||||||
=+ foo=(matc +.poc d b)
|
=+ foo=(matc:poc d b)
|
||||||
?~ foo
|
?~ foo
|
||||||
?~ d
|
?~ d
|
||||||
~
|
~
|
||||||
|
Loading…
Reference in New Issue
Block a user