rege refactoring

This commit is contained in:
Anton Dyudin 2014-10-26 16:16:07 -07:00 committed by Anton Dyudin
parent afaf131c72
commit b27098ab6c

View File

@ -132,7 +132,7 @@
$% [%lite p=char] :: literal
[%pair p=rege q=rege] :: ordering
[%capt p=rege q=@u] :: capture group
[%brac p=@] :: p is 256 bitmask
[%brac p=@I] :: p is 256 bitmask
[%eith p=rege q=rege] :: either
[%mant p=rege] :: greedy 0 or more
[%plls p=rege] :: greedy 1 or more
@ -3677,34 +3677,26 @@
:: pars: parse a regular-expression tape into a rege syntax tree.
::   a  - the pattern text
:: The top-level rege rule is wrapped in `full` and run on a nail that starts
:: at position [1 1]. When the parse result q.foo is null the arm produces ~;
:: otherwise it produces [~ rege] holding the parsed product p.u.q.foo.
:: NOTE(review): this span is a rendered diff; the two `=+ foo=` lines look
:: like the before/after forms of one line (anns -> apex:rags) - confirm which
:: one the target revision keeps.
++ pars
|= [a=tape] :: parse tape to rege
^- (unit rege)
=+ foo=((full anns) [[1 1] a])
=+ foo=((full apex:rags) [[1 1] a])
?~ q.foo
~
[~ p.u.q.foo]
::
:: nor: ordinary pattern character, spelled out as explicit byte ranges.
:: The ranges cover 1..127 and leave gaps at 36 '$', 40-43 '(' ')' '*' '+',
:: 46 '.', 63 '?', 91-92 '[' '\', 94 '^' and 124 '|'.
:: The single-value ranges (93 93) and (123 123) keep ']' and '{' accepted.
++ nor
;~ pose
(shim 1 35)
(shim 37 39)
(shim 44 45)
(shim 47 62)
(shim 64 90)
(shim 93 93)
(shim 95 122)
(shim 123 123)
(shim 125 127)
==
:: Character-range helpers, each written as explicit byte ranges.
:: les: 32..126 without 92 (backslash).
++ les ;~(pose (shim 32 91) (shim 93 126))
:: lep: 32..126 without 91-94 ('[' '\' ']' '^').
++ lep ;~(pose (shim 32 45) (shim 46 90) (shim 95 126))
:: alm: the whole range 32..126.
++ alm (shim 32 126)
:: alb: 32..126 without 93 (']').
++ alb ;~(pose (shim 32 92) (shim 94 126))
:: mis: 32..126 without 48-57, 65-90 and 97-122 (digits and ASCII letters).
++ mis ;~(pose (shim 32 47) (shim 58 64) (shim 91 96) (shim 123 126))
::
++ anns :: top level rege parse
++ rags :: rege parsers
:: Helper core of single-character rules, placed in the subject of the arms
:: that follow. Each rule except asp is built as `;~(less x y)`.
:: NOTE(review): descriptions assume `less` means "y, unless x matches here" -
:: confirm against the parser library.
=> |%
:: nor: codes 1..127 minus the characters listed in the mask, i.e. the
:: pattern metacharacters ^ $ ( ) | * ? + . [ and backslash.
++ nor ;~(less (mask "^$()|*?+.[\\") (shim 1 127)) :: non-control char
:: les: asp minus whatever `bas` matches.
++ les ;~(less bas asp) :: not backslash
:: lep: asp minus ^ [ ] and backslash.
++ lep ;~(less (mask "^[]\\") asp) :: charset non-control
:: asp: codes 32..126.
++ asp (shim 32 126) :: printable ascii
:: alb: asp minus whatever `ser` matches (presumably ']' - confirm).
++ alb ;~(less ser asp) :: charset literal char
:: mis: asp minus whatever `aln` matches.
++ mis ;~(less aln asp) :: non alphanumeric
--
|%
:: apex: entry rule for a whole pattern; it handles alternation with `bar`.
:: The rule is wrapped in `knee` with a *rege default because it refers to
:: itself. Three alternatives are tried in order by `pose`:
::   1. `mall` followed by a bar and a nested top-level parse; the pair is
::      tagged %eith by the gate passed to `bend`
::   2. a bar with nothing before it: %eith of %empt and the nested parse
::   3. nothing at all: %empt
:: NOTE(review): this span is a rendered diff; the lines ending in `anns` and
:: the lines ending in `apex` look like before/after forms of the same two
:: lines (the recursive reference was renamed) - confirm.
++ apex :: top level
%+ knee *rege |. ~+
;~ pose
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar anns))
(stag %eith ;~(plug (easy %empt) ;~(pfix bar anns)))
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar apex))
(stag %eith ;~(plug (easy %empt) ;~(pfix bar apex)))
(easy %empt)
==
::
@ -3759,10 +3751,10 @@
=+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker)))
=+ bar=(foo tub)
?~(q.bar (chad tub) (fail tub))
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] anns))
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] apex))
%+ cook |=([a=rege] [%capt a 0])
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] anns)
(ifix [(jest '(?:') per] anns)
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] apex)
(ifix [(jest '(?:') per] apex)
(stag %brac ;~(pfix sel seac))
==
::
@ -3820,32 +3812,10 @@
%+ knee *@ |. ~+
;~ pose
unid
(cold lower (jest '[:lower:]'))
(cold upper (jest '[:upper:]'))
(cold digit (jest '[:digit:]'))
(cold print (jest '[:print:]'))
(cold graph (jest '[:graph:]'))
(cold blank (jest '[:blank:]'))
(cold space (jest '[:space:]'))
(cold cntrl (jest '[:cntrl:]'))
(cold alpha (jest '[:alpha:]'))
(cold xdigit (jest '[:xdigit:]'))
(cold alnum (jest '[:alnum:]'))
(cold punct (jest '[:punct:]'))
(cold (flap lower) (jest '[:^lower:]'))
(cold (flap upper) (jest '[:^upper:]'))
(cold (flap digit) (jest '[:^digit:]'))
(cold (flap print) (jest '[:^print:]'))
(cold (flap graph) (jest '[:^graph:]'))
(cold (flap blank) (jest '[:^blank:]'))
(cold (flap space) (jest '[:^space:]'))
(cold (flap cntrl) (jest '[:^cntrl:]'))
(cold (flap alpha) (jest '[:^alpha:]'))
(cold (flap xdigit) (jest '[:^xdigit:]'))
(cold (flap alnum) (jest '[:^alnum:]'))
(cold (flap punct) (jest '[:^punct:]'))
%+ ifix (jest '[:')^(jest ':]')
;~(pose ;~(pfix ket (cook flap chas)) chas)
%+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b))))
;~(plug alm ;~(pfix hep alb))
;~(plug asp ;~(pfix hep alb))
|= tub=nail
?~ q.tub
(fail tub)
@ -3869,25 +3839,33 @@
(cook |=(a=char (tape [a ~])) next)
(full (easy ~))
==
::
:: Character-class atoms combined with con/bex/ranc.
:: NOTE(review): ranc is not defined in this view; presumably (ranc a b) sets
:: bits a..b inclusive of a bitmask indexed by character code - confirm.
:: lower: 'a'..'z'
++ lower (ranc 'a' 'z')
:: upper: 'A'..'Z'
++ upper (ranc 'A' 'Z')
:: digit: '0'..'9'
++ digit (ranc '0' '9')
:: print: codes 32..126
++ print (ranc 32 126)
:: graph: codes 33..126 (print without 32)
++ graph (ranc 33 126)
:: blank: bits 32 and 9
++ blank (con (bex 32) (bex 9))
:: space: codes 9..13 plus ' '
++ space :(con (ranc 9 13) (bex ' '))
:: cntrl: codes 0..31 plus 127
++ cntrl :(con (ranc 0 31) (bex 127))
:: alpha: union of lower and upper
++ alpha :(con lower upper)
:: chas: parse a character-class name and produce its bitmask.
:: A map from name (@tas) to mask (@I) is built from the list below; the rule
:: then parses a `sym` and looks it up with `get`, passed through `sear`.
:: Every entry maps a name to the arm of the same name, except `word`, which
:: maps to the wordc arm.
:: NOTE(review): presumably `sear` fails the parse when the lookup returns ~,
:: so an unknown class name is rejected - confirm.
++ chas :: ascii character set
=- (sear ~(get by -) sym)
%- mo ^- (list ,[@tas @I])
:~ alnum/alnum alpha/alpha ascii/ascii blank/blank cntrl/cntrl
digit/digit graph/graph lower/lower print/print punct/punct
space/space upper/upper word/wordc xdigit/xdigit
==
:: Character sets
:: Each arm is an atom combined with con/bex/ranc; chas types them as @I.
:: NOTE(review): ranc is not defined in this view; presumably (ranc a b) sets
:: bits a..b inclusive of a bitmask indexed by character code - confirm.
:: NOTE(review): this span is a rendered diff; wordc appears twice, once with
:: (bex 95) and once with (bex '_'). 95 is the code of '_', so both spell the
:: same value - confirm which line the target revision keeps.
:: alnum: union of lower, upper and digit
++ alnum :(con lower upper digit)
:: alpha: union of lower and upper
++ alpha :(con lower upper)
:: ascii: codes 0..127
++ ascii (ranc 0 127)
:: blank: bits 32 and 9
++ blank (con (bex 32) (bex 9))
:: cntrl: codes 0..31 plus 127
++ cntrl :(con (ranc 0 31) (bex 127))
:: digit: '0'..'9'
++ digit (ranc '0' '9')
:: graph: codes 33..126
++ graph (ranc 33 126)
:: lower: 'a'..'z'
++ lower (ranc 'a' 'z')
:: print: codes 32..126
++ print (ranc 32 126)
:: punct: the four runs between the digit and letter ranges of 33..126
++ punct ;: con
(ranc '!' '/')
(ranc ':' '@')
(ranc '[' '`')
(ranc '{' '~')
==
++ wordc :(con digit lower upper (bex 95))
:: space: codes 9..13 plus ' '
++ space :(con (ranc 9 13) (bex ' '))
:: upper: 'A'..'Z'
++ upper (ranc 'A' 'Z')
:: white: ' ' plus codes 9..10 and 12..13 (code 11 is left out)
++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13))
:: wordc: digits, letters and '_'
++ wordc :(con digit lower upper (bex '_'))
:: xdigit: 'a'..'f', 'A'..'F' and digits
++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit)
::
++ chad
@ -3944,37 +3922,41 @@
(cold wordc (jest '\\w'))
(cold (flap wordc) (jest '\\W'))
==
--
::
++ ra :: regex engine
|_ a=rege
:: proc: assign numbers to capture groups by walking the rege tree.
::   b  - the next capture number to hand out
:: Produces a pair of the next unused number and the rewritten tree.
::   %capt        - recurses into the body with b+1 and tags this group with b,
::                  so an outer group gets a lower number than groups inside it
::   %eith, %pair - the left side is numbered first; the counter it returns
::                  feeds the right side
::   repetition   - (%manl %plll %binl %betl %mant %plls %bant %bint %betw)
::                  recurse into p.a and rebuild the node with its bounds
::                  (q.a, r.a) unchanged
::   anything else is returned as-is with the counter unchanged
:: NOTE(review): this span is a rendered diff. Each case shows the old
:: positional form (-.foo / +.foo) followed by the new named form
:: (p.foo / a.foo), and both gate headers are shown.
:: NOTE(review): the `=- -(+ +>.$(a a))` line appears to replace the tail of
:: the result with the enclosing core re-sampled on the renumbered rege, which
:: would explain call sites written as matc:poc and p.poc - confirm.
++ proc :: capture numbering
|= [a=rege b=@]
^- [@ rege]
|= b=@
=- -(+ +>.$(a a))
^- [p=@ a=rege]
?- a
[%capt *] =+ foo=$(a p.a, b +(b))
[-.foo [%capt +.foo b]]
[p.foo [%capt a.foo b]]
[%eith *] =+ foo=$(a p.a)
=+ bar=$(a q.a, b -.foo)
[-.bar [%eith +.foo +.bar]]
=+ bar=$(a q.a, b p.foo)
[p.bar [%eith a.foo a.bar]]
[%pair *] =+ foo=$(a p.a)
=+ bar=$(a q.a, b -.foo)
[-.bar [%pair +.foo +.bar]]
=+ bar=$(a q.a, b p.foo)
[p.bar [%pair a.foo a.bar]]
[%manl *] =+ foo=$(a p.a)
[-.foo [%manl +.foo]]
[p.foo [%manl a.foo]]
[%plll *] =+ foo=$(a p.a)
[-.foo [%plll +.foo]]
[p.foo [%plll a.foo]]
[%binl *] =+ foo=$(a p.a)
[-.foo [%binl +.foo q.a]]
[p.foo [%binl a.foo q.a]]
[%betl *] =+ foo=$(a p.a)
[-.foo [%betl +.foo q.a r.a]]
[p.foo [%betl a.foo q.a r.a]]
[%mant *] =+ foo=$(a p.a)
[-.foo [%mant +.foo]]
[p.foo [%mant a.foo]]
[%plls *] =+ foo=$(a p.a)
[-.foo [%plls +.foo]]
[p.foo [%plls a.foo]]
[%bant *] =+ foo=$(a p.a)
[-.foo [%bant +.foo q.a]]
[p.foo [%bant a.foo q.a]]
[%bint *] =+ foo=$(a p.a)
[-.foo [%bint +.foo q.a]]
[p.foo [%bint a.foo q.a]]
[%betw *] =+ foo=$(a p.a)
[-.foo [%betw +.foo q.a r.a]]
[p.foo [%betw a.foo q.a r.a]]
* [b a]
==
::
@ -3990,26 +3972,26 @@
[~ [[a -.u.b] +.u.b]]
::
:: matc: try to match the pattern at the front of tape b.
::   b  - the text to match from its current position
::   c  - a second tape handed to deep as its last argument
:: Calls deep with %empt as the follow-on pattern. On failure the result is ~.
:: On success the matched text (head of deep's result) is stored under key 0
:: of the capture map, and that map is returned.
:: NOTE(review): c is presumably the full original input, used for lookbehind
:: - the callers pass the unconsumed input as b and the whole input as c;
:: confirm against deep.
:: NOTE(review): this span is a rendered diff; the gate header and the
:: `=+ foo=` line each appear in old (explicit a=rege) and new (rege taken
:: from the enclosing core) form.
++ matc
|= [a=rege b=tape c=tape]
|= [b=tape c=tape]
^- (unit (map ,@u tape))
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep a b %empt c)
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep b %empt c)
(bind foo |*(a=^ (~(put by +.a) 0 -.a)))
::
:: chet: check that a follow-on pattern still matches after a candidate match.
:: Takes a candidate result (unit of [matched-text captures]), the text the
:: candidate was matched against, and a further tape passed through to deep.
::   - a null candidate is returned unchanged
::   - a candidate whose matched text is empty is returned unchanged
::   - otherwise the matched length is dropped from the text with slag/lent and
::     deep is run on the remainder; if that fails the failure (~) is
::     returned, else the original candidate is returned
:: The result of the follow-on match itself is discarded; only its success
:: matters.
:: NOTE(review): this span is a rendered diff. The old form takes the
:: candidate as `a` and the follow-on rege as `b`; the new form takes the
:: candidate as `b` and reads the rege from the enclosing core.
++ chet
|= [a=(unit ,[tape (map ,@u tape)]) b=rege c=tape d=tape]
|= [b=(unit ,[tape (map ,@u tape)]) c=tape d=tape]
^- (unit ,[tape (map ,@u tape)])
?~ a
a
=+ ft=u.a
?~ -.ft
a
=+ bar=(deep b (slag (lent -.ft) c) %empt d)
?~ b
b
?~ -.u.b
b
=+ bar=(deep (slag (lent -.u.b) c) %empt d)
?~ bar
bar
a
b
:: blak: a successful match whose matched text is "" and whose capture map is
:: the default value of (map ,@u tape).
++ blak (some ["" _(map ,@u tape)])
:: word: yields & when the bit for character a is NOT set in wordc:rags (the
:: intersection with the mask is 0), i.e. true for non-word characters
:: despite the name.
++ word |=(a=char =((dis wordc:rags (bex a)) 0))
++ deep
|= [a=rege b=tape c=rege d=tape]
|= [b=tape c=rege d=tape]
^- (unit ,[tape (map ,@u tape)])
?- a
%dote ?~(b ~ (some [[i.b ~] _(map ,@u tape)]))
@ -4020,15 +4002,15 @@
?: =(b d)
&
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
=((dis wordc (bex -.foo)) 0)
=+ cuc=?~(b & =((dis wordc (bex -.b)) 0))
(word -.foo)
=+ cuc=?~(b & (word -.b))
?:(!=(luc cuc) blak ~)
%bout =+ ^= luc
?: =(b d)
&
=+ foo=(slag (dec (sub (lent d) (lent b))) d)
=((dis wordc (bex -.foo)) 0)
=+ cuc=?~(b & =((dis wordc (bex -.b)) 0))
(word -.foo)
=+ cuc=?~(b & (word -.b))
?:(=(luc cuc) blak ~)
[%capt *] =+ foo=$(a p.a)
?~ foo
@ -4044,8 +4026,8 @@
?. =((dis (bex `@`i.b) p.a) 0)
(some [[i.b ~] _(map ,@u tape)])
~
[%eith *] =+ foo=(chet $(a p.a) c b d)
=+ bar=(chet $(a q.a) c b d)
[%eith *] =+ foo=(chet(a c) $(a p.a) b d)
=+ bar=(chet(a c) $(a q.a) b d)
?~ foo
bar
?~ bar
@ -4101,6 +4083,7 @@
$(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]])
$(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]])
==
--
::
++ rexp :: Regex match
~/ %rexp
@ -4116,15 +4099,15 @@
$(a (dec a))
=+ par=(pars a)
?~ par ~
=+ poc=(proc u.par 1)
=+ poc=(~(proc ra u.par) 1)
=+ c=b
|-
=+ foo=(matc +.poc c b)
=+ foo=(matc:poc c b)
?~ foo
?~ c
[~ ~]
$(c t.c)
[~ [~ (bar (dec -.poc) u.foo)]]
[~ [~ (bar (dec p.poc) u.foo)]]
::
++ repg :: Global regex replace
~/ %repg
@ -4132,12 +4115,12 @@
^- (unit tape)
=+ par=(pars a)
?~ par ~
=+ poc=(proc u.par 1)
=+ poc=(~(proc ra u.par) 1)
=+ d=b
:- ~
|-
^- tape
=+ foo=(matc +.poc d b)
=+ foo=(matc:poc d b)
?~ foo
?~ d
~