rege refactoring

This commit is contained in:
Anton Dyudin 2014-10-26 16:16:07 -07:00 committed by Anton Dyudin
parent afaf131c72
commit b27098ab6c

View File

@@ -132,7 +132,7 @@
$% [%lite p=char] :: literal $% [%lite p=char] :: literal
[%pair p=rege q=rege] :: ordering [%pair p=rege q=rege] :: ordering
[%capt p=rege q=@u] :: capture group [%capt p=rege q=@u] :: capture group
[%brac p=@] :: p is 256 bitmask [%brac p=@I] :: p is 256 bitmask
[%eith p=rege q=rege] :: either [%eith p=rege q=rege] :: either
[%mant p=rege] :: greedy 0 or more [%mant p=rege] :: greedy 0 or more
[%plls p=rege] :: greedy 1 or more [%plls p=rege] :: greedy 1 or more
@@ -3677,34 +3677,26 @@
++ pars ++ pars
|= [a=tape] :: parse tape to rege |= [a=tape] :: parse tape to rege
^- (unit rege) ^- (unit rege)
=+ foo=((full anns) [[1 1] a]) =+ foo=((full apex:rags) [[1 1] a])
?~ q.foo ?~ q.foo
~ ~
[~ p.u.q.foo] [~ p.u.q.foo]
:: ::
++ nor ++ rags :: rege parsers
;~ pose => |%
(shim 1 35) ++ nor ;~(less (mask "^$()|*?+.[\\") (shim 1 127)) :: non-control char
(shim 37 39) ++ les ;~(less bas asp) :: not backslash
(shim 44 45) ++ lep ;~(less (mask "^[]\\") asp) :: charset non-control
(shim 47 62) ++ asp (shim 32 126) :: printable ascii
(shim 64 90) ++ alb ;~(less ser asp) :: charset literal char
(shim 93 93) ++ mis ;~(less aln asp) :: non alphanumeric
(shim 95 122) --
(shim 123 123) |%
(shim 125 127) ++ apex :: top level
==
++ les ;~(pose (shim 32 91) (shim 93 126))
++ lep ;~(pose (shim 32 45) (shim 46 90) (shim 95 126))
++ alm (shim 32 126)
++ alb ;~(pose (shim 32 92) (shim 94 126))
++ mis ;~(pose (shim 32 47) (shim 58 64) (shim 91 96) (shim 123 126))
::
++ anns :: top level rege parse
%+ knee *rege |. ~+ %+ knee *rege |. ~+
;~ pose ;~ pose
;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar anns)) ;~((bend |=(a=[rege rege] (some [%eith a]))) mall ;~(pfix bar apex))
(stag %eith ;~(plug (easy %empt) ;~(pfix bar anns))) (stag %eith ;~(plug (easy %empt) ;~(pfix bar apex)))
(easy %empt) (easy %empt)
== ==
:: ::
@@ -3759,10 +3751,10 @@
=+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker))) =+ foo=;~(plug kel dim:ag ;~(pose ker (jest ',}') ;~(plug com dim:ag ker)))
=+ bar=(foo tub) =+ bar=(foo tub)
?~(q.bar (chad tub) (fail tub)) ?~(q.bar (chad tub) (fail tub))
(cook |=([a=rege] [%capt a 0]) (ifix [pel per] anns)) (cook |=([a=rege] [%capt a 0]) (ifix [pel per] apex))
%+ cook |=([a=rege] [%capt a 0]) %+ cook |=([a=rege] [%capt a 0])
(ifix [;~(plug (jest '(?P<') (plus aln) gar) per] anns) (ifix [;~(plug (jest '(?P<') (plus aln) gar) per] apex)
(ifix [(jest '(?:') per] anns) (ifix [(jest '(?:') per] apex)
(stag %brac ;~(pfix sel seac)) (stag %brac ;~(pfix sel seac))
== ==
:: ::
@@ -3820,32 +3812,10 @@
%+ knee *@ |. ~+ %+ knee *@ |. ~+
;~ pose ;~ pose
unid unid
(cold lower (jest '[:lower:]')) %+ ifix (jest '[:')^(jest ':]')
(cold upper (jest '[:upper:]')) ;~(pose ;~(pfix ket (cook flap chas)) chas)
(cold digit (jest '[:digit:]'))
(cold print (jest '[:print:]'))
(cold graph (jest '[:graph:]'))
(cold blank (jest '[:blank:]'))
(cold space (jest '[:space:]'))
(cold cntrl (jest '[:cntrl:]'))
(cold alpha (jest '[:alpha:]'))
(cold xdigit (jest '[:xdigit:]'))
(cold alnum (jest '[:alnum:]'))
(cold punct (jest '[:punct:]'))
(cold (flap lower) (jest '[:^lower:]'))
(cold (flap upper) (jest '[:^upper:]'))
(cold (flap digit) (jest '[:^digit:]'))
(cold (flap print) (jest '[:^print:]'))
(cold (flap graph) (jest '[:^graph:]'))
(cold (flap blank) (jest '[:^blank:]'))
(cold (flap space) (jest '[:^space:]'))
(cold (flap cntrl) (jest '[:^cntrl:]'))
(cold (flap alpha) (jest '[:^alpha:]'))
(cold (flap xdigit) (jest '[:^xdigit:]'))
(cold (flap alnum) (jest '[:^alnum:]'))
(cold (flap punct) (jest '[:^punct:]'))
%+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b)))) %+ sear |=([a=@ b=@] ?:((gth a b) ~ (some (ranc a b))))
;~(plug alm ;~(pfix hep alb)) ;~(plug asp ;~(pfix hep alb))
|= tub=nail |= tub=nail
?~ q.tub ?~ q.tub
(fail tub) (fail tub)
@@ -3869,25 +3839,33 @@
(cook |=(a=char (tape [a ~])) next) (cook |=(a=char (tape [a ~])) next)
(full (easy ~)) (full (easy ~))
== ==
:: ++ chas :: ascii character set
++ lower (ranc 'a' 'z') =- (sear ~(get by -) sym)
++ upper (ranc 'A' 'Z') %- mo ^- (list ,[@tas @I])
++ digit (ranc '0' '9') :~ alnum/alnum alpha/alpha ascii/ascii blank/blank cntrl/cntrl
++ print (ranc 32 126) digit/digit graph/graph lower/lower print/print punct/punct
++ graph (ranc 33 126) space/space upper/upper word/wordc xdigit/xdigit
++ blank (con (bex 32) (bex 9)) ==
++ space :(con (ranc 9 13) (bex ' ')) :: Character sets
++ cntrl :(con (ranc 0 31) (bex 127))
++ alpha :(con lower upper)
++ alnum :(con lower upper digit) ++ alnum :(con lower upper digit)
++ alpha :(con lower upper)
++ ascii (ranc 0 127)
++ blank (con (bex 32) (bex 9))
++ cntrl :(con (ranc 0 31) (bex 127))
++ digit (ranc '0' '9')
++ graph (ranc 33 126)
++ lower (ranc 'a' 'z')
++ print (ranc 32 126)
++ punct ;: con ++ punct ;: con
(ranc '!' '/') (ranc '!' '/')
(ranc ':' '@') (ranc ':' '@')
(ranc '[' '`') (ranc '[' '`')
(ranc '{' '~') (ranc '{' '~')
== ==
++ wordc :(con digit lower upper (bex 95)) ++ space :(con (ranc 9 13) (bex ' '))
++ upper (ranc 'A' 'Z')
++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13)) ++ white :(con (bex ' ') (ranc 9 10) (ranc 12 13))
++ wordc :(con digit lower upper (bex '_'))
++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit) ++ xdigit :(con (ranc 'a' 'f') (ranc 'A' 'F') digit)
:: ::
++ chad ++ chad
@@ -3944,37 +3922,41 @@
(cold wordc (jest '\\w')) (cold wordc (jest '\\w'))
(cold (flap wordc) (jest '\\W')) (cold (flap wordc) (jest '\\W'))
== ==
--
:: ::
++ ra :: regex engine
|_ a=rege
++ proc :: capture numbering ++ proc :: capture numbering
|= [a=rege b=@] |= b=@
^- [@ rege] =- -(+ +>.$(a a))
^- [p=@ a=rege]
?- a ?- a
[%capt *] =+ foo=$(a p.a, b +(b)) [%capt *] =+ foo=$(a p.a, b +(b))
[-.foo [%capt +.foo b]] [p.foo [%capt a.foo b]]
[%eith *] =+ foo=$(a p.a) [%eith *] =+ foo=$(a p.a)
=+ bar=$(a q.a, b -.foo) =+ bar=$(a q.a, b p.foo)
[-.bar [%eith +.foo +.bar]] [p.bar [%eith a.foo a.bar]]
[%pair *] =+ foo=$(a p.a) [%pair *] =+ foo=$(a p.a)
=+ bar=$(a q.a, b -.foo) =+ bar=$(a q.a, b p.foo)
[-.bar [%pair +.foo +.bar]] [p.bar [%pair a.foo a.bar]]
[%manl *] =+ foo=$(a p.a) [%manl *] =+ foo=$(a p.a)
[-.foo [%manl +.foo]] [p.foo [%manl a.foo]]
[%plll *] =+ foo=$(a p.a) [%plll *] =+ foo=$(a p.a)
[-.foo [%plll +.foo]] [p.foo [%plll a.foo]]
[%binl *] =+ foo=$(a p.a) [%binl *] =+ foo=$(a p.a)
[-.foo [%binl +.foo q.a]] [p.foo [%binl a.foo q.a]]
[%betl *] =+ foo=$(a p.a) [%betl *] =+ foo=$(a p.a)
[-.foo [%betl +.foo q.a r.a]] [p.foo [%betl a.foo q.a r.a]]
[%mant *] =+ foo=$(a p.a) [%mant *] =+ foo=$(a p.a)
[-.foo [%mant +.foo]] [p.foo [%mant a.foo]]
[%plls *] =+ foo=$(a p.a) [%plls *] =+ foo=$(a p.a)
[-.foo [%plls +.foo]] [p.foo [%plls a.foo]]
[%bant *] =+ foo=$(a p.a) [%bant *] =+ foo=$(a p.a)
[-.foo [%bant +.foo q.a]] [p.foo [%bant a.foo q.a]]
[%bint *] =+ foo=$(a p.a) [%bint *] =+ foo=$(a p.a)
[-.foo [%bint +.foo q.a]] [p.foo [%bint a.foo q.a]]
[%betw *] =+ foo=$(a p.a) [%betw *] =+ foo=$(a p.a)
[-.foo [%betw +.foo q.a r.a]] [p.foo [%betw a.foo q.a r.a]]
* [b a] * [b a]
== ==
:: ::
@@ -3990,26 +3972,26 @@
[~ [[a -.u.b] +.u.b]] [~ [[a -.u.b] +.u.b]]
:: ::
++ matc ++ matc
|= [a=rege b=tape c=tape] |= [b=tape c=tape]
^- (unit (map ,@u tape)) ^- (unit (map ,@u tape))
=+ foo=`(unit ,[tape (map ,@u tape)])`(deep a b %empt c) =+ foo=`(unit ,[tape (map ,@u tape)])`(deep b %empt c)
(bind foo |*(a=^ (~(put by +.a) 0 -.a))) (bind foo |*(a=^ (~(put by +.a) 0 -.a)))
:: ::
++ chet ++ chet
|= [a=(unit ,[tape (map ,@u tape)]) b=rege c=tape d=tape] |= [b=(unit ,[tape (map ,@u tape)]) c=tape d=tape]
^- (unit ,[tape (map ,@u tape)]) ^- (unit ,[tape (map ,@u tape)])
?~ a ?~ b
a b
=+ ft=u.a ?~ -.u.b
?~ -.ft b
a =+ bar=(deep (slag (lent -.u.b) c) %empt d)
=+ bar=(deep b (slag (lent -.ft) c) %empt d)
?~ bar ?~ bar
bar bar
a b
++ blak (some ["" _(map ,@u tape)]) ++ blak (some ["" _(map ,@u tape)])
++ word |=(a=char =((dis wordc:rags (bex a)) 0))
++ deep ++ deep
|= [a=rege b=tape c=rege d=tape] |= [b=tape c=rege d=tape]
^- (unit ,[tape (map ,@u tape)]) ^- (unit ,[tape (map ,@u tape)])
?- a ?- a
%dote ?~(b ~ (some [[i.b ~] _(map ,@u tape)])) %dote ?~(b ~ (some [[i.b ~] _(map ,@u tape)]))
@@ -4020,15 +4002,15 @@
?: =(b d) ?: =(b d)
& &
=+ foo=(slag (dec (sub (lent d) (lent b))) d) =+ foo=(slag (dec (sub (lent d) (lent b))) d)
=((dis wordc (bex -.foo)) 0) (word -.foo)
=+ cuc=?~(b & =((dis wordc (bex -.b)) 0)) =+ cuc=?~(b & (word -.b))
?:(!=(luc cuc) blak ~) ?:(!=(luc cuc) blak ~)
%bout =+ ^= luc %bout =+ ^= luc
?: =(b d) ?: =(b d)
& &
=+ foo=(slag (dec (sub (lent d) (lent b))) d) =+ foo=(slag (dec (sub (lent d) (lent b))) d)
=((dis wordc (bex -.foo)) 0) (word -.foo)
=+ cuc=?~(b & =((dis wordc (bex -.b)) 0)) =+ cuc=?~(b & (word -.b))
?:(=(luc cuc) blak ~) ?:(=(luc cuc) blak ~)
[%capt *] =+ foo=$(a p.a) [%capt *] =+ foo=$(a p.a)
?~ foo ?~ foo
@@ -4044,8 +4026,8 @@
?. =((dis (bex `@`i.b) p.a) 0) ?. =((dis (bex `@`i.b) p.a) 0)
(some [[i.b ~] _(map ,@u tape)]) (some [[i.b ~] _(map ,@u tape)])
~ ~
[%eith *] =+ foo=(chet $(a p.a) c b d) [%eith *] =+ foo=(chet(a c) $(a p.a) b d)
=+ bar=(chet $(a q.a) c b d) =+ bar=(chet(a c) $(a q.a) b d)
?~ foo ?~ foo
bar bar
?~ bar ?~ bar
@@ -4101,6 +4083,7 @@
$(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]]) $(a [%eith %empt [%pair p.a [%betl p.a 0 (dec r.a)]]])
$(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]]) $(a [%pair p.a [%betl p.a (dec q.a) (dec r.a)]])
== ==
--
:: ::
++ rexp :: Regex match ++ rexp :: Regex match
~/ %rexp ~/ %rexp
@@ -4116,15 +4099,15 @@
$(a (dec a)) $(a (dec a))
=+ par=(pars a) =+ par=(pars a)
?~ par ~ ?~ par ~
=+ poc=(proc u.par 1) =+ poc=(~(proc ra u.par) 1)
=+ c=b =+ c=b
|- |-
=+ foo=(matc +.poc c b) =+ foo=(matc:poc c b)
?~ foo ?~ foo
?~ c ?~ c
[~ ~] [~ ~]
$(c t.c) $(c t.c)
[~ [~ (bar (dec -.poc) u.foo)]] [~ [~ (bar (dec p.poc) u.foo)]]
:: ::
++ repg :: Global regex replace ++ repg :: Global regex replace
~/ %repg ~/ %repg
@@ -4132,12 +4115,12 @@
^- (unit tape) ^- (unit tape)
=+ par=(pars a) =+ par=(pars a)
?~ par ~ ?~ par ~
=+ poc=(proc u.par 1) =+ poc=(~(proc ra u.par) 1)
=+ d=b =+ d=b
:- ~ :- ~
|- |-
^- tape ^- tape
=+ foo=(matc +.poc d b) =+ foo=(matc:poc d b)
?~ foo ?~ foo
?~ d ?~ d
~ ~