Add validation functions for UTF8

This commit is contained in:
Alex Shelkovnykov 2023-04-10 16:17:47 -06:00
parent 3d93e9f527
commit 1472dadf4d

View File

@ -4503,6 +4503,106 @@
:: :: ++wish:de-json:html :: :: ++wish:de-json:html
++ wish :: with whitespace ++ wish :: with whitespace
|*(sef=rule ;~(pfix spac sef)) |*(sef=rule ;~(pfix spac sef))
:: XX: These gates should be moved to hoon.hoon
:: :: ++sune:de-json:html
++ sune :: cord UTF-8 sanity
|= b=@t
^- ?
?: =(0 b) &
?. (sung b) |
$(b (rsh [3 (teff b)] b))
:: :: ++sung:de-json:html
++ sung :: char UTF-8 sanity
|^ |= b=@t
^- ?
=+ len=(teff b)
?: =(4 len) (quad b)
?: =(3 len) (tres b)
?: =(2 len) (dos b)
(lte (end 3 b) 127)
::
++ dos
|= b=@t
^- ?
=+ :- one=(cut 3 [0 1] b)
two=(cut 3 [1 1] b)
?& (rang one 194 223)
(cont two)
==
::
++ tres
|= b=@t
^- ?
=+ :+ one=(cut 3 [0 1] b)
two=(cut 3 [1 1] b)
tre=(cut 3 [2 1] b)
?&
?|
?& |((rang one 225 236) (rang one 238 239))
(cont two)
==
::
?& =(224 one)
(rang two 160 191)
==
::
?& =(237 one)
(rang two 128 159)
==
==
::
(cont tre)
==
::
++ quad
|= b=@t
^- ?
=+ :^ one=(cut 3 [0 1] b)
two=(cut 3 [1 1] b)
tre=(cut 3 [2 1] b)
for=(cut 3 [3 1] b)
?&
?|
?& (rang one 241 243)
(cont two)
==
::
?& =(240 one)
(rang two 144 191)
==
::
?& =(244 one)
(rang two 128 143)
==
==
::
(cont tre)
(cont for)
==
::
++ cont
|= a=@
^- ?
(rang a 128 191)
::
++ rang
|= [a=@ bot=@ top=@]
^- ?
?> (lte bot top)
&((gte a bot) (lte a top))
--
:: XX: This +teff should overwrite the existing +teff
:: :: ++teff:de-json:html
++ teff :: UTF-8 length
|= a=@t
^- @
=+ b=(end 3 a)
?: =(0 b)
?> =(`@`0 a) 0
?: (lte b 127) 1
?: (lte b 223) 2
?: (lte b 239) 3
4
-- ::de-json -- ::de-json
:: :: ++en-xml:html :: :: ++en-xml:html
++ en-xml :: xml printer ++ en-xml :: xml printer