mirror of
https://github.com/ilyakooo0/urbit.git
synced 2024-12-18 20:31:40 +03:00
Fix JSON parser decoding for escaped unicode
JSON supports Unicode both as raw UTF-8 sequences and as escaped UTF-16 code units (`\uXXXX`). Code points U+10000 through U+10FFFF are encoded as two consecutive escaped UTF-16 code units, known as a surrogate pair. The JSON decoder previously treated a surrogate pair as two independent escaped UTF-16 code units instead of combining them into a single code point. In addition, the JSON standard allows the hexadecimal digits of an escape to be either uppercase or lowercase, which the parser did not respect.

See:
- https://www.crockford.com/mckeeman.html
- https://www.json.org/json-en.html
- https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF

Git issue: #1776
This commit is contained in:
parent
e4ddd6994c
commit
3d93e9f527
@ -4400,10 +4400,12 @@
|
||||
=* lip
|
||||
^- (list (pair @t @))
|
||||
[b+8 t+9 n+10 f+12 r+13 ~]
|
||||
=* wow `(map @t @)`(malt lip)
|
||||
=* wow
|
||||
^~
|
||||
^- (map @t @)
|
||||
(malt lip)
|
||||
(sear ~(get by wow) low)
|
||||
=* tuf ;~(pfix (just 'u') (cook tuft qix:ab))
|
||||
;~(pose doq fas bas loo tuf)
|
||||
;~(pose doq fas bas loo unic)
|
||||
==
|
||||
:: :: ++expo:de-json:html
|
||||
++ expo :: exponent
|
||||
@ -4455,6 +4457,49 @@
|
||||
::                                                  ::  ++spac:de-json:html
++  spac                                            ::  whitespace
  ::  zero or more of: tab (9), newline (10), carriage return (13), space
  ::
  %-  star
  (mask ~[`@`9 `@`10 `@`13 ' '])
|
||||
::                                                  ::  ++unic:de-json:html
++  unic                                            ::  escaped UTF16
  ::  parses a unicode escape starting at its 'u' (this rule does not
  ::  itself match a leading backslash).  accepts either one 4-hex-digit
  ::  unit outside the surrogate range, or a high surrogate followed by
  ::  '\u' and a low surrogate, merged into a single number.  the number
  ::  is passed through +tuft and cast to @t.
  ::
  ::  range bounds; every range below is [lower upper)
  ::
  =*  lob  0x0
  =*  hsb  0xd800
  =*  lsb  0xdc00
  =*  hib  0xe000
  =*  hil  0x1.0000
  |^
    %+  cook
      |=(cod=@ ^-(@t (tuft cod)))
    ;~(pfix (just 'u') ;~(pose solo pair))
  ::
  ++  quad                                          ::  parse num from 4 hex
    ::  exactly four +hit digits, folded as a base-16 number
    ::
    %+  bass  16
    (stun [4 4] hit)
  ::
  ++  meat                                          ::  gen gate for sear:
    ::  lo, hi: accept val when lo <= val < hi, otherwise produce ~
    ::  rel:    when yes, produce val minus lo instead of val itself
    ::
    |=  [lo=@ux hi=@ux rel=?]
    |=  val=@ux
    ^-  (unit @)
    ?:  |((lth val lo) (gte val hi))
      ~
    [~ ?:(rel (sub val lo) val)]
  ::
  ++  solo                                          ::  single valid UTF16
    ::  a lone unit below hsb, or from hib up to (not including) hil
    ::
    ;~(pose (sear (meat lob hsb %.n) quad) (sear (meat hib hil %.n) quad))
  ::
  ++  pair                                          ::  UTF16 surrogate pair
    ::  both halves arrive already reduced to offsets from the start of
    ::  their range; result is hil + lo + (hi shifted left by 10 bits)
    ::
    %+  cook
      |=  [hi=@ lo=@]
      ^-  @t
      (add hil (add lo (lsh [0 10] hi)))
    ;~  plug
      (sear (meat hsb lsb %.y) quad)
      ;~(pfix (jest '\\u') (sear (meat lsb hib %.y) quad))
    ==
  --
|
||||
::                                                  ::  ++wish:de-json:html
++  wish                                            ::  with whitespace
  ::  wraps rule sef so that any leading +spac match is parsed and
  ::  discarded before sef runs
  ::
  |*  sef=rule
  ;~(pfix spac sef)
|
||||
|
Loading…
Reference in New Issue
Block a user