mirror of
https://github.com/HigherOrderCO/Bend.git
synced 2024-08-15 14:50:42 +03:00
Add utf8 and ascii encoding functions
This commit is contained in:
parent
78f0968281
commit
cc60c926a8
@ -175,3 +175,120 @@ hvm to_u24:
|
||||
# Casts any native number to an i24.
# Written as a raw HVM definition (`hvm` block) rather than as a Bend function.
# NOTE(review): the `[i24]` node presumably applies the native i24 conversion to
# the argument and wires the result to `ret` — confirm against the HVM net syntax.
|
||||
hvm to_i24:
|
||||
($([i24] ret) ret)
|
||||
|
||||
# Encoding
|
||||
|
||||
# Largest code point representable as a rune (U+10FFFF).
Utf8/MAX_RUNE = '\u{0010FFFF}'
# Replacement character returned by Utf8/decode_rune when a lead byte is rejected.
Utf8/RUNE_ERROR = '\u{FFFD}'
# Currently disabled definitions, kept for reference:
#Utf8/RUNE_SELF = 0x80
#Utf8/MAX = 4

# Tag bits that occupy the high bits of an encoded byte.
# 0xxxxxxx (not referenced by the visible encode/decode functions)
Utf8/t1 = 0x00
# 10xxxxxx: tag of every continuation byte
Utf8/tx = 0x80
# 110xxxxx: lead byte of a 2-byte sequence
Utf8/t2 = 0xC0
# 1110xxxx: lead byte of a 3-byte sequence
Utf8/t3 = 0xE0
# 11110xxx: lead byte of a 4-byte sequence
Utf8/t4 = 0xF0

# Masks that select the payload bits of a byte once its tag is removed.
# low 6 bits of a continuation byte
Utf8/maskx = 0x3F
# low 5 bits of a 2-byte lead
Utf8/mask2 = 0x1F
# low 4 bits of a 3-byte lead
Utf8/mask3 = 0x0F
# low 3 bits of a 4-byte lead
Utf8/mask4 = 0x07

# Largest code point that fits in a 1-, 2- and 3-byte encoding
# (2^7 - 1, 2^11 - 1 and 2^16 - 1 respectively).
Utf8/rune1max = 0x7F
Utf8/rune2max = 0x7FF
Utf8/rune3max = 0xFFFF
|
||||
|
||||
# Returns the list with `elem` added as its last element.
# The whole list is walked on every call, so the cost grows with its length.
Encoding/List/append elem (List/Nil) = (List/Cons elem List/Nil)
Encoding/List/append elem (List/Cons head tail) = (List/Cons head (Encoding/List/append elem tail))
|
||||
|
||||
# Returns the string with the character `ch` added as its last character.
# The whole string is walked on every call, so the cost grows with its length.
String/append ch (String/Nil) = (String/Cons ch String/Nil)
String/append ch (String/Cons head tail) = (String/Cons head (String/append ch tail))
|
||||
|
||||
# Decodes a list of UTF-8 bytes into a String, appending the decoded
# characters to `acc`.
#   bytes: list of byte values to decode.
#   acc:   the string decoded so far; the result is `acc` followed by one
#          character per rune returned by Utf8/decode_rune.
# Lead bytes that Utf8/decode_rune rejects show up as Utf8/RUNE_ERROR.
# An empty byte list returns `acc` unchanged. It is handled by its own rule
# because Utf8/decode_rune reports empty input as rune 0, which would
# otherwise be appended as a spurious NUL character.
Bytes/decode_utf8 (List/Nil) acc = acc
Bytes/decode_utf8 (List/Cons b bs) acc =
  # The length component is not needed here; only the rune and the remainder are used.
  let (got, _len, rest) = (Utf8/decode_rune (List/Cons b bs))
  # Utf8/decode_rune always consumes at least one byte of a non-empty list,
  # so `rest` is strictly shorter and the recursion terminates.
  (Bytes/decode_utf8 rest (String/append got acc))
|
||||
|
||||
# Decodes the first UTF-8 encoded code point of a byte list.
# Returns the tuple (rune, length, rest):
#   rune:   the decoded code point; Utf8/RUNE_ERROR when the lead byte is not
#           valid for the number of bytes available; 0 for an empty list.
#   length: number of bytes in the decoded sequence (1 to 4); 0 on error or
#           for an empty list.
#   rest:   the bytes that follow the decoded sequence. On error only the
#           offending lead byte is dropped, so the caller can resume decoding
#           at the next byte without losing data.
# Continuation bytes are masked with Utf8/maskx; their 10xxxxxx tag is not checked.
# There is one rule per available input length (0, 1, 2, 3, 4 or more bytes)
# so that a sequence is only decoded when all of its bytes are present.
Utf8/decode_rune [] = (0, 0, [])
Utf8/decode_rune [a] = if (<= a 0x7F) { (a, 1, []) } else { (Utf8/RUNE_ERROR, 0, []) }
Utf8/decode_rune [a, b] =
  if (<= a 0x7F) {
    (a, 1, [b])
  } else {
    if (== (& a 0xE0) 0xC0) {
      let r = (| (<< (& a Utf8/mask2) 6) (& b Utf8/maskx))
      (r, 2, [])
    } else {
      (Utf8/RUNE_ERROR, 0, [b])
    }
  }
Utf8/decode_rune [a, b, c] =
  if (<= a 0x7F) {
    (a, 1, [b, c])
  } else {
    if (== (& a 0xE0) 0xC0) {
      let r = (| (<< (& a Utf8/mask2) 6) (& b Utf8/maskx))
      (r, 2, [c])
    } else {
      if (== (& a 0xF0) 0xE0) {
        let r = (| (<< (& a Utf8/mask3) 12) (| (<< (& b Utf8/maskx) 6) (& c Utf8/maskx)))
        (r, 3, [])
      } else {
        (Utf8/RUNE_ERROR, 0, [b, c])
      }
    }
  }
Utf8/decode_rune (List/Cons a (List/Cons b (List/Cons c (List/Cons d rest)))) =
  if (<= a 0x7F) {
    (a, 1, (List/Cons b (List/Cons c (List/Cons d rest))))
  } else {
    if (== (& a 0xE0) 0xC0) {
      let r = (| (<< (& a Utf8/mask2) 6) (& b Utf8/maskx))
      (r, 2, (List/Cons c (List/Cons d rest)))
    } else {
      if (== (& a 0xF0) 0xE0) {
        let r = (| (<< (& a Utf8/mask3) 12) (| (<< (& b Utf8/maskx) 6) (& c Utf8/maskx)))
        (r, 3, (List/Cons d rest))
      } else {
        if (== (& a 0xF8) 0xF0) {
          let r = (| (<< (& a Utf8/mask4) 18) (| (<< (& b Utf8/maskx) 12) (| (<< (& c Utf8/maskx) 6) (& d Utf8/maskx))))
          # Hand back the bytes after the 4-byte sequence so decoding can continue.
          (r, 4, rest)
        } else {
          (Utf8/RUNE_ERROR, 0, (List/Cons b (List/Cons c (List/Cons d rest))))
        }
      }
    }
  }
|
||||
|
||||
# Encodes a String as a list of UTF-8 bytes, appending the bytes to `acc`.
#   first argument: the string to encode, consumed one character at a time.
#   acc:            the bytes produced so far; new bytes are added at its end.
# Each character becomes 1 to 4 bytes depending on which of Utf8/rune1max,
# Utf8/rune2max and Utf8/rune3max it fits under.
String/encode_utf8 (String/Nil) acc = acc
String/encode_utf8 (String/Cons rune tail) acc =
  if (<= rune Utf8/rune1max) {
    # 1 byte: the code point itself.
    let out1 = (Encoding/List/append rune acc)
    (String/encode_utf8 tail out1)
  } else {
    if (<= rune Utf8/rune2max) {
      # 2 bytes: lead byte tagged with t2, then one continuation byte.
      let lead = (| Utf8/t2 (>> rune 6))
      let last = (| Utf8/tx (& rune Utf8/maskx))
      let out1 = (Encoding/List/append lead acc)
      let out2 = (Encoding/List/append last out1)
      (String/encode_utf8 tail out2)
    } else {
      if (<= rune Utf8/rune3max) {
        # 3 bytes: lead byte tagged with t3, then two continuation bytes.
        let lead = (| Utf8/t3 (>> rune 12))
        let mid = (| Utf8/tx (& (>> rune 6) Utf8/maskx))
        let last = (| Utf8/tx (& rune Utf8/maskx))
        let out1 = (Encoding/List/append lead acc)
        let out2 = (Encoding/List/append mid out1)
        let out3 = (Encoding/List/append last out2)
        (String/encode_utf8 tail out3)
      } else {
        # 4 bytes: lead byte tagged with t4, then three continuation bytes.
        let lead = (| Utf8/t4 (>> rune 18))
        let hi = (| Utf8/tx (& (>> rune 12) Utf8/maskx))
        let mid = (| Utf8/tx (& (>> rune 6) Utf8/maskx))
        let last = (| Utf8/tx (& rune Utf8/maskx))
        let out1 = (Encoding/List/append lead acc)
        let out2 = (Encoding/List/append hi out1)
        let out3 = (Encoding/List/append mid out2)
        let out4 = (Encoding/List/append last out3)
        (String/encode_utf8 tail out4)
      }
    }
  }
|
||||
|
||||
# Converts a list of bytes into a String, using each byte value directly as
# a character. No range check is performed on the byte values.
Bytes/decode_ascii (List/Nil) = String/Nil
Bytes/decode_ascii (List/Cons byte rest) = (String/Cons byte (Bytes/decode_ascii rest))
|
||||
|
||||
# Converts a String into a list of bytes, using each character value directly
# as a byte. No range check is performed on the character values.
String/encode_ascii (String/Nil) = List/Nil
String/encode_ascii (String/Cons ch rest) = (List/Cons ch (String/encode_ascii rest))
|
||||
|
4
tests/golden_tests/run_file/encode_decode_utf8.bend
Normal file
4
tests/golden_tests/run_file/encode_decode_utf8.bend
Normal file
@ -0,0 +1,4 @@
|
||||
# Golden test: encodes "Hello, 世界!" to UTF-8 bytes and decodes the equivalent
# byte list back to a string, returning both results as a pair.
def main:
  use text = "Hello, 世界!"
  use utf8_bytes = [72, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140, 33]
  encoded = String/encode_utf8(text, [])
  decoded = Bytes/decode_utf8(utf8_bytes, [])
  return (encoded, decoded)
|
9
tests/snapshots/run_file__encode_decode_utf8.bend.snap
Normal file
9
tests/snapshots/run_file__encode_decode_utf8.bend.snap
Normal file
@ -0,0 +1,9 @@
|
||||
---
|
||||
source: tests/golden_tests.rs
|
||||
input_file: tests/golden_tests/run_file/encode_decode_utf8.bend
|
||||
---
|
||||
NumScott:
|
||||
([72, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140, 33], "Hello, 世界!")
|
||||
|
||||
Scott:
|
||||
([72, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140, 33], "Hello, 世界!")
|
Loading…
Reference in New Issue
Block a user