Fix crash reported in #923.

This commit is contained in:
Jorge Acereda 2020-11-16 23:46:16 +01:00
parent 9629195a62
commit 2d91f49600
3 changed files with 16 additions and 5 deletions

View File

@@ -134,9 +134,16 @@ Array String_chars(const String *ps) {
chars.capacity = l;
data = CARP_MALLOC(chars.capacity * sizeof(*data));
for (size_t si = 0, di = 0; di < l; si++) {
if (!utf8decode(&state, &cp, us[si])) {
data[di++] = cp;
cp = 0;
uint32_t r = utf8decode(&state, &cp, us[si]);
switch (r) {
case UTF8_ACCEPT:
data[di++] = cp;
cp = 0;
break;
case UTF8_REJECT:
data[di++] = 0xfffd; // REPLACEMENT CHARACTER
cp = 0;
break;
}
}
chars.data = data;

View File

@@ -49,10 +49,10 @@ static size_t utf8encode(char *s, uint32_t c) {
}
// Adapted from: http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
static const uint32_t UTF8_ACCEPT = 0;
static const uint32_t UTF8_REJECT = 12;
static uint32_t inline utf8decode(uint32_t *state, uint32_t *codep,
uint32_t byte) {
const uint32_t UTF8_ACCEPT = 0;
const uint32_t UTF8_REJECT = 12;
static const uint8_t utf8d[] = {
// clang-format off
// The first part of the table maps bytes to character classes that

View File

@@ -311,4 +311,8 @@
"hellö"
&(from-bytes &[104b 101b 108b 108b 195b 182b])
"from-bytes works correctly")
(assert-equal test
&[\<5C> \<5C>]
&(chars &(from-bytes &[255b 255b]))
"check for invalid UTF-8 sequences")
)