mirror of
https://github.com/kovidgoyal/kitty.git
synced 2024-11-11 01:28:19 +03:00
Represent malformed UTF-8 with the replacement character
This commit is contained in:
parent
8a83014f51
commit
969bd05fc5
@ -20,7 +20,7 @@
|
|||||||
|
|
||||||
// Macros {{{
|
// Macros {{{
|
||||||
|
|
||||||
#define SET_STATE(state) self->vte_state = state; self->parser_buf_pos = 0; self->utf8_state = UTF8_ACCEPT;
|
#define SET_STATE(x) self->vte_state = VTE_##x; self->parser_buf_pos = 0; self->utf8.state = UTF8_ACCEPT; self->utf8.prev = UTF8_ACCEPT;
|
||||||
|
|
||||||
#define IS_DIGIT \
|
#define IS_DIGIT \
|
||||||
case '0': \
|
case '0': \
|
||||||
@ -145,7 +145,7 @@ typedef struct PS {
|
|||||||
id_type window_id;
|
id_type window_id;
|
||||||
|
|
||||||
unsigned parser_buf_pos;
|
unsigned parser_buf_pos;
|
||||||
UTF8State utf8_state;
|
struct { UTF8State prev, state; } utf8;
|
||||||
VTEState vte_state;
|
VTEState vte_state;
|
||||||
|
|
||||||
// this is used only during dispatch of a single byte, its present here just to avoid adding an extra parameter to accumulate_osc()
|
// this is used only during dispatch of a single byte, its present here just to avoid adding an extra parameter to accumulate_osc()
|
||||||
@ -173,15 +173,21 @@ typedef struct PS {
|
|||||||
static void
|
static void
|
||||||
draw_byte(PS *self, uint8_t b) {
|
draw_byte(PS *self, uint8_t b) {
|
||||||
uint32_t ch;
|
uint32_t ch;
|
||||||
switch (decode_utf8(&self->utf8_state, &ch, b)) {
|
switch (decode_utf8(&self->utf8.state, &ch, b)) {
|
||||||
case UTF8_ACCEPT:
|
case UTF8_ACCEPT:
|
||||||
REPORT_DRAW(ch);
|
REPORT_DRAW(ch);
|
||||||
screen_draw(self->screen, ch, true);
|
screen_draw(self->screen, ch, true);
|
||||||
break;
|
break;
|
||||||
case UTF8_REJECT:
|
case UTF8_REJECT: {
|
||||||
self->utf8_state = UTF8_ACCEPT;
|
bool prev_was_accept = self->utf8.prev == UTF8_ACCEPT;
|
||||||
break;
|
self->utf8.state = UTF8_ACCEPT; self->utf8.prev = UTF8_ACCEPT;
|
||||||
|
ch = 0xfffd; // unicode replacement char
|
||||||
|
REPORT_DRAW(ch);
|
||||||
|
screen_draw(self->screen, ch, true);
|
||||||
|
if (!prev_was_accept) draw_byte(self, b);
|
||||||
|
} break;
|
||||||
}
|
}
|
||||||
|
self->utf8.prev = self->utf8.state;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -206,7 +212,7 @@ dispatch_normal_mode_byte(PS *self) {
|
|||||||
case SO:
|
case SO:
|
||||||
REPORT_ERROR("Ignoring request to change charset as we only support UTF-8"); break;
|
REPORT_ERROR("Ignoring request to change charset as we only support UTF-8"); break;
|
||||||
case ESC:
|
case ESC:
|
||||||
SET_STATE(VTE_ESC); break;
|
SET_STATE(ESC); break;
|
||||||
case NUL:
|
case NUL:
|
||||||
case DEL:
|
case DEL:
|
||||||
break; // no-op
|
break; // no-op
|
||||||
@ -237,23 +243,23 @@ screen_nel(Screen *screen) { screen_carriage_return(screen); screen_linefeed(scr
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
dispatch_esc_mode_byte(PS *self) {
|
dispatch_esc_mode_byte(PS *self) {
|
||||||
#define CALL_ED(name) REPORT_COMMAND(name); name(self->screen); SET_STATE(VTE_NORMAL);
|
#define CALL_ED(name) REPORT_COMMAND(name); name(self->screen); SET_STATE(NORMAL);
|
||||||
#define CALL_ED1(name, ch) REPORT_COMMAND(name, ch); name(self->screen, ch); SET_STATE(VTE_NORMAL);
|
#define CALL_ED1(name, ch) REPORT_COMMAND(name, ch); name(self->screen, ch); SET_STATE(NORMAL);
|
||||||
#define CALL_ED2(name, a, b) REPORT_COMMAND(name, a, b); name(self->screen, a, b); SET_STATE(VTE_NORMAL);
|
#define CALL_ED2(name, a, b) REPORT_COMMAND(name, a, b); name(self->screen, a, b); SET_STATE(NORMAL);
|
||||||
uint8_t ch = self->input_data[self->input_pos++];
|
uint8_t ch = self->input_data[self->input_pos++];
|
||||||
switch(self->parser_buf_pos) {
|
switch(self->parser_buf_pos) {
|
||||||
case 0:
|
case 0:
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case ESC_DCS:
|
case ESC_DCS:
|
||||||
SET_STATE(VTE_DCS); break;
|
SET_STATE(DCS); break;
|
||||||
case ESC_OSC:
|
case ESC_OSC:
|
||||||
SET_STATE(VTE_OSC); break;
|
SET_STATE(OSC); break;
|
||||||
case ESC_CSI:
|
case ESC_CSI:
|
||||||
SET_STATE(VTE_CSI); break;
|
SET_STATE(CSI); break;
|
||||||
case ESC_APC:
|
case ESC_APC:
|
||||||
SET_STATE(VTE_APC); break;
|
SET_STATE(APC); break;
|
||||||
case ESC_PM:
|
case ESC_PM:
|
||||||
SET_STATE(VTE_PM); break;
|
SET_STATE(PM); break;
|
||||||
case ESC_RIS:
|
case ESC_RIS:
|
||||||
CALL_ED(screen_reset); break;
|
CALL_ED(screen_reset); break;
|
||||||
case ESC_IND:
|
case ESC_IND:
|
||||||
@ -277,7 +283,7 @@ dispatch_esc_mode_byte(PS *self) {
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
REPORT_ERROR("%s0x%x", "Unknown char after ESC: ", ch);
|
REPORT_ERROR("%s0x%x", "Unknown char after ESC: ", ch);
|
||||||
SET_STATE(VTE_NORMAL); break;
|
SET_STATE(NORMAL); break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -325,7 +331,7 @@ dispatch_esc_mode_byte(PS *self) {
|
|||||||
default:
|
default:
|
||||||
REPORT_ERROR("Unhandled charset related escape code: 0x%x 0x%x", self->parser_buf[0], ch); break;
|
REPORT_ERROR("Unhandled charset related escape code: 0x%x 0x%x", self->parser_buf[0], ch); break;
|
||||||
}
|
}
|
||||||
SET_STATE(VTE_NORMAL);
|
SET_STATE(NORMAL);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#undef CALL_ED
|
#undef CALL_ED
|
||||||
@ -552,7 +558,7 @@ END_ALLOW_CASE_RANGE
|
|||||||
if (self->parser_buf_pos > 0 && self->parser_buf[self->parser_buf_pos-1] == ESC) {
|
if (self->parser_buf_pos > 0 && self->parser_buf[self->parser_buf_pos-1] == ESC) {
|
||||||
if (ch == '\\') { self->parser_buf_pos--; return true; }
|
if (ch == '\\') { self->parser_buf_pos--; return true; }
|
||||||
REPORT_ERROR("DCS sequence contained ESC without trailing \\ at pos: %u ignoring the sequence", self->parser_buf_pos);
|
REPORT_ERROR("DCS sequence contained ESC without trailing \\ at pos: %u ignoring the sequence", self->parser_buf_pos);
|
||||||
SET_STATE(VTE_ESC); return false;
|
SET_STATE(ESC); return false;
|
||||||
}
|
}
|
||||||
if (self->parser_buf_pos >= PARSER_BUF_SZ - 1) {
|
if (self->parser_buf_pos >= PARSER_BUF_SZ - 1) {
|
||||||
REPORT_ERROR("DCS sequence too long, truncating.");
|
REPORT_ERROR("DCS sequence too long, truncating.");
|
||||||
@ -682,7 +688,7 @@ accumulate_csi(PS *self) {
|
|||||||
#define ENSURE_SPACE \
|
#define ENSURE_SPACE \
|
||||||
if (self->parser_buf_pos > PARSER_BUF_SZ - 1) { \
|
if (self->parser_buf_pos > PARSER_BUF_SZ - 1) { \
|
||||||
REPORT_ERROR("CSI sequence too long, ignoring"); \
|
REPORT_ERROR("CSI sequence too long, ignoring"); \
|
||||||
SET_STATE(VTE_NORMAL); \
|
SET_STATE(NORMAL); \
|
||||||
return false; \
|
return false; \
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -701,7 +707,7 @@ accumulate_csi(PS *self) {
|
|||||||
case '=':
|
case '=':
|
||||||
if (self->parser_buf_pos != 0) {
|
if (self->parser_buf_pos != 0) {
|
||||||
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
||||||
SET_STATE(VTE_NORMAL);
|
SET_STATE(NORMAL);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
ENSURE_SPACE;
|
ENSURE_SPACE;
|
||||||
@ -733,11 +739,11 @@ END_ALLOW_CASE_RANGE
|
|||||||
break;
|
break;
|
||||||
case NUL:
|
case NUL:
|
||||||
case DEL:
|
case DEL:
|
||||||
SET_STATE(VTE_NORMAL);
|
SET_STATE(NORMAL);
|
||||||
break; // no-op
|
break; // no-op
|
||||||
default:
|
default:
|
||||||
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
||||||
SET_STATE(VTE_NORMAL);
|
SET_STATE(NORMAL);
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -1345,25 +1351,25 @@ accumulate_oth(PS *self) {
|
|||||||
dispatch##_esc_mode_byte(self); \
|
dispatch##_esc_mode_byte(self); \
|
||||||
break; \
|
break; \
|
||||||
case VTE_CSI: \
|
case VTE_CSI: \
|
||||||
if (accumulate_csi(self)) { dispatch##_csi(self); SET_STATE(VTE_NORMAL); watch_for_pending; } \
|
if (accumulate_csi(self)) { dispatch##_csi(self); SET_STATE(NORMAL); watch_for_pending; } \
|
||||||
break; \
|
break; \
|
||||||
case VTE_OSC: \
|
case VTE_OSC: \
|
||||||
if (accumulate_osc(self)) { \
|
if (accumulate_osc(self)) { \
|
||||||
dispatch##_osc(self); \
|
dispatch##_osc(self); \
|
||||||
if (self->extended_osc_code) { \
|
if (self->extended_osc_code) { \
|
||||||
self->input_pos--; \
|
self->input_pos--; \
|
||||||
if (accumulate_osc(self)) { dispatch##_osc(self); SET_STATE(VTE_NORMAL); } \
|
if (accumulate_osc(self)) { dispatch##_osc(self); SET_STATE(NORMAL); } \
|
||||||
} else { SET_STATE(VTE_NORMAL); } \
|
} else { SET_STATE(NORMAL); } \
|
||||||
} \
|
} \
|
||||||
break; \
|
break; \
|
||||||
case VTE_APC: \
|
case VTE_APC: \
|
||||||
if (accumulate_oth(self)) { dispatch##_apc(self); SET_STATE(VTE_NORMAL); } \
|
if (accumulate_oth(self)) { dispatch##_apc(self); SET_STATE(NORMAL); } \
|
||||||
break; \
|
break; \
|
||||||
case VTE_PM: \
|
case VTE_PM: \
|
||||||
if (accumulate_oth(self)) { dispatch##_pm(self); SET_STATE(VTE_NORMAL); } \
|
if (accumulate_oth(self)) { dispatch##_pm(self); SET_STATE(NORMAL); } \
|
||||||
break; \
|
break; \
|
||||||
case VTE_DCS: \
|
case VTE_DCS: \
|
||||||
if (accumulate_dcs(self)) { dispatch##_dcs(self); SET_STATE(VTE_NORMAL); watch_for_pending; } \
|
if (accumulate_dcs(self)) { dispatch##_dcs(self); SET_STATE(NORMAL); watch_for_pending; } \
|
||||||
if (self->vte_state == ESC) { self->input_pos--; dispatch##_esc_mode_byte(self); } \
|
if (self->vte_state == ESC) { self->input_pos--; dispatch##_esc_mode_byte(self); } \
|
||||||
break; \
|
break; \
|
||||||
case VTE_NORMAL: \
|
case VTE_NORMAL: \
|
||||||
@ -1389,7 +1395,7 @@ pending_normal_mode_byte(PS *self) {
|
|||||||
uint8_t ch = self->input_data[self->input_pos++];
|
uint8_t ch = self->input_data[self->input_pos++];
|
||||||
switch(ch) {
|
switch(ch) {
|
||||||
case ESC:
|
case ESC:
|
||||||
SET_STATE(VTE_ESC); break;
|
SET_STATE(ESC); break;
|
||||||
default:
|
default:
|
||||||
ensure_pending_space(self, 1);
|
ensure_pending_space(self, 1);
|
||||||
self->pending_mode.buf[self->pending_mode.used++] = ch;
|
self->pending_mode.buf[self->pending_mode.used++] = ch;
|
||||||
@ -1405,20 +1411,20 @@ pending_esc_mode_byte(PS *self) {
|
|||||||
self->pending_mode.buf[self->pending_mode.used++] = ESC;
|
self->pending_mode.buf[self->pending_mode.used++] = ESC;
|
||||||
self->pending_mode.buf[self->pending_mode.used++] = self->parser_buf[self->parser_buf_pos - 1];
|
self->pending_mode.buf[self->pending_mode.used++] = self->parser_buf[self->parser_buf_pos - 1];
|
||||||
self->pending_mode.buf[self->pending_mode.used++] = ch;
|
self->pending_mode.buf[self->pending_mode.used++] = ch;
|
||||||
SET_STATE(VTE_NORMAL);
|
SET_STATE(NORMAL);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case ESC_DCS:
|
case ESC_DCS:
|
||||||
SET_STATE(VTE_DCS); break;
|
SET_STATE(DCS); break;
|
||||||
case ESC_OSC:
|
case ESC_OSC:
|
||||||
SET_STATE(VTE_OSC); break;
|
SET_STATE(OSC); break;
|
||||||
case ESC_CSI:
|
case ESC_CSI:
|
||||||
SET_STATE(VTE_CSI); break;
|
SET_STATE(CSI); break;
|
||||||
case ESC_APC:
|
case ESC_APC:
|
||||||
SET_STATE(VTE_APC); break;
|
SET_STATE(APC); break;
|
||||||
case ESC_PM:
|
case ESC_PM:
|
||||||
SET_STATE(VTE_PM); break;
|
SET_STATE(PM); break;
|
||||||
IS_ESCAPED_CHAR:
|
IS_ESCAPED_CHAR:
|
||||||
self->parser_buf[self->parser_buf_pos++] = ch;
|
self->parser_buf[self->parser_buf_pos++] = ch;
|
||||||
break;
|
break;
|
||||||
@ -1426,7 +1432,7 @@ pending_esc_mode_byte(PS *self) {
|
|||||||
ensure_pending_space(self, 2);
|
ensure_pending_space(self, 2);
|
||||||
self->pending_mode.buf[self->pending_mode.used++] = ESC;
|
self->pending_mode.buf[self->pending_mode.used++] = ESC;
|
||||||
self->pending_mode.buf[self->pending_mode.used++] = ch;
|
self->pending_mode.buf[self->pending_mode.used++] = ch;
|
||||||
SET_STATE(VTE_NORMAL); break;
|
SET_STATE(NORMAL); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1651,7 +1657,8 @@ free_vt_parser(Parser* self) {
|
|||||||
static void
|
static void
|
||||||
reset(PS *self) {
|
reset(PS *self) {
|
||||||
self->vte_state = VTE_NORMAL;
|
self->vte_state = VTE_NORMAL;
|
||||||
self->utf8_state = UTF8_ACCEPT;
|
self->utf8.state = UTF8_ACCEPT;
|
||||||
|
self->utf8.prev = UTF8_ACCEPT;
|
||||||
self->parser_buf_pos = 0;
|
self->parser_buf_pos = 0;
|
||||||
|
|
||||||
self->pending_mode.activated_at = 0;
|
self->pending_mode.activated_at = 0;
|
||||||
|
@ -84,6 +84,24 @@ class TestParser(BaseTest):
|
|||||||
c1_controls = '\x84\x85\x88\x8d\x8e\x8f\x90\x96\x97\x98\x9a\x9b\x9c\x9d\x9e\x9f'
|
c1_controls = '\x84\x85\x88\x8d\x8e\x8f\x90\x96\x97\x98\x9a\x9b\x9c\x9d\x9e\x9f'
|
||||||
pb(c1_controls, c1_controls)
|
pb(c1_controls, c1_controls)
|
||||||
self.assertFalse(str(s.line(1)) + str(s.line(2)) + str(s.line(3)))
|
self.assertFalse(str(s.line(1)) + str(s.line(2)) + str(s.line(3)))
|
||||||
|
pb('😀'.encode()[:-1])
|
||||||
|
pb('\x1b\x1b%a', ('Unknown char after ESC: 0x1b',), ('draw', '%a'))
|
||||||
|
|
||||||
|
def test_utf8_parsing(self):
|
||||||
|
s = self.create_screen()
|
||||||
|
pb = partial(self.parse_bytes_dump, s)
|
||||||
|
pb(b'"\xbf"', '"\ufffd"')
|
||||||
|
pb(b'"\x80"', '"\ufffd"')
|
||||||
|
pb(b'"\x80\xbf"', '"\ufffd\ufffd"')
|
||||||
|
pb(b'"\x80\xbf\x80"', '"\ufffd\ufffd\ufffd"')
|
||||||
|
pb(b'"\xc0 "', '"\ufffd "')
|
||||||
|
pb(b'"\xfe"', '"\ufffd"')
|
||||||
|
pb(b'"\xff"', '"\ufffd"')
|
||||||
|
pb(b'"\xff\xfe"', '"\ufffd\ufffd"')
|
||||||
|
pb(b'"\xfe\xfe\xff\xff"', '"\ufffd\ufffd\ufffd\ufffd"')
|
||||||
|
pb(b'"\xef\xbf"', '"\ufffd"')
|
||||||
|
pb(b'"\xe0\xa0"', '"\ufffd"')
|
||||||
|
pb(b'"\xf0\x9f\x98"', '"\ufffd"')
|
||||||
|
|
||||||
def test_esc_codes(self):
|
def test_esc_codes(self):
|
||||||
s = self.create_screen()
|
s = self.create_screen()
|
||||||
@ -395,6 +413,11 @@ class TestParser(BaseTest):
|
|||||||
pb('\033[?2026h\033', ('screen_set_mode', 2026, 1),)
|
pb('\033[?2026h\033', ('screen_set_mode', 2026, 1),)
|
||||||
s.set_pending_activated_at(0.00001)
|
s.set_pending_activated_at(0.00001)
|
||||||
pb(']8;;\x07', ('set_active_hyperlink', None, None))
|
pb(']8;;\x07', ('set_active_hyperlink', None, None))
|
||||||
|
pb('😀'.encode()[:-1])
|
||||||
|
pb('\033[?2026h', ('screen_set_mode', 2026, 1),)
|
||||||
|
pb('😀'.encode()[-1:])
|
||||||
|
pb('\033[?2026l', '\ufffd', ('screen_reset_mode', 2026, 1),)
|
||||||
|
pb('a', ('draw', 'a'))
|
||||||
|
|
||||||
def test_oth_codes(self):
|
def test_oth_codes(self):
|
||||||
s = self.create_screen()
|
s = self.create_screen()
|
||||||
|
Loading…
Reference in New Issue
Block a user