diff --git a/kitty/screen.c b/kitty/screen.c index f0d7bb8e8..b92e36dd0 100644 --- a/kitty/screen.c +++ b/kitty/screen.c @@ -674,10 +674,28 @@ ensure_cursor_not_on_wide_char_trailer_for_insert(Screen *self, text_loop_state static void draw_text_loop(Screen *self, const uint32_t *chars, size_t num_chars, text_loop_state *s) { init_text_loop_line(self, s); - if (chars[0] < 0x7f || !is_combining_char(chars[0])) ensure_cursor_not_on_wide_char_trailer_for_insert(self, s); + if ((' ' <= chars[0] && chars[0] < 0x7f) || !is_combining_char(chars[0])) ensure_cursor_not_on_wide_char_trailer_for_insert(self, s); /* printable ASCII short-circuits the is_combining_char() lookup */ for (size_t i = 0; i < num_chars; i++) { uint32_t ch = chars[i]; - if (ch < ' ') continue; + if (ch < ' ') { + switch (ch) { + case BEL: + screen_bell(self); break; + case BS: + screen_backspace(self); break; + case HT: + screen_tab(self); break; + case LF: + case VT: + case FF: + screen_linefeed(self); init_text_loop_line(self, s); break; + case CR: + screen_carriage_return(self); break; + default: + break; + } + continue; + } int char_width = 1; if (ch > 0x7f) { // not printable ASCII if (is_ignored_char(ch)) continue; diff --git a/kitty/simd-string.c b/kitty/simd-string.c index 1dfb0c878..c7c751045 100644 --- a/kitty/simd-string.c +++ b/kitty/simd-string.c @@ -34,27 +34,26 @@ find_either_of_two_bytes(const uint8_t *haystack, const size_t sz, const uint8_t // UTF-8 {{{ -static unsigned +static bool utf8_decode_to_sentinel_scalar(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, const uint8_t sentinel) { - unsigned num_consumed = 0, num_output = 0; - while (num_consumed < src_sz && num_output < arraysz(d->output)) { - const uint8_t ch = src[num_consumed++]; - if (ch < ' ') { + d->output_sz = 0; d->num_consumed = 0; + while (d->num_consumed < src_sz && d->output_sz < arraysz(d->output)) { + const uint8_t ch = src[d->num_consumed++]; + if (ch == sentinel) { + if (d->state.cur != UTF8_ACCEPT) d->output[d->output_sz++] = 0xfffd; zero_at_ptr(&d->state); - if 
(num_output) { d->output_chars_callback(d->callback_data, d->output, num_output); num_output = 0; } - d->control_byte_callback(d->callback_data, ch); - if (ch == sentinel) break; + return true; } else { switch(decode_utf8(&d->state.cur, &d->state.codep, ch)) { case UTF8_ACCEPT: - d->output[num_output++] = d->state.codep; + d->output[d->output_sz++] = d->state.codep; break; case UTF8_REJECT: { const bool prev_was_accept = d->state.prev == UTF8_ACCEPT; zero_at_ptr(&d->state); - d->output[num_output++] = 0xfffd; - if (!prev_was_accept) { - num_consumed--; + d->output[d->output_sz++] = 0xfffd; + if (!prev_was_accept && d->num_consumed) { + d->num_consumed--; continue; // so that prev is correct } } break; @@ -62,13 +61,12 @@ utf8_decode_to_sentinel_scalar(UTF8Decoder *d, const uint8_t *src, const size_t } d->state.prev = d->state.cur; } - if (num_output) d->output_chars_callback(d->callback_data, d->output, num_output); - return num_consumed; + return false; } -static unsigned (*utf8_decode_to_sentinel_impl)(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, const uint8_t sentinel) = utf8_decode_to_sentinel_scalar; +static bool (*utf8_decode_to_sentinel_impl)(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, const uint8_t sentinel) = utf8_decode_to_sentinel_scalar; -unsigned +bool utf8_decode_to_sentinel(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, const uint8_t sentinel) { return utf8_decode_to_sentinel_impl(d, src, src_sz, sentinel); } @@ -76,22 +74,6 @@ utf8_decode_to_sentinel(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, // }}} // Boilerplate {{{ -static void -test_control_byte_callback(void *l, uint8_t ch) { - if (!PyErr_Occurred()) { - RAII_PyObject(c, PyLong_FromUnsignedLong((unsigned long)ch)); - if (c) PyList_Append((PyObject*)l, c); - } -} - -static void -test_output_chars_callback(void *l, const uint32_t *chars, unsigned sz) { - if (!PyErr_Occurred()) { - RAII_PyObject(c, PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 
chars, (Py_ssize_t)sz)); - if (c) PyList_Append((PyObject*)l, c); - } -} - static PyObject* test_utf8_decode_to_sentinel(PyObject *self UNUSED, PyObject *args) { const uint8_t *src; Py_ssize_t src_sz; @@ -99,24 +81,20 @@ test_utf8_decode_to_sentinel(PyObject *self UNUSED, PyObject *args) { static UTF8Decoder d = {0}; unsigned char sentinel = 0x1b; if (!PyArg_ParseTuple(args, "s#|iB", &src, &src_sz, &which_function, &sentinel)) return NULL; - RAII_PyObject(ans, PyList_New(0)); - d.callback_data = ans; - d.control_byte_callback = test_control_byte_callback; - d.output_chars_callback = test_output_chars_callback; - unsigned long consumed; + bool found_sentinel = false; switch(which_function) { case -1: zero_at_ptr(&d); Py_RETURN_NONE; case 1: - consumed = utf8_decode_to_sentinel_scalar(&d, src, src_sz, sentinel); break; + found_sentinel = utf8_decode_to_sentinel_scalar(&d, src, src_sz, sentinel); break; case 2: - consumed = utf8_decode_to_sentinel_128(&d, src, src_sz, sentinel); break; + found_sentinel = utf8_decode_to_sentinel_128(&d, src, src_sz, sentinel); break; case 3: - consumed = utf8_decode_to_sentinel_256(&d, src, src_sz, sentinel); break; + found_sentinel = utf8_decode_to_sentinel_256(&d, src, src_sz, sentinel); break; default: - consumed = utf8_decode_to_sentinel(&d, src, src_sz, sentinel); break; + found_sentinel = utf8_decode_to_sentinel(&d, src, src_sz, sentinel); break; } - return Py_BuildValue("kO", consumed, ans); + return Py_BuildValue("ON", found_sentinel ? 
Py_True : Py_False, PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, d.output, d.output_sz)); } // }}} diff --git a/kitty/simd-string.h b/kitty/simd-string.h index df807ac70..2fe2d55d3 100644 --- a/kitty/simd-string.h +++ b/kitty/simd-string.h @@ -17,14 +17,12 @@ typedef void (*output_chars_callback)(void *data, const uint32_t *chars, unsigne typedef struct UTF8Decoder { alignas(512/8) uint32_t output[512/8]; // we can process at most 512 bits of input (AVX512) so we get at most 64 chars of output - struct { uint32_t cur, prev, codep; } state; + unsigned output_sz, num_consumed; - void *callback_data; - control_byte_callback control_byte_callback; - output_chars_callback output_chars_callback; + struct { uint32_t cur, prev, codep; } state; } UTF8Decoder; static inline void utf8_decoder_reset(UTF8Decoder *self) { zero_at_ptr(&self->state); } -unsigned utf8_decode_to_sentinel(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, const uint8_t sentinel); +bool utf8_decode_to_sentinel(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, const uint8_t sentinel); // Pass a PyModule PyObject* as the argument. Must be called once at application startup bool init_simd(void* module); diff --git a/kitty/vt-parser.c b/kitty/vt-parser.c index 518d1730b..c096b9af1 100644 --- a/kitty/vt-parser.c +++ b/kitty/vt-parser.c @@ -74,15 +74,6 @@ _report_params(PyObject *dump_callback, id_type window_id, const char *name, int Py_XDECREF(PyObject_CallFunction(dump_callback, "Kss", window_id, name, buf)); PyErr_Clear(); } -static void -_report_draw(PyObject *dump_callback, id_type window_id, const uint32_t *chars, unsigned num) { - RAII_PyObject(s, PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, chars, num)); - if (s) { - RAII_PyObject(t, PyObject_CallFunction(dump_callback, "KsO", window_id, "draw", s)); - if (t == NULL) PyErr_Clear(); - } -} - #define DUMP_UNUSED #define REPORT_ERROR(...) 
_report_error(self->dump_callback, self->window_id, __VA_ARGS__); @@ -100,7 +91,24 @@ _report_draw(PyObject *dump_callback, id_type window_id, const uint32_t *chars, #define REPORT_COMMAND(...) GET_MACRO(__VA_ARGS__, REPORT_COMMAND3, REPORT_COMMAND2, REPORT_COMMAND1, SENTINEL)(__VA_ARGS__) #define REPORT_VA_COMMAND(...) Py_XDECREF(PyObject_CallFunction(self->dump_callback, __VA_ARGS__)); PyErr_Clear(); -#define REPORT_DRAW(chars, num) _report_draw(self->dump_callback, self->window_id, chars, num); +#define REPORT_DRAW(chars, num) { \ + for (unsigned i = 0; i < num; i++) { \ + uint32_t ch = chars[i]; \ + switch(ch) { \ + case BEL: REPORT_COMMAND(screen_bell); break; \ + case BS: REPORT_COMMAND(screen_backspace); break; \ + case HT: REPORT_COMMAND(screen_tab); break; \ + case LF: case VT: case FF: REPORT_COMMAND(screen_linefeed); break; \ + case CR: REPORT_COMMAND(screen_carriage_return); break; \ + default: \ + if (ch >= ' ') { \ + RAII_PyObject(t, PyObject_CallFunction(self->dump_callback, "KsC", self->window_id, "draw", ch)); \ + if (t == NULL) PyErr_Clear(); \ + } \ + } \ + } \ +} + #define REPORT_PARAMS(name, params, num, is_group, region) _report_params(self->dump_callback, self->window_id, name, params, num_params, is_group, region) @@ -117,7 +125,7 @@ _report_draw(PyObject *dump_callback, id_type window_id, const uint32_t *chars, #define REPORT_ERROR(...) log_error(ERROR_PREFIX " " __VA_ARGS__); #define REPORT_COMMAND(...) #define REPORT_VA_COMMAND(...) -#define REPORT_DRAW(chars, num) +#define REPORT_DRAW(...) #define REPORT_PARAMS(...) 
#define REPORT_OSC(name, string) #define REPORT_OSC2(name, code, string) @@ -219,46 +227,21 @@ reset_csi(ParsedCSI *csi) { // Normal mode {{{ static void -dispatch_single_byte_control(void *s, uint8_t ch) { -#define CALL_SCREEN_HANDLER(name) REPORT_COMMAND(name); name(self->screen); break; - PS *self = s; - switch(ch) { - case BEL: - CALL_SCREEN_HANDLER(screen_bell); - case BS: - CALL_SCREEN_HANDLER(screen_backspace); - case HT: - CALL_SCREEN_HANDLER(screen_tab); - case LF: - case VT: - case FF: - CALL_SCREEN_HANDLER(screen_linefeed); - case CR: - CALL_SCREEN_HANDLER(screen_carriage_return); - case SI: - REPORT_ERROR("Ignoring request to change charset as we only support UTF-8"); break; - case SO: - REPORT_ERROR("Ignoring request to change charset as we only support UTF-8"); break; - case ESC: - SET_STATE(ESC); break; - default: - break; - } -#undef CALL_SCREEN_HANDLER -} - -static void -dispatch_output_chars(void *s, const uint32_t *chars, unsigned sz) { - PS *self = s; - REPORT_DRAW(chars, sz); - screen_draw_text(self->screen, chars, sz); +dispatch_single_byte_control(PS *self, uint32_t ch) { + screen_draw_text(self->screen, &ch, 1); } static void consume_normal(PS *self) { do { - self->read.pos += utf8_decode_to_sentinel(&self->utf8_decoder, self->buf + self->read.pos, self->read.sz - self->read.pos, ESC); - } while (self->read.pos < self->read.sz && self->vte_state == VTE_NORMAL); + const bool sentinel_found = utf8_decode_to_sentinel(&self->utf8_decoder, self->buf + self->read.pos, self->read.sz - self->read.pos, ESC); + self->read.pos += self->utf8_decoder.num_consumed; + if (self->utf8_decoder.output_sz) { + REPORT_DRAW(self->utf8_decoder.output, self->utf8_decoder.output_sz); + screen_draw_text(self->screen, self->utf8_decoder.output, self->utf8_decoder.output_sz); + } + if (sentinel_found) { SET_STATE(ESC); break; } + } while (self->read.pos < self->read.sz); } // }}} @@ -1555,10 +1538,6 @@ run_worker(void *p, ParseData *pd, bool flush) { pd->input_read = 
true; self->dump_callback = pd->dump_callback; self->now = pd->now; self->screen = p; - // these are here as they need to be specialized to dump/non dump versions - self->utf8_decoder.control_byte_callback = dispatch_single_byte_control; - self->utf8_decoder.output_chars_callback = dispatch_output_chars; - self->utf8_decoder.callback_data = self; do { end_with_lock; { do_parse_vt(self); diff --git a/kitty_tests/parser.py b/kitty_tests/parser.py index bbc13b632..9dfc6753c 100644 --- a/kitty_tests/parser.py +++ b/kitty_tests/parser.py @@ -175,7 +175,7 @@ class TestParser(BaseTest): pb(c1_controls, c1_controls) self.assertFalse(str(s.line(1)) + str(s.line(2)) + str(s.line(3))) pb('😀'.encode()[:-1]) - pb('\x1b\x1b%a', ('Unknown char after ESC: 0x1b',), ('draw', '%a')) + pb('\x1b\x1b%a', '\ufffd', ('Unknown char after ESC: 0x1b',), ('draw', '%a')) def test_utf8_parsing(self): s = self.create_screen() @@ -515,7 +515,7 @@ class TestParser(BaseTest): s.set_pending_activated_at(0.00001) pb(']8;;\x07', ('set_active_hyperlink', None, None)) pb('😀'.encode()[:-1]) - pb('\033[?2026h', ('screen_start_pending_mode',),) + pb('\033[?2026h', '\ufffd', ('screen_start_pending_mode',),) pb('😀'.encode()[-1:]) pb('\033[?2026l', '\ufffd', ('screen_stop_pending_mode',),) pb('a', ('draw', 'a'))