Native code implementation for expanding ANSI escape codes

This commit is contained in:
Kovid Goyal 2022-11-25 17:33:41 +05:30
parent 4d3f3b5e91
commit cfc6bd4da5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 92 additions and 3 deletions

View File

@ -26,8 +26,6 @@ def ctrl_mask_char(ch: str) -> str:
o = ord(ch)
except Exception:
return ch
if o > 127:
return ch
return chr(o & 0b0011111)

View File

@ -233,8 +233,77 @@ wrapped_kittens(PyObject *self UNUSED, PyObject *args UNUSED) {
return ans;
}
/*
 * Expand ANSI C style backslash escapes in a Python str and return the
 * expanded text as a new str.
 *
 * Recognised escapes:
 *   \a \b \e \E \f \n \r \t \v   the usual single control characters
 *   \\ \? \" \'                  the literal character
 *   \cX                          X with all but its low five bits cleared
 *   \xH[H]                       code point given by up to 2 hex digits
 *   \uH[HHH]                     code point given by up to 4 hex digits
 *   \UH[HHHHHHH]                 code point given by up to 8 hex digits
 *   \N[NN]                       code point given by up to 3 octal digits
 *
 * Anything else following a backslash is copied through unchanged together
 * with the backslash. The same applies to \x, \u and \U that are not followed
 * by at least one hex digit, to \c at the very end of the input and to a
 * trailing lone backslash.
 *
 * src must be a str, otherwise TypeError is raised. ValueError is raised when
 * a \U escape names a value above U+10FFFF. Returns a new reference, or NULL
 * with an exception set.
 */
static PyObject*
expand_ansi_c_escapes(PyObject *self UNUSED, PyObject *src) {
    enum { NORMAL, PREV_ESC, HEX_DIGIT, OCT_DIGIT, CONTROL_CHAR } state = NORMAL;
    /* METH_O hands over whatever object the caller passed, so check the type
     * before using any of the unicode macros on it. */
    if (!PyUnicode_Check(src)) {
        PyErr_SetString(PyExc_TypeError, "expand_ansi_c_escapes() requires a str argument");
        return NULL;
    }
    if (PyUnicode_READY(src) != 0) return NULL;
    const Py_ssize_t src_len = PyUnicode_GET_LENGTH(src);
    int max_num_digits = 0, num_digits = 0, base = 16;
    char digits[16];  /* at most 8 digits plus the terminating NUL */
    Py_UCS4 escape_char = 0;  /* the x, u or U that started the current hex escape */
    Py_ssize_t idx = 0, dest_idx = 0;
    PyObject *ans = NULL;
    /* No escape expands to more characters than it occupies in the source, so
     * this buffer can never overflow. The maximum code point is requested so
     * that any expanded value fits without having to resize or widen. */
    PyObject *dest = PyUnicode_New(src_len * 2, 1114111);
    if (dest == NULL) return NULL;
    const int kind = PyUnicode_KIND(src), dest_kind = PyUnicode_KIND(dest);
    const void *data = PyUnicode_DATA(src);
    void *dest_data = PyUnicode_DATA(dest);
#define w(c) { PyUnicode_WRITE(dest_kind, dest_data, dest_idx, c); dest_idx++; }
/* Finish the numeric escape being collected: emit the code point it names, or,
 * when no digit followed the introducer, the introducer itself unchanged. */
#define flush_digits() { \
    if (num_digits > 0) { \
        digits[num_digits] = 0; \
        const unsigned long cp = strtoul(digits, NULL, base); \
        if (cp > 0x10ffff) { \
            PyErr_Format(PyExc_ValueError, "Invalid Unicode code point in escape sequence: \\%c%s", (int)escape_char, digits); \
            goto end; \
        } \
        w((Py_UCS4)cp); \
    } else { w('\\'); w(escape_char); } \
    num_digits = 0; state = NORMAL; \
}
    START_ALLOW_CASE_RANGE
    while (idx < src_len) {
        Py_UCS4 ch = PyUnicode_READ(kind, data, idx); idx++;
        switch(state) {
            case NORMAL: {
                /* a backslash that is the last character has nothing to escape */
                if (ch == '\\' && idx < src_len) { state = PREV_ESC; }
                else { w(ch); }
            } break;
            case CONTROL_CHAR: w(ch & 0x1f); state = NORMAL; break;
            case HEX_DIGIT:
            case OCT_DIGIT: {
                const int is_digit = state == HEX_DIGIT ?
                    (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')) :
                    ('0' <= ch && ch <= '7');
                if (is_digit && num_digits < max_num_digits) {
                    digits[num_digits++] = (char)ch;
                } else {
                    flush_digits();
                    /* ch is not part of the number: step back so that it is
                     * read again in the NORMAL state, which lets it start a
                     * new escape sequence if it is a backslash. */
                    idx--;
                }
            } break;
            case PREV_ESC: {
                state = NORMAL;
                switch(ch) {
                    default: w('\\'); w(ch); break;
                    case 'a': w(7); break;
                    case 'b': w(8); break;
                    case 'c': if (idx < src_len) { state = CONTROL_CHAR; } else { w('\\'); w(ch); } break;
                    case 'e': case 'E': w(27); break;
                    case 'f': w(12); break;
                    case 'n': w(10); break;
                    case 'r': w(13); break;
                    case 't': w(9); break;
                    case 'v': w(11); break;
                    case 'x': case 'u': case 'U':
                        max_num_digits = ch == 'x' ? 2 : (ch == 'u' ? 4 : 8);
                        num_digits = 0; base = 16; escape_char = ch; state = HEX_DIGIT;
                        break;
                    case '0' ... '7':
                        /* the first octal digit doubles as the introducer */
                        max_num_digits = 3; digits[0] = (char)ch; num_digits = 1; base = 8; state = OCT_DIGIT;
                        break;
                    case '\\': w('\\'); break;
                    case '?': w('?'); break;
                    case '"': w('"'); break;
                    case '\'': w('\''); break;
                }
            } break;
        }
    }
    /* the input ended in the middle of a numeric escape */
    if (state == HEX_DIGIT || state == OCT_DIGIT) flush_digits();
    END_ALLOW_CASE_RANGE
    /* copy only the part of the oversized buffer that was actually filled */
    ans = PyUnicode_FromKindAndData(dest_kind, dest_data, dest_idx);
end:
#undef flush_digits
#undef w
    Py_DECREF(dest);
    return ans;
}
static PyMethodDef module_methods[] = {
{"wcwidth", (PyCFunction)wcwidth_wrap, METH_O, ""},
{"expand_ansi_c_escapes", (PyCFunction)expand_ansi_c_escapes, METH_O, ""},
{"get_docs_ref_map", (PyCFunction)get_docs_ref_map, METH_NOARGS, ""},
{"getpeereid", (PyCFunction)py_getpeereid, METH_VARARGS, ""},
{"wcswidth", (PyCFunction)wcswidth_std, METH_O, ""},

View File

@ -1494,3 +1494,4 @@ def run_with_activation_token(func: Callable[[str], None]) -> None: ...
def make_x11_window_a_dock_window(x11_window_id: int, strut: Tuple[int, int, int, int, int, int, int, int, int, int, int, int]) -> None: ...
def unicode_database_version() -> Tuple[int, int, int]: ...
def wrapped_kittens() -> FrozenSet[str]: ...
# Expand ANSI C style backslash escapes (\a \b \e \f \n \r \t \v, \cX, \xHH,
# \uHHHH, \UHHHHHHHH and \NNN octal) in the given string and return the result.
# Unrecognised escapes are passed through unchanged.
def expand_ansi_c_escapes(test: str) -> str: ...

View File

@ -9,7 +9,7 @@ from kitty.config import build_ansi_color_table, defaults
from kitty.fast_data_types import (
Color, ColorProfile, Cursor as C, HistoryBuf, LineBuf,
parse_input_from_terminal, strip_csi, truncate_point_for_length, wcswidth,
wcwidth
wcwidth, expand_ansi_c_escapes
)
from kitty.rgb import to_color
from kitty.utils import is_path_in_temp_dir, sanitize_title
@ -568,3 +568,24 @@ class TestDataTypes(BaseTest):
self.assertNotIn(b'\x1b[201~', q)
self.assertNotIn('\x9b201~'.encode('utf-8'), q)
self.assertIn(b'ab', q)
def test_expand_ansi_c_escapes(self):
    # Each pair is (raw text containing backslash escapes, expected expansion).
    # The raw side uses r'' literals so the backslashes reach the function
    # untouched; the expected side relies on Python's own escape handling.
    cases = (
        ('abc', 'abc'),
        (r'a\ab', 'a\ab'),
        (r'a\eb', 'a\x1bb'),
        (r'a\r\nb', 'a\r\nb'),
        (r'a\c b', 'a\0b'),
        (r'a\c', 'a\\c'),
        (r'a\x1bb', 'a\x1bb'),
        (r'a\x1b', 'a\x1b'),
        (r'a\x1', 'a\x01'),
        (r'a\x1g', 'a\x01g'),
        (r'a\z\"', 'a\\z"'),
        (r'a\123b', 'a\123b'),
        (r'a\128b', 'a\0128b'),
        (r'a\u1234e', 'a\u1234e'),
        (r'a\U1f1eez', 'a\U0001f1eez'),
    )
    for raw, want in cases:
        got = expand_ansi_c_escapes(raw)
        self.ae(want, got)