From 96793a296c2945962e6aced49e9af024bfefc1cf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 8 May 2018 14:48:01 +0530 Subject: [PATCH] diff kitten has working syntax highlighting --- kittens/diff/collect.py | 13 +++- kittens/diff/highlight.py | 20 +++--- kittens/diff/render.py | 74 +++++++++++----------- kittens/diff/speedup.c | 127 ++++++++++++++++++++++++++++++++++++++ kitty/screen.c | 6 +- kitty_tests/diff.py | 26 +++++--- 6 files changed, 202 insertions(+), 64 deletions(-) diff --git a/kittens/diff/collect.py b/kittens/diff/collect.py index df7e249d1..8209e35ff 100644 --- a/kittens/diff/collect.py +++ b/kittens/diff/collect.py @@ -11,6 +11,15 @@ path_name_map = {} +class Segment: + + __slots__ = ('start', 'end', 'start_code', 'end_code') + + def __init__(self, start, start_code): + self.start = start + self.start_code = start_code + + class Collection: def __init__(self): @@ -53,7 +62,7 @@ def __iter__(self): data = self.renames[path] else: data = None - yield path, self.type_map[path], data + yield path, typ, data def collect_files(collection, left, right): @@ -157,4 +166,4 @@ def set_highlight_data(data): def highlights_for_path(path): - return highlight_data.get(path, ()) + return highlight_data.get(path, []) diff --git a/kittens/diff/highlight.py b/kittens/diff/highlight.py index cb6fdba45..fe47b7a7c 100644 --- a/kittens/diff/highlight.py +++ b/kittens/diff/highlight.py @@ -13,7 +13,7 @@ from kitty.rgb import color_as_sgr, parse_sharp -from .collect import data_for_path, lines_for_path +from .collect import Segment, data_for_path, lines_for_path class DiffFormatter(Formatter): @@ -85,15 +85,6 @@ def highlight_data(code, filename): split_pat = re.compile(r'(\033\[.*?m)') -class Segment: - - __slots__ = ('start', 'end', 'start_code', 'end_code') - - def __init__(self, start, start_code): - self.start = start - self.start_code = start_code - - def highlight_line(line): ans = [] current = None @@ -127,9 +118,12 @@ def highlight_collection(collection): ans = {} with concurrent.futures.ProcessPoolExecutor(max_workers=os.cpu_count()) as executor: for path, item_type, other_path in collection: - is_binary = isinstance(data_for_path(path), bytes) - if not is_binary: - jobs[executor.submit(highlight_for_diff, path)] = path + if item_type != 'rename': + for p in (path, other_path): + if p: + is_binary = isinstance(data_for_path(p), bytes) + if not is_binary: + jobs[executor.submit(highlight_for_diff, p)] = p for future in concurrent.futures.as_completed(jobs): path = jobs[future] try: diff --git a/kittens/diff/render.py b/kittens/diff/render.py index 0e286b798..6049a9351 100644 --- a/kittens/diff/render.py +++ b/kittens/diff/render.py @@ -8,10 +8,11 @@ from kitty.fast_data_types import truncate_point_for_length, wcswidth from .collect import ( - data_for_path, highlights_for_path, lines_for_path, path_name_map, + Segment, data_for_path, highlights_for_path, lines_for_path, path_name_map, sanitize ) from .config import formats +from .diff_speedup import split_with_highlights as _split_with_highlights class HunkRef: @@ -153,38 +154,20 @@ def split_to_size(line, width): line = line[p:] -def split_to_size_with_center(line, width, prefix_count, suffix_count, start, stop): +def truncate_points(line, width): + pos = 0 sz = len(line) - if prefix_count + suffix_count == sz: - yield from split_to_size(line, width) - return - suffix_pos = sz - suffix_count - pos = state = 0 - while line: - p = truncate_point_for_length(line, width) - if state is 0: - if pos + p > prefix_count: - state = 1 - a, line = line[:p], line[p:] - if pos + p > suffix_pos: - a = a[:suffix_pos - pos] + stop + a[suffix_pos - pos:] - state = 2 - yield a[:prefix_count - pos] + start + a[prefix_count - pos:] - else: - yield line[:p] - line = line[p:] - elif state is 1: - if pos + p > suffix_pos: - state = 2 - a, line = line[:p], line[p:] - yield start + a[:suffix_pos - pos] + stop + a[suffix_pos - pos:] - else: - yield start + line[:p] - line = line[p:] - elif state is 2: - yield line[:p] - line = line[p:] - pos += p + while True: + pos = truncate_point_for_length(line, width, pos) + if pos < sz: + yield pos + else: + break + + +def split_with_highlights(line, width, highlights, bg_highlight=None): + truncate_pts = list(truncate_points(line, width)) + return _split_with_highlights(line, truncate_pts, highlights, bg_highlight) margin_bg_map = {'filler': filler_format, 'remove': removed_margin_format, 'add': added_margin_format, 'context': margin_format} @@ -207,12 +190,12 @@ def __init__(self, left_path, right_path, available_cols, margin_size): def left_highlights_for_line(self, line_num): if line_num < len(self.left_hdata): return self.left_hdata[line_num] - return () + return [] def right_highlights_for_line(self, line_num): if line_num < len(self.right_hdata): return self.right_hdata[line_num] - return () + return [] def render_diff_line(number, text, ltype, margin_size, available_cols): @@ -237,9 +220,18 @@ def hunk_title(hunk_num, hunk, margin_size, available_cols): def render_half_line(line_number, line, highlights, ltype, margin_size, available_cols, changed_center=None): + bg_highlight = None if changed_center is not None and changed_center[0]: - start, stop = highlight_boundaries(ltype) - lines = split_to_size_with_center(line, available_cols, changed_center[0], changed_center[1], start, stop) + prefix_count, suffix_count = changed_center + line_sz = len(line) + if prefix_count + suffix_count < line_sz: + start, stop = highlight_boundaries(ltype) + seg = Segment(prefix_count, start) + seg.end = line_sz - suffix_count + seg.end_code = stop + bg_highlight = seg + if highlights or bg_highlight: + lines = split_with_highlights(line, available_cols, highlights, bg_highlight) else: lines = split_to_size(line, available_cols) line_number = str(line_number + 1) @@ -253,7 +245,11 @@ def lines_for_chunk(data, hunk_num, chunk, chunk_num): for i in range(chunk.left_count): left_line_number = chunk.left_start + i right_line_number = chunk.right_start + i - lines = split_to_size(data.left_lines[left_line_number], data.available_cols) + highlights = data.left_highlights_for_line(left_line_number) + if highlights: + lines = split_with_highlights(data.left_lines[left_line_number], data.available_cols, highlights) + else: + lines = split_to_size(data.left_lines[left_line_number], data.available_cols) ref = Reference(data.left_path, HunkRef(hunk_num, chunk_num, i)) left_line_number = str(left_line_number + 1) right_line_number = str(right_line_number + 1) @@ -277,7 +273,7 @@ def lines_for_chunk(data, hunk_num, chunk, chunk_num): 'remove', data.margin_size, data.available_cols, None if chunk.centers is None else chunk.centers[i])) if i < chunk.right_count: - rln = chunk.left_start + i + rln = chunk.right_start + i rl.extend(render_half_line( rln, data.right_lines[rln], data.right_highlights_for_line(rln), 'add', data.margin_size, data.available_cols, @@ -320,7 +316,7 @@ def all_lines(path, args, columns, margin_size, is_add=True): hdata = highlights_for_path(path) def highlights(num): - return hdata[num] if num < len(hdata) else () + return hdata[num] if num < len(hdata) else [] for line_number, line in enumerate(lines): h = render_half_line(line_number, line, highlights(line_number), ltype, margin_size, available_cols) diff --git a/kittens/diff/speedup.c b/kittens/diff/speedup.c index 234d883e5..059ce85eb 100644 --- a/kittens/diff/speedup.c +++ b/kittens/diff/speedup.c @@ -29,8 +29,135 @@ changed_center(PyObject *self UNUSED, PyObject *args) { return Py_BuildValue("II", prefix_count, suffix_count); } +typedef struct { + unsigned int start_pos, end_pos, current_pos; + PyObject *start_code, *end_code; +} Segment; + +typedef struct { + Segment sg; + unsigned int num, pos; +} SegmentPointer; + +static const Segment EMPTY_SEGMENT = { .current_pos = UINT_MAX }; + +static inline bool +convert_segment(PyObject *highlight, Segment *dest) { + PyObject *val = NULL; +#define I +#define A(x, d, c) { \ + val = PyObject_GetAttrString(highlight, #x); \ + if (val == NULL) return false; \ + dest->d = c(val); Py_DECREF(val); \ +} + A(start, start_pos, PyLong_AsUnsignedLong); + A(end, end_pos, PyLong_AsUnsignedLong); + dest->current_pos = dest->start_pos; + A(start_code, start_code, I); + A(end_code, end_code, I); + if (!PyUnicode_Check(dest->start_code)) { PyErr_SetString(PyExc_TypeError, "start_code is not a string"); return false; } + if (!PyUnicode_Check(dest->end_code)) { PyErr_SetString(PyExc_TypeError, "end_code is not a string"); return false; } +#undef A +#undef I + return true; +} + +static inline bool +next_segment(SegmentPointer *s, PyObject *highlights) { + if (s->pos < s->num) { + if (!convert_segment(PyList_GET_ITEM(highlights, s->pos), &s->sg)) return false; + s->pos++; + } else s->sg.current_pos = UINT_MAX; + return true; +} + +static inline bool +insert_code(PyObject *code, Py_UCS4 *buf, size_t bufsz, unsigned int *buf_pos) { + unsigned int csz = PyUnicode_GET_LENGTH(code); + if (*buf_pos + csz >= bufsz) return false; + for (unsigned int s = 0; s < csz; s++) buf[(*buf_pos)++] = PyUnicode_READ(PyUnicode_KIND(code), PyUnicode_DATA(code), s); + return true; +} + +static inline bool +add_line(Segment *bg_segment, Segment *fg_segment, Py_UCS4 *buf, size_t bufsz, unsigned int *buf_pos, PyObject *ans) { + bool bg_is_active = bg_segment->current_pos == bg_segment->end_pos, fg_is_active = fg_segment->current_pos == fg_segment->end_pos; + if (bg_is_active) { if(!insert_code(bg_segment->end_code, buf, bufsz, buf_pos)) return false; } + if (fg_is_active) { if(!insert_code(fg_segment->end_code, buf, bufsz, buf_pos)) return false; } + PyObject *wl = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, *buf_pos); + if (!wl) return false; + int ret = PyList_Append(ans, wl); Py_DECREF(wl); if (ret != 0) return false; + *buf_pos = 0; + if (bg_is_active) { if(!insert_code(bg_segment->start_code, buf, bufsz, buf_pos)) return false; } + if (fg_is_active) { if(!insert_code(fg_segment->start_code, buf, bufsz, buf_pos)) return false; } + return true; +} + +static PyObject* +split_with_highlights(PyObject *self UNUSED, PyObject *args) { + PyObject *line, *truncate_points_py, *fg_highlights, *bg_highlight; + if (!PyArg_ParseTuple(args, "UO!O!O", &line, &PyList_Type, &truncate_points_py, &PyList_Type, &fg_highlights, &bg_highlight)) return NULL; + PyObject *ans = PyList_New(0); + if (!ans) return NULL; + static unsigned int truncate_points[256]; + unsigned int num_truncate_pts = PyList_GET_SIZE(truncate_points_py), truncate_pos = 0, truncate_point; + for (unsigned int i = 0; i < MIN(num_truncate_pts, arraysz(truncate_points)); i++) { + truncate_points[i] = PyLong_AsUnsignedLong(PyList_GET_ITEM(truncate_points_py, i)); + } + SegmentPointer fg_segment = { .sg = EMPTY_SEGMENT, .num = PyList_GET_SIZE(fg_highlights)}, bg_segment = { .sg = EMPTY_SEGMENT }; + if (bg_highlight != Py_None) { if (!convert_segment(bg_highlight, &bg_segment.sg)) { Py_CLEAR(ans); return NULL; }; bg_segment.num = 1; } +#define CHECK_CALL(func, ...) if (!func(__VA_ARGS__)) { Py_CLEAR(ans); if (!PyErr_Occurred()) PyErr_SetString(PyExc_ValueError, "line too long"); return NULL; } + CHECK_CALL(next_segment, &fg_segment, fg_highlights); + +#define NEXT_TRUNCATE_POINT truncate_point = (truncate_pos < num_truncate_pts) ? truncate_points[truncate_pos++] : UINT_MAX + NEXT_TRUNCATE_POINT; + +#define INSERT_CODE(x) { CHECK_CALL(insert_code, x, buf, arraysz(buf), &buf_pos); } + +#define ADD_LINE CHECK_CALL(add_line, &bg_segment.sg, &fg_segment.sg, buf, arraysz(buf), &buf_pos, ans); + +#define ADD_CHAR(x) { \ + buf[buf_pos++] = x; \ + if (buf_pos >= arraysz(buf)) { Py_CLEAR(ans); PyErr_SetString(PyExc_ValueError, "line too long"); return NULL; } \ +} +#define CHECK_SEGMENT(sgp, is_fg) { \ + if (i == sgp.sg.current_pos) { \ + INSERT_CODE(sgp.sg.current_pos == sgp.sg.start_pos ? sgp.sg.start_code : sgp.sg.end_code); \ + if (sgp.sg.current_pos == sgp.sg.start_pos) sgp.sg.current_pos = sgp.sg.end_pos; \ + else { \ + if (is_fg) { \ + CHECK_CALL(next_segment, &fg_segment, fg_highlights); \ + if (sgp.sg.current_pos == i) { \ + INSERT_CODE(sgp.sg.start_code); \ + sgp.sg.current_pos = sgp.sg.end_pos; \ + } \ + } else sgp.sg.current_pos = UINT_MAX; \ + } \ + }\ +} + + const unsigned int line_sz = PyUnicode_GET_LENGTH(line); + static Py_UCS4 buf[4096]; + unsigned int i = 0, buf_pos = 0; + for (; i < line_sz; i++) { + if (i == truncate_point) { ADD_LINE; NEXT_TRUNCATE_POINT; } + CHECK_SEGMENT(bg_segment, false); + CHECK_SEGMENT(fg_segment, true) + ADD_CHAR(PyUnicode_READ(PyUnicode_KIND(line), PyUnicode_DATA(line), i)); + } + if (buf_pos) ADD_LINE; + return ans; +#undef INSERT_CODE +#undef CHECK_SEGMENT +#undef CHECK_CALL +#undef ADD_CHAR +#undef ADD_LINE +#undef NEXT_TRUNCATE_POINT +} + static PyMethodDef module_methods[] = { {"changed_center", (PyCFunction)changed_center, METH_VARARGS, ""}, + {"split_with_highlights", (PyCFunction)split_with_highlights, METH_VARARGS, ""}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/kitty/screen.c b/kitty/screen.c index e5c30ecc1..abbad857b 100644 --- a/kitty/screen.c +++ b/kitty/screen.c @@ -1524,8 +1524,8 @@ screen_wcswidth(PyObject UNUSED *self, PyObject *str) { static PyObject* screen_truncate_point_for_length(PyObject UNUSED *self, PyObject *args) { - PyObject *str; unsigned int num_cells; - if (!PyArg_ParseTuple(args, "OI", &str, &num_cells)) return NULL; + PyObject *str; unsigned int num_cells, start_pos = 0; + if (!PyArg_ParseTuple(args, "OI|I", &str, &num_cells, &start_pos)) return NULL; if (PyUnicode_READY(str) != 0) return NULL; int kind = PyUnicode_KIND(str); void *data = PyUnicode_DATA(str); @@ -1534,7 +1534,7 @@ screen_truncate_point_for_length(PyObject UNUSED *self, PyObject *args) { int prev_width = 0; bool in_sgr = false; unsigned long width_so_far = 0; - for (i = 0; i < len && width_so_far < num_cells; i++) { + for (i = start_pos; i < len && width_so_far < num_cells; i++) { char_type ch = PyUnicode_READ(kind, data, i); if (in_sgr) { if (ch == 'm') in_sgr = false; diff --git a/kitty_tests/diff.py b/kitty_tests/diff.py index b32398bc9..7c7960dd3 100644 --- a/kitty_tests/diff.py +++ b/kitty_tests/diff.py @@ -22,13 +22,25 @@ def test_changed_center(self): for src in (left, right): self.assertEqual((prefix, suffix), (src[:pc], src[-sc:] if sc else '')) - def test_split_to_size(self): - from kittens.diff.render import split_to_size_with_center + def test_split_with_highlights(self): + from kittens.diff.render import split_with_highlights, Segment, truncate_points + self.ae(list(truncate_points('1234567890ab', 3)), [3, 6, 9]) for line, width, prefix_count, suffix_count, expected in [ ('abcdefgh', 20, 2, 3, ('abSScdeEEfgh',)), - ('abcdefgh', 20, 2, 0, ('abSScdefgh',)), - ('abcdefgh', 3, 2, 3, ('abSSc', 'SSdeEEf', 'gh')), - ('abcdefgh', 2, 4, 1, ('ab', 'cd', 'SSef', 'SSgEEh')), + ('abcdefgh', 20, 2, 0, ('abSScdefghEE',)), + ('abcdefgh', 3, 2, 3, ('abSScEE', 'SSdeEEf', 'gh')), + ('abcdefgh', 2, 4, 1, ('ab', 'cd', 'SSefEE', 'SSgEEh')), ]: - self.ae(expected, tuple(split_to_size_with_center( - line, width, prefix_count, suffix_count, 'SS', 'EE'))) + seg = Segment(prefix_count, 'SS') + seg.end = len(line) - suffix_count + seg.end_code = 'EE' + self.ae(expected, tuple(split_with_highlights(line, width, [], seg))) + + def h(s, e, w): + ans = Segment(s, 'S{}S'.format(w)) + ans.end = e + ans.end_code = 'E{}E'.format(w) + return ans + + highlights = [h(0, 1, 1), h(1, 3, 2)] + self.ae(['S1SaE1ES2SbcE2Ed'], split_with_highlights('abcd', 10, highlights))