From 96793a296c2945962e6aced49e9af024bfefc1cf Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 8 May 2018 14:48:01 +0530
Subject: [PATCH] diff kitten has working syntax highlighting

---
 kittens/diff/collect.py   |  13 +++-
 kittens/diff/highlight.py |  20 +++---
 kittens/diff/render.py    |  74 +++++++++++-----------
 kittens/diff/speedup.c    | 127 ++++++++++++++++++++++++++++++++++++++
 kitty/screen.c            |   6 +-
 kitty_tests/diff.py       |  26 +++++---
 6 files changed, 202 insertions(+), 64 deletions(-)

diff --git a/kittens/diff/collect.py b/kittens/diff/collect.py
index df7e249d1..8209e35ff 100644
--- a/kittens/diff/collect.py
+++ b/kittens/diff/collect.py
@@ -11,6 +11,15 @@
 path_name_map = {}
 
 
+class Segment:
+    # A span of one line plus the codes to insert at its `start` and `end` offsets.
+    __slots__ = ('start', 'end', 'start_code', 'end_code')
+
+    def __init__(self, start, start_code):
+        self.start = start  # `end` and `end_code` are not set here; the creator assigns them afterwards
+        self.start_code = start_code
+
+
 class Collection:
 
     def __init__(self):
@@ -53,7 +62,7 @@ def __iter__(self):
                 data = self.renames[path]
             else:
                 data = None
-            yield path, self.type_map[path], data
+            yield path, typ, data
 
 
 def collect_files(collection, left, right):
@@ -157,4 +166,4 @@ def set_highlight_data(data):
 
 
 def highlights_for_path(path):
-    return highlight_data.get(path, ())
+    return highlight_data.get(path, [])  # empty per-file default; all_lines() indexes the result by line number after a len() check
diff --git a/kittens/diff/highlight.py b/kittens/diff/highlight.py
index cb6fdba45..fe47b7a7c 100644
--- a/kittens/diff/highlight.py
+++ b/kittens/diff/highlight.py
@@ -13,7 +13,7 @@
 
 from kitty.rgb import color_as_sgr, parse_sharp
 
-from .collect import data_for_path, lines_for_path
+from .collect import Segment, data_for_path, lines_for_path
 
 
 class DiffFormatter(Formatter):
@@ -85,15 +85,6 @@ def highlight_data(code, filename):
 split_pat = re.compile(r'(\033\[.*?m)')
 
 
-class Segment:
-
-    __slots__ = ('start', 'end', 'start_code', 'end_code')
-
-    def __init__(self, start, start_code):
-        self.start = start
-        self.start_code = start_code
-
-
 def highlight_line(line):
     ans = []
     current = None
@@ -127,9 +118,12 @@ def highlight_collection(collection):
     ans = {}
     with concurrent.futures.ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
         for path, item_type, other_path in collection:
-            is_binary = isinstance(data_for_path(path), bytes)
-            if not is_binary:
-                jobs[executor.submit(highlight_for_diff, path)] = path
+            if item_type != 'rename':
+                for p in (path, other_path):
+                    if p:
+                        is_binary = isinstance(data_for_path(p), bytes)
+                        if not is_binary:
+                            jobs[executor.submit(highlight_for_diff, p)] = p
         for future in concurrent.futures.as_completed(jobs):
             path = jobs[future]
             try:
diff --git a/kittens/diff/render.py b/kittens/diff/render.py
index 0e286b798..6049a9351 100644
--- a/kittens/diff/render.py
+++ b/kittens/diff/render.py
@@ -8,10 +8,11 @@
 from kitty.fast_data_types import truncate_point_for_length, wcswidth
 
 from .collect import (
-    data_for_path, highlights_for_path, lines_for_path, path_name_map,
+    Segment, data_for_path, highlights_for_path, lines_for_path, path_name_map,
     sanitize
 )
 from .config import formats
+from .diff_speedup import split_with_highlights as _split_with_highlights
 
 
 class HunkRef:
@@ -153,38 +154,20 @@ def split_to_size(line, width):
         line = line[p:]
 
 
-def split_to_size_with_center(line, width, prefix_count, suffix_count, start, stop):
+def truncate_points(line, width):  # generator of the offsets in line at which it must be split to wrap at width
+    pos = 0
     sz = len(line)
-    if prefix_count + suffix_count == sz:
-        yield from split_to_size(line, width)
-        return
-    suffix_pos = sz - suffix_count
-    pos = state = 0
-    while line:
-        p = truncate_point_for_length(line, width)
-        if state is 0:
-            if pos + p > prefix_count:
-                state = 1
-                a, line = line[:p], line[p:]
-                if pos + p > suffix_pos:
-                    a = a[:suffix_pos - pos] + stop + a[suffix_pos - pos:]
-                    state = 2
-                yield a[:prefix_count - pos] + start + a[prefix_count - pos:]
-            else:
-                yield line[:p]
-                line = line[p:]
-        elif state is 1:
-            if pos + p > suffix_pos:
-                state = 2
-                a, line = line[:p], line[p:]
-                yield start + a[:suffix_pos - pos] + stop + a[suffix_pos - pos:]
-            else:
-                yield start + line[:p]
-                line = line[p:]
-        elif state is 2:
-            yield line[:p]
-            line = line[p:]
-        pos += p
+    while True:  # NOTE(review): never terminates if truncate_point_for_length stops advancing pos (width == 0?) -- confirm callers
+        pos = truncate_point_for_length(line, width, pos)  # scan resumes from the previous split offset
+        if pos < sz:
+            yield pos
+        else:  # reached the end of line: the final offset is not yielded
+            break
+
+
+def split_with_highlights(line, width, highlights, bg_highlight=None):  # split line at its wrap offsets; the C helper inserts the highlight codes
+    truncate_pts = list(truncate_points(line, width))  # materialized because the C helper type-checks this argument as a list
+    return _split_with_highlights(line, truncate_pts, highlights, bg_highlight)
 
 
 margin_bg_map = {'filler': filler_format, 'remove': removed_margin_format, 'add': added_margin_format, 'context': margin_format}
@@ -207,12 +190,12 @@ def __init__(self, left_path, right_path, available_cols, margin_size):
     def left_highlights_for_line(self, line_num):
         if line_num < len(self.left_hdata):
             return self.left_hdata[line_num]
-        return ()
+        return []  # a list, not a tuple: the C split_with_highlights type-checks its highlights argument as a list
 
     def right_highlights_for_line(self, line_num):
         if line_num < len(self.right_hdata):
             return self.right_hdata[line_num]
-        return ()
+        return []  # a list, not a tuple: render_half_line may pass an empty value on to the C splitter, which accepts only a list
 
 
 def render_diff_line(number, text, ltype, margin_size, available_cols):
@@ -237,9 +220,18 @@ def hunk_title(hunk_num, hunk, margin_size, available_cols):
 
 
 def render_half_line(line_number, line, highlights, ltype, margin_size, available_cols, changed_center=None):
+    bg_highlight = None
     if changed_center is not None and changed_center[0]:
-        start, stop = highlight_boundaries(ltype)
-        lines = split_to_size_with_center(line, available_cols, changed_center[0], changed_center[1], start, stop)
+        prefix_count, suffix_count = changed_center
+        line_sz = len(line)
+        if prefix_count + suffix_count < line_sz:
+            start, stop = highlight_boundaries(ltype)
+            seg = Segment(prefix_count, start)
+            seg.end = line_sz - suffix_count
+            seg.end_code = stop
+            bg_highlight = seg
+    if highlights or bg_highlight:
+        lines = split_with_highlights(line, available_cols, highlights, bg_highlight)
     else:
         lines = split_to_size(line, available_cols)
     line_number = str(line_number + 1)
@@ -253,7 +245,11 @@ def lines_for_chunk(data, hunk_num, chunk, chunk_num):
         for i in range(chunk.left_count):
             left_line_number = chunk.left_start + i
             right_line_number = chunk.right_start + i
-            lines = split_to_size(data.left_lines[left_line_number], data.available_cols)
+            highlights = data.left_highlights_for_line(left_line_number)
+            if highlights:
+                lines = split_with_highlights(data.left_lines[left_line_number], data.available_cols, highlights)
+            else:
+                lines = split_to_size(data.left_lines[left_line_number], data.available_cols)
             ref = Reference(data.left_path, HunkRef(hunk_num, chunk_num, i))
             left_line_number = str(left_line_number + 1)
             right_line_number = str(right_line_number + 1)
@@ -277,7 +273,7 @@ def lines_for_chunk(data, hunk_num, chunk, chunk_num):
                     'remove', data.margin_size, data.available_cols,
                     None if chunk.centers is None else chunk.centers[i]))
             if i < chunk.right_count:
-                rln = chunk.left_start + i
+                rln = chunk.right_start + i
                 rl.extend(render_half_line(
                     rln, data.right_lines[rln], data.right_highlights_for_line(rln),
                     'add', data.margin_size, data.available_cols,
@@ -320,7 +316,7 @@ def all_lines(path, args, columns, margin_size, is_add=True):
     hdata = highlights_for_path(path)
 
     def highlights(num):
-        return hdata[num] if num < len(hdata) else ()
+        return hdata[num] if num < len(hdata) else []
 
     for line_number, line in enumerate(lines):
         h = render_half_line(line_number, line, highlights(line_number), ltype, margin_size, available_cols)
diff --git a/kittens/diff/speedup.c b/kittens/diff/speedup.c
index 234d883e5..059ce85eb 100644
--- a/kittens/diff/speedup.c
+++ b/kittens/diff/speedup.c
@@ -29,8 +29,135 @@ changed_center(PyObject *self UNUSED, PyObject *args) {
     return Py_BuildValue("II", prefix_count, suffix_count);
 }
 
+typedef struct {
+    unsigned int start_pos, end_pos, current_pos;  /* current_pos: offset of the next code to emit (start_pos, then end_pos, then UINT_MAX when done) */
+    PyObject *start_code, *end_code;  /* not owned: convert_segment drops its reference right after storing each pointer */
+} Segment;
+
+typedef struct {
+    Segment sg;  /* the currently loaded segment */
+    unsigned int num, pos;  /* num: how many highlights exist; pos: index of the next one next_segment will load */
+} SegmentPointer;
+
+static const Segment EMPTY_SEGMENT = { .current_pos = UINT_MAX };  /* placeholder used before any segment is loaded; UINT_MAX is the "nothing pending" sentinel */
+
+/* Fill *dest from a Python object exposing start, end (ints) and start_code, end_code (str). Returns false with a Python exception set on failure. */
+static inline bool convert_segment(PyObject *highlight, Segment *dest) {
+    PyObject *val = NULL;
+#define I
+#define A(x, d, c) { \
+    val = PyObject_GetAttrString(highlight, #x); \
+    if (val == NULL) return false; \
+    dest->d = c(val); Py_DECREF(val); if (PyErr_Occurred()) return false; \
+}
+    A(start, start_pos, PyLong_AsUnsignedLong);  /* PyLong_AsUnsignedLong reports failure only through the error indicator, which A() now checks */
+    A(end, end_pos, PyLong_AsUnsignedLong);
+    dest->current_pos = dest->start_pos;  /* the start code is the first thing pending for a fresh segment */
+    A(start_code, start_code, I);  /* I expands to nothing, so the object pointer is stored as-is; it stays valid only while highlight keeps the attribute */
+    A(end_code, end_code, I);
+    if (!PyUnicode_Check(dest->start_code)) { PyErr_SetString(PyExc_TypeError, "start_code is not a string"); return false; }
+    if (!PyUnicode_Check(dest->end_code)) { PyErr_SetString(PyExc_TypeError, "end_code is not a string"); return false; }
+#undef A
+#undef I
+    return true;
+}
+
+static inline bool
+next_segment(SegmentPointer *s, PyObject *highlights) {  /* load highlights[s->pos] into s->sg and advance s->pos; false if the conversion fails */
+    if (s->pos < s->num) {
+        if (!convert_segment(PyList_GET_ITEM(highlights, s->pos), &s->sg)) return false;
+        s->pos++;
+    } else s->sg.current_pos = UINT_MAX;  /* nothing left to load: same sentinel value as EMPTY_SEGMENT */
+    return true;
+}
+
+static inline bool
+insert_code(PyObject *code, Py_UCS4 *buf, size_t bufsz, unsigned int *buf_pos) {  /* append the characters of code to buf at *buf_pos, advancing *buf_pos */
+    unsigned int csz = PyUnicode_GET_LENGTH(code);
+    if (*buf_pos + csz >= bufsz) return false;  /* does not fit; no exception is set here, the CHECK_CALL macro in split_with_highlights supplies "line too long" */
+    for (unsigned int s = 0; s < csz; s++) buf[(*buf_pos)++] = PyUnicode_READ(PyUnicode_KIND(code), PyUnicode_DATA(code), s);
+    return true;
+}
+
+static inline bool
+add_line(Segment *bg_segment, Segment *fg_segment, Py_UCS4 *buf, size_t bufsz, unsigned int *buf_pos, PyObject *ans) {  /* flush buf to ans as one output line */
+    bool bg_is_active = bg_segment->current_pos == bg_segment->end_pos, fg_is_active = fg_segment->current_pos == fg_segment->end_pos;  /* current_pos == end_pos: start code emitted, end code still pending */
+    if (bg_is_active) { if(!insert_code(bg_segment->end_code, buf, bufsz, buf_pos)) return false; }  /* close open segments before the line is emitted */
+    if (fg_is_active) { if(!insert_code(fg_segment->end_code, buf, bufsz, buf_pos)) return false; }
+    PyObject *wl = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, *buf_pos);
+    if (!wl) return false;
+    int ret = PyList_Append(ans, wl); Py_DECREF(wl); if (ret != 0) return false;
+    *buf_pos = 0;  /* buf is reused for the next line */
+    if (bg_is_active) { if(!insert_code(bg_segment->start_code, buf, bufsz, buf_pos)) return false; }  /* re-open the same segments at the start of the next line */
+    if (fg_is_active) { if(!insert_code(fg_segment->start_code, buf, bufsz, buf_pos)) return false; }
+    return true;
+}
+
+/* split_with_highlights(line: str, truncate_points: list of int, fg_highlights: list of segments, bg_highlight: segment or None) -> list of str. Splits line at the given offsets, inserting each segment's start/end codes; raises ValueError("line too long") if an output line overflows the work buffer. */
+static PyObject*
+split_with_highlights(PyObject *self UNUSED, PyObject *args) {
+    PyObject *line, *truncate_points_py, *fg_highlights, *bg_highlight;
+    if (!PyArg_ParseTuple(args, "UO!O!O", &line, &PyList_Type, &truncate_points_py, &PyList_Type, &fg_highlights, &bg_highlight)) return NULL;
+    PyObject *ans = PyList_New(0); if (!ans) return NULL;
+    static unsigned int truncate_points[256];
+    unsigned int num_truncate_pts = PyList_GET_SIZE(truncate_points_py), truncate_pos = 0, truncate_point;
+    if (num_truncate_pts > arraysz(truncate_points)) num_truncate_pts = arraysz(truncate_points);  /* clamp the count itself: NEXT_TRUNCATE_POINT indexes the array up to num_truncate_pts, so an unclamped count read past its end */
+    for (unsigned int i = 0; i < num_truncate_pts; i++) truncate_points[i] = PyLong_AsUnsignedLong(PyList_GET_ITEM(truncate_points_py, i));
+    if (PyErr_Occurred()) { Py_CLEAR(ans); return NULL; }  /* a truncate point was not a non-negative integer */
+    SegmentPointer fg_segment = { .sg = EMPTY_SEGMENT, .num = PyList_GET_SIZE(fg_highlights)}, bg_segment = { .sg = EMPTY_SEGMENT };
+    if (bg_highlight != Py_None) { if (!convert_segment(bg_highlight, &bg_segment.sg)) { Py_CLEAR(ans); return NULL; }; bg_segment.num = 1; }
+#define CHECK_CALL(func, ...) if (!func(__VA_ARGS__)) { Py_CLEAR(ans); if (!PyErr_Occurred()) PyErr_SetString(PyExc_ValueError, "line too long"); return NULL; }
+    CHECK_CALL(next_segment, &fg_segment, fg_highlights);
+    /* NEXT_TRUNCATE_POINT: the next split offset, or UINT_MAX once every truncate point has been consumed */
+#define NEXT_TRUNCATE_POINT truncate_point = (truncate_pos < num_truncate_pts) ? truncate_points[truncate_pos++] : UINT_MAX
+    NEXT_TRUNCATE_POINT;
+    /* INSERT_CODE, ADD_LINE and ADD_CHAR write into the static buf declared further down and return from the function on failure */
+#define INSERT_CODE(x) { CHECK_CALL(insert_code, x, buf, arraysz(buf), &buf_pos); }
+
+#define ADD_LINE CHECK_CALL(add_line, &bg_segment.sg, &fg_segment.sg, buf, arraysz(buf), &buf_pos, ans);
+    /* ADD_CHAR checks after the store: every earlier write left buf_pos < arraysz(buf), so the store itself is in bounds */
+#define ADD_CHAR(x) { \
+    buf[buf_pos++] = x; \
+    if (buf_pos >= arraysz(buf)) { Py_CLEAR(ans); PyErr_SetString(PyExc_ValueError, "line too long"); return NULL; } \
+}
+#define CHECK_SEGMENT(sgp, is_fg) { \
+    if (i == sgp.sg.current_pos) { \
+        INSERT_CODE(sgp.sg.current_pos == sgp.sg.start_pos ? sgp.sg.start_code : sgp.sg.end_code); \
+        if (sgp.sg.current_pos == sgp.sg.start_pos) sgp.sg.current_pos = sgp.sg.end_pos; \
+        else { \
+            if (is_fg) { \
+                CHECK_CALL(next_segment, &fg_segment, fg_highlights); \
+                if (sgp.sg.current_pos == i) { \
+                    INSERT_CODE(sgp.sg.start_code); \
+                    sgp.sg.current_pos = sgp.sg.end_pos; \
+                } \
+            } else sgp.sg.current_pos = UINT_MAX; \
+        } \
+    }\
+}
+    /* CHECK_SEGMENT (above): when i reaches the segment's pending offset, emit its start or end code; a finished fg segment is replaced by the next highlight, which may start at the same i */
+    const unsigned int line_sz = PyUnicode_GET_LENGTH(line);
+    static Py_UCS4 buf[4096];
+    unsigned int i = 0, buf_pos = 0;
+    for (; i < line_sz; i++) {
+        if (i == truncate_point) { ADD_LINE; NEXT_TRUNCATE_POINT; }
+        CHECK_SEGMENT(bg_segment, false);
+        CHECK_SEGMENT(fg_segment, true)
+        ADD_CHAR(PyUnicode_READ(PyUnicode_KIND(line), PyUnicode_DATA(line), i));
+    }
+    if (buf_pos) ADD_LINE;
+    return ans;
+#undef INSERT_CODE
+#undef CHECK_SEGMENT
+#undef CHECK_CALL
+#undef ADD_CHAR
+#undef ADD_LINE
+#undef NEXT_TRUNCATE_POINT
+}
+
 static PyMethodDef module_methods[] = {
     {"changed_center", (PyCFunction)changed_center, METH_VARARGS, ""},
+    {"split_with_highlights", (PyCFunction)split_with_highlights, METH_VARARGS, ""},
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };
 
diff --git a/kitty/screen.c b/kitty/screen.c
index e5c30ecc1..abbad857b 100644
--- a/kitty/screen.c
+++ b/kitty/screen.c
@@ -1524,8 +1524,8 @@ screen_wcswidth(PyObject UNUSED *self, PyObject *str) {
 
 static PyObject*
 screen_truncate_point_for_length(PyObject UNUSED *self, PyObject *args) {
-    PyObject *str; unsigned int num_cells;
-    if (!PyArg_ParseTuple(args, "OI", &str, &num_cells)) return NULL;
+    PyObject *str; unsigned int num_cells, start_pos = 0;
+    if (!PyArg_ParseTuple(args, "OI|I", &str, &num_cells, &start_pos)) return NULL;
     if (PyUnicode_READY(str) != 0) return NULL;
     int kind = PyUnicode_KIND(str);
     void *data = PyUnicode_DATA(str);
@@ -1534,7 +1534,7 @@ screen_truncate_point_for_length(PyObject UNUSED *self, PyObject *args) {
     int prev_width = 0;
     bool in_sgr = false;
     unsigned long width_so_far = 0;
-    for (i = 0; i < len && width_so_far < num_cells; i++) {
+    for (i = start_pos; i < len && width_so_far < num_cells; i++) {
         char_type ch = PyUnicode_READ(kind, data, i);
         if (in_sgr) {
             if (ch == 'm') in_sgr = false;
diff --git a/kitty_tests/diff.py b/kitty_tests/diff.py
index b32398bc9..7c7960dd3 100644
--- a/kitty_tests/diff.py
+++ b/kitty_tests/diff.py
@@ -22,13 +22,25 @@ def test_changed_center(self):
             for src in (left, right):
                 self.assertEqual((prefix, suffix), (src[:pc], src[-sc:] if sc else ''))
 
-    def test_split_to_size(self):
-        from kittens.diff.render import split_to_size_with_center
+    def test_split_with_highlights(self):
+        from kittens.diff.render import split_with_highlights, Segment, truncate_points
+        self.ae(list(truncate_points('1234567890ab', 3)), [3, 6, 9])  # 12 characters at width 3: the end offset 12 itself is not yielded
         for line, width, prefix_count, suffix_count, expected in [
                 ('abcdefgh', 20, 2, 3, ('abSScdeEEfgh',)),
-                ('abcdefgh', 20, 2, 0, ('abSScdefgh',)),
-                ('abcdefgh', 3, 2, 3, ('abSSc', 'SSdeEEf', 'gh')),
-                ('abcdefgh', 2, 4, 1, ('ab', 'cd', 'SSef', 'SSgEEh')),
+                ('abcdefgh', 20, 2, 0, ('abSScdefghEE',)),  # a segment that runs to the end of the line is closed after the last character
+                ('abcdefgh', 3, 2, 3, ('abSScEE', 'SSdeEEf', 'gh')),  # an open segment is closed at each split and re-opened on the next line
+                ('abcdefgh', 2, 4, 1, ('ab', 'cd', 'SSefEE', 'SSgEEh')),
         ]:
-            self.ae(expected, tuple(split_to_size_with_center(
-                line, width, prefix_count, suffix_count, 'SS', 'EE')))
+            seg = Segment(prefix_count, 'SS')  # background segment covering the changed centre of the line
+            seg.end = len(line) - suffix_count
+            seg.end_code = 'EE'
+            self.ae(expected, tuple(split_with_highlights(line, width, [], seg)))
+        # Foreground highlights: h() builds a Segment whose codes embed w so each one is identifiable in the output
+        def h(s, e, w):
+            ans = Segment(s, 'S{}S'.format(w))
+            ans.end = e
+            ans.end_code = 'E{}E'.format(w)
+            return ans
+
+        highlights = [h(0, 1, 1), h(1, 3, 2)]  # adjacent segments: the second starts at the offset where the first ends
+        self.ae(['S1SaE1ES2SbcE2Ed'], split_with_highlights('abcd', 10, highlights))