From be9d876997806d2a73c9038b11e6364be76e979c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 3 May 2018 20:15:05 +0530 Subject: [PATCH] find common prefix and suffix of two strings --- kittens/diff/git.py | 16 ++++++++++++- kittens/diff/speedup.c | 52 ++++++++++++++++++++++++++++++++++++++++++ kitty_tests/diff.py | 23 +++++++++++++++++++ setup.py | 9 +++++--- 4 files changed, 96 insertions(+), 4 deletions(-) create mode 100644 kittens/diff/speedup.c create mode 100644 kitty_tests/diff.py diff --git a/kittens/diff/git.py b/kittens/diff/git.py index f6b44db5a..73e5477ab 100644 --- a/kittens/diff/git.py +++ b/kittens/diff/git.py @@ -6,6 +6,10 @@ import os import subprocess +from .collect import lines_for_path + +left_lines = right_lines = None + def run_diff(file1, file2, context=3): # returns: ok, is_different, patch @@ -22,13 +26,14 @@ def run_diff(file1, file2, context=3): class Chunk: - __slots__ = ('is_context', 'left_start', 'right_start', 'left_count', 'right_count') + __slots__ = ('is_context', 'left_start', 'right_start', 'left_count', 'right_count', 'is_change') def __init__(self, left_start, right_start, is_context=False): self.is_context = is_context self.left_start = left_start self.right_start = right_start self.left_count = self.right_count = 0 + self.is_change = False def add_line(self): self.right_count += 1 @@ -40,6 +45,10 @@ def context_line(self): self.left_count += 1 self.right_count += 1 + def finalize(self): + if not self.is_context and self.left_count == self.right_count: + self.is_change = True + def __repr__(self): return 'Chunk(is_context={}, left_start={}, left_count={}, right_start={}, right_count={})'.format( self.is_context, self.left_start, self.left_count, self.right_start, self.right_count) @@ -102,6 +111,8 @@ def finalize(self): raise ValueError('Left side line mismatch {} != {}'.format(c.left_start + c.left_count, self.left_start + self.left_count)) if c.right_start + c.right_count != self.right_start + self.right_count: raise ValueError('Left side line mismatch {} != {}'.format(c.right_start + c.right_count, self.right_start + self.right_count)) + for c in self.chunks: + c.finalize() def parse_range(x): @@ -166,6 +177,7 @@ def add_diff(self, file1, file2): self.jobs.append(file1) def __call__(self, context=3): + global left_lines, right_lines ans = {} with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor: jobs = {executor.submit(run_diff, key, self.jmap[key], context): key for key in self.jobs} @@ -179,6 +191,8 @@ def __call__(self, context=3): return 'Running git diff for {} vs. {} generated an exception: {}'.format(key[0], key[1], e) if not ok: return output + '\nRunning git diff for {} vs. {} failed'.format(key[0], key[1]) + left_lines = lines_for_path(key[0]) + right_lines = lines_for_path(key[1]) try: patch = parse_patch(output) except Exception: diff --git a/kittens/diff/speedup.c b/kittens/diff/speedup.c new file mode 100644 index 000000000..234d883e5 --- /dev/null +++ b/kittens/diff/speedup.c @@ -0,0 +1,52 @@ +/* + * speedup.c + * Copyright (C) 2018 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ + +#include "data-types.h" + +static PyObject* +changed_center(PyObject *self UNUSED, PyObject *args) { + unsigned int prefix_count = 0, suffix_count = 0; + PyObject *lp, *rp; + if (!PyArg_ParseTuple(args, "UU", &lp, &rp)) return NULL; + const size_t left_len = PyUnicode_GET_LENGTH(lp), right_len = PyUnicode_GET_LENGTH(rp); + +#define R(which, index) PyUnicode_READ(PyUnicode_KIND(which), PyUnicode_DATA(which), index) + while(prefix_count < MIN(left_len, right_len)) { + if (R(lp, prefix_count) != R(rp, prefix_count)) break; + prefix_count++; + } + if (left_len && right_len && prefix_count < MIN(left_len, right_len)) { + while(suffix_count < MIN(left_len - prefix_count, right_len - prefix_count)) { + if(R(lp, left_len - 1 - suffix_count) != R(rp, right_len - 1 - suffix_count)) break; + suffix_count++; + } + } +#undef R + return Py_BuildValue("II", prefix_count, suffix_count); +} + +static PyMethodDef module_methods[] = { + {"changed_center", (PyCFunction)changed_center, METH_VARARGS, ""}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static struct PyModuleDef module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "diff_speedup", /* name of module */ + .m_doc = NULL, + .m_size = -1, + .m_methods = module_methods +}; + +EXPORTED PyMODINIT_FUNC +PyInit_diff_speedup(void) { + PyObject *m; + + m = PyModule_Create(&module); + if (m == NULL) return NULL; + return m; +} diff --git a/kitty_tests/diff.py b/kitty_tests/diff.py new file mode 100644 index 000000000..48cd313cc --- /dev/null +++ b/kitty_tests/diff.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +# License: GPL v3 Copyright: 2018, Kovid Goyal + + +from . import BaseTest + + +class TestDiff(BaseTest): + + def test_changed_center(self): + from kittens.diff.diff_speedup import changed_center + for left, right, prefix, suffix in [ + ('abc', 'def', '', ''), + ('', 'def', '', ''), + ('abc', '', '', ''), + ('abc', 'abc', 'abc', ''), + ('abc', 'abcdef', 'abc', ''), + ('aa111bb', 'aa2bb', 'aa', 'bb'), + ]: + pc, sc = changed_center(left, right) + for src in (left, right): + self.assertEqual((prefix, suffix), (src[:pc], src[-sc:] if sc else '')) diff --git a/setup.py b/setup.py index 2bcca8f59..3aac49780 100755 --- a/setup.py +++ b/setup.py @@ -453,9 +453,12 @@ def kittens_env(): def compile_kittens(incremental, compilation_database, all_keys): - sources = ['kittens/unicode_input/unicode_names.c'] - all_headers = ['kittens/unicode_input/names.h', 'kitty/data-types.h'] - compile_c_extension(kittens_env(), 'kittens/unicode_input/unicode_names', incremental, compilation_database, all_keys, sources, all_headers) + kenv = kittens_env() + for sources, all_headers, dest in [ + (['kittens/unicode_input/unicode_names.c'], ['kittens/unicode_input/names.h', 'kitty/data-types.h'], 'kittens/unicode_input/unicode_names'), + (['kittens/diff/speedup.c'], ['kitty/data-types.h'], 'kittens/diff/diff_speedup'), + ]: + compile_c_extension(kenv, dest, incremental, compilation_database, all_keys, sources, all_headers) def build(args, native_optimizations=True):