find common prefix and suffix of two strings

This commit is contained in:
Kovid Goyal 2018-05-03 20:15:05 +05:30
parent 866b53f384
commit be9d876997
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 96 additions and 4 deletions

View File

@ -6,6 +6,10 @@
import os
import subprocess
from .collect import lines_for_path
left_lines = right_lines = None
def run_diff(file1, file2, context=3):
# returns: ok, is_different, patch
@ -22,13 +26,14 @@ def run_diff(file1, file2, context=3):
class Chunk:
__slots__ = ('is_context', 'left_start', 'right_start', 'left_count', 'right_count')
__slots__ = ('is_context', 'left_start', 'right_start', 'left_count', 'right_count', 'is_change')
def __init__(self, left_start, right_start, is_context=False):
    """Create an empty chunk anchored at the given left/right start positions.

    left_start, right_start and is_context are stored unchanged. Both line
    counts begin at zero and is_change starts out False; finalize() may set
    it later.
    """
    self.left_start, self.right_start = left_start, right_start
    self.is_context = is_context
    self.left_count = 0
    self.right_count = 0
    self.is_change = False
def add_line(self):
self.right_count += 1
@ -40,6 +45,10 @@ def context_line(self):
self.left_count += 1
self.right_count += 1
def finalize(self):
    """Flag this chunk as a change when it is not context and both sides have equal line counts."""
    if self.is_context:
        # Context chunks are never flagged as changes.
        return
    if self.left_count == self.right_count:
        self.is_change = True
def __repr__(self):
    """Return a debugging representation listing the context flag and both start/count pairs."""
    template = 'Chunk(is_context={}, left_start={}, left_count={}, right_start={}, right_count={})'
    values = (
        self.is_context,
        self.left_start,
        self.left_count,
        self.right_start,
        self.right_count,
    )
    return template.format(*values)
@ -102,6 +111,8 @@ def finalize(self):
raise ValueError('Left side line mismatch {} != {}'.format(c.left_start + c.left_count, self.left_start + self.left_count))
if c.right_start + c.right_count != self.right_start + self.right_count:
raise ValueError('Left side line mismatch {} != {}'.format(c.right_start + c.right_count, self.right_start + self.right_count))
for c in self.chunks:
c.finalize()
def parse_range(x):
@ -166,6 +177,7 @@ def add_diff(self, file1, file2):
self.jobs.append(file1)
def __call__(self, context=3):
global left_lines, right_lines
ans = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
jobs = {executor.submit(run_diff, key, self.jmap[key], context): key for key in self.jobs}
@ -179,6 +191,8 @@ def __call__(self, context=3):
return 'Running git diff for {} vs. {} generated an exception: {}'.format(key[0], key[1], e)
if not ok:
return output + '\nRunning git diff for {} vs. {} failed'.format(key[0], key[1])
left_lines = lines_for_path(key[0])
right_lines = lines_for_path(key[1])
try:
patch = parse_patch(output)
except Exception:

52
kittens/diff/speedup.c Normal file
View File

@ -0,0 +1,52 @@
/*
* speedup.c
* Copyright (C) 2018 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#include "data-types.h"
/*
 * changed_center(left, right) -> (prefix_count, suffix_count)
 *
 * Given two str objects, return the number of leading code points they have
 * in common and the number of trailing code points they have in common. The
 * suffix never overlaps the prefix: it is searched only in the part of the
 * shorter string that the prefix did not consume, so
 * prefix_count + suffix_count <= min(len(left), len(right)).
 *
 * Returns NULL with an exception set if the arguments are not two str objects.
 */
static PyObject*
changed_center(PyObject *self UNUSED, PyObject *args) {
    PyObject *lp, *rp;
    if (!PyArg_ParseTuple(args, "UU", &lp, &rp)) return NULL;

    /* Counters share the width of the lengths (Py_ssize_t) so that a very long
     * common run cannot wrap a narrower counter and spin forever. */
    const Py_ssize_t left_len = PyUnicode_GET_LENGTH(lp), right_len = PyUnicode_GET_LENGTH(rp);
    const Py_ssize_t shortest = MIN(left_len, right_len);
    Py_ssize_t prefix_count = 0, suffix_count = 0;

#define R(which, index) PyUnicode_READ(PyUnicode_KIND(which), PyUnicode_DATA(which), index)
    /* Walk forward while the code points agree. */
    while (prefix_count < shortest && R(lp, prefix_count) == R(rp, prefix_count)) prefix_count++;

    /* Only what is left of the shorter string after the prefix may count as
     * suffix; when the prefix consumed it entirely the limit is zero and the
     * loop below does not run (this also covers empty inputs). */
    const Py_ssize_t suffix_limit = shortest - prefix_count;
    while (suffix_count < suffix_limit &&
           R(lp, left_len - 1 - suffix_count) == R(rp, right_len - 1 - suffix_count)) suffix_count++;
#undef R

    /* "n" converts a Py_ssize_t to a Python int. */
    return Py_BuildValue("nn", prefix_count, suffix_count);
}
/* Method table for this extension module: the functions it exposes to Python. */
static PyMethodDef module_methods[] = {
/* changed_center(left, right): called with positional arguments only; registered with an empty docstring */
{"changed_center", (PyCFunction)changed_center, METH_VARARGS, ""},
{NULL, NULL, 0, NULL} /* Sentinel */
};
/* Module definition passed to PyModule_Create() by PyInit_diff_speedup(). */
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "diff_speedup", /* name of module */
.m_doc = NULL, /* no module docstring */
.m_size = -1, /* per the CPython docs: module keeps no per-interpreter state */
.m_methods = module_methods /* functions exported by the module */
};
/* Import hook for the diff_speedup extension: builds the module object from
 * the static module definition. */
EXPORTED PyMODINIT_FUNC
PyInit_diff_speedup(void) {
    /* On failure PyModule_Create() yields NULL, which is passed straight back
     * to the caller. */
    return PyModule_Create(&module);
}

23
kitty_tests/diff.py Normal file
View File

@ -0,0 +1,23 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
from . import BaseTest
class TestDiff(BaseTest):
    """Tests for the native helpers of the diff kitten."""

    def test_changed_center(self):
        """changed_center() must report the shared prefix and suffix lengths of two strings."""
        from kittens.diff.diff_speedup import changed_center

        # (left, right, expected common prefix, expected common suffix)
        cases = (
            ('abc', 'def', '', ''),
            ('', 'def', '', ''),
            ('abc', '', '', ''),
            ('abc', 'abc', 'abc', ''),
            ('abc', 'abcdef', 'abc', ''),
            ('aa111bb', 'aa2bb', 'aa', 'bb'),
        )
        for left, right, prefix, suffix in cases:
            pc, sc = changed_center(left, right)
            expected = (prefix, suffix)
            # The counts must slice the same prefix/suffix out of either input.
            for src in (left, right):
                # src[-0:] would be the whole string, hence the explicit guard.
                tail = src[-sc:] if sc else ''
                self.assertEqual(expected, (src[:pc], tail))

View File

@ -453,9 +453,12 @@ def kittens_env():
def compile_kittens(incremental, compilation_database, all_keys):
sources = ['kittens/unicode_input/unicode_names.c']
all_headers = ['kittens/unicode_input/names.h', 'kitty/data-types.h']
compile_c_extension(kittens_env(), 'kittens/unicode_input/unicode_names', incremental, compilation_database, all_keys, sources, all_headers)
kenv = kittens_env()
for sources, all_headers, dest in [
(['kittens/unicode_input/unicode_names.c'], ['kittens/unicode_input/names.h', 'kitty/data-types.h'], 'kittens/unicode_input/unicode_names'),
(['kittens/diff/speedup.c'], ['kitty/data-types.h'], 'kittens/diff/diff_speedup'),
]:
compile_c_extension(kenv, dest, incremental, compilation_database, all_keys, sources, all_headers)
def build(args, native_optimizations=True):