Start work on porting Go rsync implementation to C

This commit is contained in:
Kovid Goyal 2023-07-14 15:45:04 +05:30
parent 85df15bfc3
commit fabb6bd8cc
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
7 changed files with 280 additions and 484 deletions

View File

@@ -85,7 +85,7 @@ Run-time dependencies:
* ``zlib``
* ``libpng``
* ``liblcms2``
* ``librsync``
* ``libxxhash``
* ``openssl``
* ``freetype`` (not needed on macOS)
* ``fontconfig`` (not needed on macOS)
@@ -114,7 +114,7 @@ Build-time dependencies:
- ``liblcms2-dev``
- ``libssl-dev``
- ``libpython3-dev``
- ``librsync-dev``
- ``libxxhash-dev``
Build and run from source with Nix

View File

@@ -0,0 +1,276 @@
/*
* algorithm.c
* Copyright (C) 2023 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#include "data-types.h"
#define XXH_INLINE_ALL
#include <xxhash.h>
static PyObject *RsyncError = NULL;  // module-level exception type, created in exec_module

// Function-pointer interface over a streaming hash implementation, so the
// rsync code can treat different hash algorithms uniformly.
typedef void*(*new_hash_t)(void);
typedef void(*delete_hash_t)(void*);
typedef bool(*reset_hash_t)(void*);
typedef bool(*update_hash_t)(void*, const void *input, size_t length);
typedef void(*digest_hash_t)(const void*, void *output);
typedef uint64_t(*digest_hash64_t)(const void*);

// A hash algorithm vtable plus its (possibly NULL) allocated state.
typedef struct hasher_t {
    size_t hash_size, block_size;  // digest size in bytes; internal block size
    void *state;                   // opaque implementation state, owned by this struct
    new_hash_t new;
    delete_hash_t delete;
    reset_hash_t reset;
    update_hash_t update;
    digest_hash_t digest;
    digest_hash64_t digest64;      // may be NULL when the digest does not fit in 64 bits
} hasher_t;
// XXH3-64 adapters matching the hasher_t function-pointer signatures.
static void xxh64_delete(void* s) { XXH3_freeState(s); }
static bool xxh64_reset(void* s) { return XXH3_64bits_reset(s) == XXH_OK; }
// Creates a state and resets it so it is immediately usable for updates.
static void* xxh64_create(void) { void *ans = XXH3_createState(); if (ans != NULL) xxh64_reset(ans); return ans; }
static bool xxh64_update(void* s, const void *input, size_t length) { return XXH3_64bits_update(s, input, length) == XXH_OK; }
static uint64_t xxh64_digest64(const void* s) { return XXH3_64bits_digest(s); }
// Writes the digest in canonical (big-endian) byte order, suitable for interchange.
static void xxh64_digest(const void* s, void *output) {
    XXH64_hash_t ans = XXH3_64bits_digest(s);
    XXH64_canonical_t c;
    XXH64_canonicalFromHash(&c, ans);
    memcpy(output, c.digest, sizeof(c.digest));
}

// Returns a hasher_t vtable for XXH3-64; no state is allocated here.
static hasher_t
xxh64_hasher(void) {
    hasher_t ans = {
        .hash_size=sizeof(XXH64_hash_t), .block_size = 64,
        .new=xxh64_create, .delete=xxh64_delete, .reset=xxh64_reset, .update=xxh64_update, .digest=xxh64_digest, .digest64=xxh64_digest64
    };
    return ans;
}
// XXH3-128 adapters. Deletion reuses xxh64_delete because both variants share
// the same XXH3 state object from XXH3_createState().
static bool xxh128_reset(void* s) { return XXH3_128bits_reset(s) == XXH_OK; }
static void* xxh128_create(void) { void *ans = XXH3_createState(); if (ans != NULL) xxh128_reset(ans); return ans; }
static bool xxh128_update(void* s, const void *input, size_t length) { return XXH3_128bits_update(s, input, length) == XXH_OK; }
// Writes the 128-bit digest in canonical (big-endian) byte order.
static void xxh128_digest(const void* s, void *output) {
    XXH128_hash_t ans = XXH3_128bits_digest(s);
    XXH128_canonical_t c;
    XXH128_canonicalFromHash(&c, ans);
    memcpy(output, c.digest, sizeof(c.digest));
}

// Returns a hasher_t vtable for XXH3-128. .digest64 is deliberately left
// NULL: a 128-bit digest does not fit in 64 bits.
static hasher_t
xxh128_hasher(void) {
    hasher_t ans = {
        .hash_size=sizeof(XXH128_hash_t), .block_size = 64,
        .new=xxh128_create, .delete=xxh64_delete, .reset=xxh128_reset, .update=xxh128_update, .digest=xxh128_digest,
    };
    return ans;
}
typedef hasher_t(*hasher_constructor_t)(void);

// Core rsync engine state: the block size, the per-block strong hasher and
// the whole-file checksummer, plus a scratch buffer of block_size bytes.
typedef struct Rsync {
    size_t block_size;
    hasher_constructor_t hasher_constructor, checksummer_constructor;
    hasher_t hasher, checksummer;
    void *buffer; size_t buffer_cap, buffer_sz;  // scratch buffer, its capacity and current fill
} Rsync;
// Release every resource owned by an Rsync instance, then the instance itself.
static void
free_rsync(Rsync* r) {
    // delete() must only be invoked on a live hash state.
    if (r->hasher.state) {
        r->hasher.delete(r->hasher.state);
        r->hasher.state = NULL;
    }
    if (r->checksummer.state) {
        r->checksummer.delete(r->checksummer.state);
        r->checksummer.state = NULL;
    }
    free(r->buffer);  // free(NULL) is a no-op, so no guard is needed
    r->buffer = NULL;
    free(r);
}
// Allocate and initialize an Rsync engine.
// strong_hash_type/checksum_type: 0 selects XXH3-64/XXH3-128; any other value
// is unknown and yields NULL. Returns NULL on unknown type or allocation
// failure. The caller owns the result and must release it with free_rsync().
static Rsync*
new_rsync(size_t block_size, int strong_hash_type, int checksum_type) {
    Rsync *ans = calloc(1, sizeof(Rsync));
    if (ans != NULL) {
        ans->block_size = block_size;
        if (strong_hash_type == 0) ans->hasher_constructor = xxh64_hasher;
        if (checksum_type == 0) ans->checksummer_constructor = xxh128_hasher;
        if (ans->hasher_constructor == NULL || ans->checksummer_constructor == NULL) { free_rsync(ans); return NULL; }
        ans->hasher = ans->hasher_constructor();
        ans->checksummer = ans->checksummer_constructor();
        ans->buffer = malloc(block_size);
        // Was free(ans): use free_rsync() so any hasher/checksummer state is
        // also released, keeping all failure paths consistent.
        if (ans->buffer == NULL) { free_rsync(ans); return NULL; }
        ans->buffer_cap = block_size;
    }
    return ans;
}
// State for the Adler-style rolling checksum used to locate matching blocks.
typedef struct rolling_checksum {
    uint32_t alpha, beta, val, l, first_byte_of_previous_window;
} rolling_checksum;

// NOTE(review): identifiers beginning with '_' + uppercase are reserved by the
// C standard; _M should eventually be renamed (kept as-is so the sliding
// window update function compiles unchanged).
static const uint32_t _M = (1 << 16);

// Compute the checksum over a full window of len bytes, (re)initializing self.
// Returns the combined value alpha + 2^16 * beta. An empty window (len == 0)
// yields 0; the original code read data[0] unconditionally, an out-of-bounds
// access for len == 0.
static uint32_t
rolling_checksum_full(rolling_checksum *self, uint8_t *data, uint32_t len) {
    self->l = len;
    if (len == 0) {
        self->alpha = 0; self->beta = 0; self->val = 0;
        return self->val;
    }
    uint32_t alpha = 0, beta = 0;
    for (uint32_t i = 0; i < len; i++) {
        alpha += data[i];
        beta += (self->l - i) * data[i];
    }
    self->first_byte_of_previous_window = data[0];
    self->alpha = alpha % _M;
    self->beta = beta % _M;
    self->val = self->alpha + _M*self->beta;
    return self->val;
}
// Slide the window one byte: drop the previous window's first byte and append
// last_byte. Unsigned wraparound before "% _M" is harmless because _M is a
// power of two, so the residue is still correct.
static void
rolling_checksum_add_one_byte(rolling_checksum *self, uint8_t first_byte, uint8_t last_byte) {
    uint32_t alpha = (self->alpha + last_byte - self->first_byte_of_previous_window) % _M;
    uint32_t beta = (self->beta + alpha - self->l * self->first_byte_of_previous_window) % _M;
    self->alpha = alpha;
    self->beta = beta;
    self->val = alpha + _M * beta;
    self->first_byte_of_previous_window = first_byte;
}
// Python interface {{{

// Python-visible Hasher object wrapping a hasher_t vtable + state.
typedef struct {
    PyObject_HEAD
    hasher_t h;        // algorithm vtable plus owned hash state
    const char *name;  // static algorithm name string, e.g. "xxh3-64"
} Hasher;
// Hasher.__init__(which="xxh3-64", data=b"")
// Selects the algorithm, allocates its state, and optionally hashes an
// initial chunk. Raises KeyError for an unknown algorithm name and
// MemoryError if state allocation fails.
static int
Hasher_init(PyObject *s, PyObject *args, PyObject *kwds) {
    Hasher *self = (Hasher*)s;
    static char *kwlist[] = {"which", "data", NULL};
    const char *which = "xxh3-64";
    FREE_BUFFER_AFTER_FUNCTION Py_buffer data = {0};
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|sy*", kwlist, &which, &data)) return -1;
    // __init__ may be invoked more than once on the same object from Python;
    // release any previously allocated state so it is not leaked.
    if (self->h.state) { self->h.delete(self->h.state); self->h.state = NULL; }
    if (strcmp(which, "xxh3-64") == 0) {
        self->h = xxh64_hasher();
        self->name = "xxh3-64";
    } else if (strcmp(which, "xxh3-128") == 0) {
        self->h = xxh128_hasher();
        self->name = "xxh3-128";
    } else {
        PyErr_Format(PyExc_KeyError, "Unknown hash type: %s", which);
        return -1;
    }
    self->h.state = self->h.new();
    if (self->h.state == NULL) { PyErr_NoMemory(); return -1; }
    if (data.buf && data.len > 0) {
        self->h.update(self->h.state, data.buf, data.len);
    }
    return 0;
}
// Destructor: release the hash state, then the object memory itself.
static void
Hasher_dealloc(PyObject *self) {
    Hasher *hasher = (Hasher*)self;
    if (hasher->h.state) {
        hasher->h.delete(hasher->h.state);
        hasher->h.state = NULL;
    }
    Py_TYPE(self)->tp_free(self);
}
// Hasher.update(data) -> None
// Feeds a bytes-like object into the hash. Raises RuntimeError when called on
// an uninitialized object (possible via Hasher.__new__, since tp_new zeroes
// the struct and never allocates state) or when the underlying update fails —
// the original dereferenced the NULL state and ignored the update result.
static PyObject*
update(Hasher *self, PyObject *o) {
    FREE_BUFFER_AFTER_FUNCTION Py_buffer data = {0};
    if (self->h.state == NULL) { PyErr_SetString(PyExc_RuntimeError, "Hasher not initialized"); return NULL; }
    if (PyObject_GetBuffer(o, &data, PyBUF_SIMPLE) == -1) return NULL;
    if (data.buf && data.len > 0) {
        if (!self->h.update(self->h.state, data.buf, data.len)) {
            PyErr_SetString(PyExc_RuntimeError, "Failed to update hash state");
            return NULL;
        }
    }
    Py_RETURN_NONE;
}
// Hasher.digest() -> bytes: the current digest in canonical byte order.
static PyObject*
digest(Hasher *self) {
    PyObject *result = PyBytes_FromStringAndSize(NULL, self->h.hash_size);
    if (result != NULL) self->h.digest(self->h.state, PyBytes_AS_STRING(result));
    return result;
}
// Hasher.hexdigest() -> str: the current digest as lowercase hex.
// The local buffers are sized for up to a 512-bit digest, well above the
// 16 bytes XXH3-128 needs.
static PyObject*
hexdigest(Hasher *self) {
    uint8_t raw[64]; char out[128];
    static const char *hex_digits = "0123456789abcdef";
    self->h.digest(self->h.state, raw);
    for (size_t i = 0; i < self->h.hash_size; i++) {
        out[2*i]     = hex_digits[raw[i] >> 4];
        out[2*i + 1] = hex_digits[raw[i] & 0xF];
    }
    return PyUnicode_FromStringAndSize(out, self->h.hash_size * 2);
}
// Read-only properties mirroring the hashlib hasher interface.
static PyObject*
Hasher_digest_size(Hasher* self, void* closure UNUSED) { return PyLong_FromSize_t(self->h.hash_size); }
static PyObject*
Hasher_block_size(Hasher* self, void* closure UNUSED) { return PyLong_FromSize_t(self->h.block_size); }
static PyObject*
Hasher_name(Hasher* self, void* closure UNUSED) { return PyUnicode_FromString(self->name); }

static PyMethodDef Hasher_methods[] = {
    METHODB(update, METH_O),
    METHODB(digest, METH_NOARGS),
    METHODB(hexdigest, METH_NOARGS),
    {NULL}  /* Sentinel */
};

PyGetSetDef Hasher_getsets[] = {
    {"digest_size", (getter)Hasher_digest_size, NULL, NULL, NULL},
    {"block_size", (getter)Hasher_block_size, NULL, NULL, NULL},
    {"name", (getter)Hasher_name, NULL, NULL, NULL},
    {NULL}
};
// Type object for rsync.Hasher; instances are created via the generic
// tp_new (which zero-fills the struct) followed by Hasher_init.
PyTypeObject Hasher_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "rsync.Hasher",
    .tp_basicsize = sizeof(Hasher),
    .tp_dealloc = Hasher_dealloc,
    .tp_flags = Py_TPFLAGS_DEFAULT,
    .tp_doc = "Hasher",
    .tp_methods = Hasher_methods,
    .tp_new = PyType_GenericNew,
    .tp_init = Hasher_init,
    .tp_getset = Hasher_getsets,
};
// No module-level functions yet; all functionality is exposed via Hasher.
static PyMethodDef module_methods[] = {
    {NULL, NULL, 0, NULL}  /* Sentinel */
};
// Py_mod_exec slot: populate the module with RsyncError and the Hasher type.
// Returns 0 on success, -1 with a Python exception set on failure.
static int
exec_module(PyObject *m) {
    RsyncError = PyErr_NewException("rsync.RsyncError", NULL, NULL);
    if (RsyncError == NULL) return -1;
    // PyModule_AddObject steals the reference only on SUCCESS; on failure we
    // must drop our own reference (the original ignored the return value).
    if (PyModule_AddObject(m, "RsyncError", RsyncError) < 0) {
        Py_CLEAR(RsyncError);
        return -1;
    }
    if (PyType_Ready(&Hasher_Type) < 0) return -1;
    Py_INCREF(&Hasher_Type);
    if (PyModule_AddObject(m, "Hasher", (PyObject *) &Hasher_Type) < 0) {
        Py_DECREF(&Hasher_Type);  // undo the INCREF taken for the module
        return -1;
    }
    return 0;
}
IGNORE_PEDANTIC_WARNINGS
// Multi-phase initialization (PEP 489): exec_module runs when the module
// object is executed by the interpreter.
static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
END_IGNORE_PEDANTIC_WARNINGS

static struct PyModuleDef module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "rsync", /* name of module */
    .m_doc = NULL,
    .m_slots = slots,
    .m_methods = module_methods
};

// Module entry point; the interpreter drives the slots above.
EXPORTED PyMODINIT_FUNC
PyInit_rsync(void) {
    return PyModuleDef_Init(&module);
}
// }}}

View File

@@ -1,163 +0,0 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
import os
import tempfile
from typing import IO, TYPE_CHECKING, Iterator, Union
from .rsync import IO_BUFFER_SIZE, RsyncError, begin_create_delta, begin_create_signature, begin_load_signature, begin_patch, build_hash_table, iter_job
if TYPE_CHECKING:
from .rsync import JobCapsule, SignatureCapsule
class StreamingJob:
expected_input_size = IO_BUFFER_SIZE
def __init__(self, job: 'JobCapsule', output_buf_size: int = IO_BUFFER_SIZE):
self.job = job
self.finished = False
self.calls_with_no_data = 0
self.output_buf = bytearray(output_buf_size)
self.uncomsumed_data = b''
def __call__(self, input_data: Union[memoryview, bytes] = b'') -> Iterator[memoryview]:
if self.finished:
if input_data:
raise RsyncError('There was too much input data')
return memoryview(self.output_buf)[:0]
if self.uncomsumed_data:
input_data = self.uncomsumed_data + bytes(input_data)
self.uncomsumed_data = b''
while True:
self.finished, sz_of_unused_input, output_size = iter_job(self.job, input_data, self.output_buf)
if output_size:
yield memoryview(self.output_buf)[:output_size]
consumed_some_input = sz_of_unused_input < len(input_data)
produced_some_output = output_size > 0
if self.finished or (not sz_of_unused_input and len(input_data)) or (not consumed_some_input and not produced_some_output):
break
input_data = memoryview(input_data)[-sz_of_unused_input:]
if sz_of_unused_input:
self.uncomsumed_data = bytes(input_data[-sz_of_unused_input:])
def get_remaining_output(self) -> Iterator[memoryview]:
if not self.finished:
yield from self()
if not self.finished:
raise RsyncError('Insufficient input data')
if self.uncomsumed_data:
raise RsyncError(f'{len(self.uncomsumed_data)} bytes of unconsumed input data')
def drive_job_on_file(f: IO[bytes], job: 'JobCapsule', input_buf_size: int = IO_BUFFER_SIZE, output_buf_size: int = IO_BUFFER_SIZE) -> Iterator[memoryview]:
sj = StreamingJob(job, output_buf_size=output_buf_size)
input_buf = bytearray(input_buf_size)
while not sj.finished:
sz = f.readinto(input_buf) # type: ignore
if not sz:
del input_buf
yield from sj.get_remaining_output()
break
yield from sj(memoryview(input_buf)[:sz])
def signature_of_file(path: str) -> Iterator[memoryview]:
with open(path, 'rb') as f:
f.seek(0, os.SEEK_END)
fsz = f.tell()
job, block_len, strong_len = begin_create_signature(fsz)
strong_len = max(0, strong_len)
f.seek(0)
# see whole.c in librsync source for size calculations
yield from drive_job_on_file(f, job, input_buf_size=4 * block_len, output_buf_size=12 + 4 * (4 + (strong_len or IO_BUFFER_SIZE)))
class LoadSignature(StreamingJob):
# see whole.c in librsync source for size calculations
expected_input_size = 16 * 1024
autocommit = True
def __init__(self) -> None:
job, self.signature = begin_load_signature()
super().__init__(job, output_buf_size=0)
def add_chunk(self, chunk: bytes) -> None:
for ignored in self(chunk):
pass
def commit(self) -> None:
for ignored in self.get_remaining_output():
pass
build_hash_table(self.signature)
def delta_for_file(path: str, sig: 'SignatureCapsule') -> Iterator[memoryview]:
job = begin_create_delta(sig)
with open(path, 'rb') as f:
# see whole.c in librsync source for size calculations
yield from drive_job_on_file(f, job, input_buf_size=8 * IO_BUFFER_SIZE, output_buf_size=4 * IO_BUFFER_SIZE)
class PatchFile(StreamingJob):
# see whole.c in librsync source for size calculations
expected_input_size = IO_BUFFER_SIZE
def __init__(self, src_path: str, output_path: str = ''):
self.overwrite_src = not output_path
self.src_file = open(src_path, 'rb')
if self.overwrite_src:
self.dest_file: IO[bytes] = tempfile.NamedTemporaryFile(mode='wb', dir=os.path.dirname(os.path.abspath(os.path.realpath(src_path))), delete=False)
else:
self.dest_file = open(output_path, 'wb')
job = begin_patch(self.read_from_src)
super().__init__(job, output_buf_size=4 * IO_BUFFER_SIZE)
def tell(self) -> int:
if self.dest_file.closed:
return os.path.getsize(self.src_file.name if self.overwrite_src else self.dest_file.name)
return self.dest_file.tell()
def read_from_src(self, b: memoryview, pos: int) -> int:
self.src_file.seek(pos)
return self.src_file.readinto(b)
def close(self) -> None:
if not self.src_file.closed:
self.get_remaining_output()
self.src_file.close()
count = 100
while not self.finished and count > 0:
self()
count -= 1
self.dest_file.close()
if self.overwrite_src:
os.replace(self.dest_file.name, self.src_file.name)
def write(self, data: bytes) -> None:
for output in self(data):
self.dest_file.write(output)
def __enter__(self) -> 'PatchFile':
return self
def __exit__(self, *a: object) -> None:
self.close()
def develop() -> None:
import sys
src = sys.argv[-1]
sig_loader = LoadSignature()
with open(f'{src}.sig', 'wb') as f:
for chunk in signature_of_file(src):
sig_loader.add_chunk(chunk)
f.write(chunk)
sig_loader.commit()
with open(f'{src}.delta', 'wb') as f, PatchFile(src, f'{src}.output') as patcher:
for chunk in delta_for_file(src, sig_loader.signature):
f.write(chunk)
patcher.write(chunk)

View File

@@ -1,251 +0,0 @@
//go:build exclude
/*
* rsync.c
* Copyright (C) 2021 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#include "data-types.h"
#include <librsync.h>
#define SIGNATURE_CAPSULE "rs_signature_t"
#define JOB_WITH_CALLBACK_CAPSULE "rs_callback_job_t"
// See whole.c in the librsync source code for estimating IO_BUFFER_SIZE
#define IO_BUFFER_SIZE (64u * 1024u)
static PyObject *RsyncError = NULL;
static void
free_job_with_callback_capsule(PyObject *capsule) {
if (PyCapsule_IsValid(capsule, JOB_WITH_CALLBACK_CAPSULE)) {
void *job = PyCapsule_GetPointer(capsule, JOB_WITH_CALLBACK_CAPSULE);
if (job && job != RsyncError) rs_job_free(job);
PyObject *callback = PyCapsule_GetContext(capsule);
Py_CLEAR(callback);
}
}
static void
free_sig_capsule(PyObject *capsule) {
rs_signature_t *sig = PyCapsule_GetPointer(capsule, SIGNATURE_CAPSULE);
if (sig) rs_free_sumset(sig);
}
#define CREATE_JOB(func, cb, ...) \
PyObject *job_capsule = PyCapsule_New(RsyncError, JOB_WITH_CALLBACK_CAPSULE, free_job_with_callback_capsule); \
if (job_capsule) { \
rs_job_t *job = func(__VA_ARGS__); \
if (job) { \
if (PyCapsule_SetPointer(job_capsule, job) == 0) { \
if (cb) { \
if (PyCapsule_SetContext(job_capsule, cb) == 0) { Py_INCREF(cb); } \
else { Py_CLEAR(job_capsule); } \
} \
} else { \
rs_job_free(job); Py_CLEAR(job_capsule); \
} \
} else { \
Py_CLEAR(job_capsule); \
} \
}
static PyObject*
begin_create_signature(PyObject *self UNUSED, PyObject *args) {
long long file_size = -1;
long sl = 0;
if (!PyArg_ParseTuple(args, "|Ll", &file_size, &sl)) return NULL;
rs_magic_number magic_number = 0;
size_t block_len = 0, strong_len = sl;
#ifdef KITTY_HAS_RS_SIG_ARGS
rs_result res = rs_sig_args(file_size, &magic_number, &block_len, &strong_len);
if (res != RS_DONE) {
PyErr_SetString(PyExc_ValueError, rs_strerror(res));
return NULL;
}
#else
block_len = RS_DEFAULT_BLOCK_LEN;
strong_len = 8;
magic_number = RS_MD4_SIG_MAGIC;
#endif
CREATE_JOB(rs_sig_begin, NULL, block_len, strong_len, magic_number);
return Py_BuildValue("Nnn", job_capsule, (Py_ssize_t)block_len, (Py_ssize_t)strong_len);
}
#define GET_JOB_FROM_CAPSULE \
rs_job_t *job = PyCapsule_GetPointer(job_capsule, JOB_WITH_CALLBACK_CAPSULE); \
if (!job) { PyErr_SetString(PyExc_TypeError, "Not a job capsule"); return NULL; } \
static PyObject*
iter_job(PyObject *self UNUSED, PyObject *args) {
FREE_BUFFER_AFTER_FUNCTION Py_buffer input_buf = {0};
FREE_BUFFER_AFTER_FUNCTION Py_buffer output_buf = {0};
PyObject *job_capsule, *output_array;
if (!PyArg_ParseTuple(args, "O!y*O!", &PyCapsule_Type, &job_capsule, &input_buf, &PyByteArray_Type, &output_array)) return NULL;
GET_JOB_FROM_CAPSULE;
if (PyObject_GetBuffer(output_array, &output_buf, PyBUF_WRITE) != 0) return NULL;
int eof = input_buf.len > 0 ? 0 : 1;
rs_buffers_t buffer = {
.avail_in=input_buf.len, .next_in=input_buf.buf, .eof_in=eof,
.avail_out=output_buf.len, .next_out=output_buf.buf
};
size_t before = buffer.avail_out;
rs_result result = rs_job_iter(job, &buffer);
Py_ssize_t output_size = before - buffer.avail_out;
if (result == RS_DONE || result == RS_BLOCKED) {
Py_ssize_t unused_input = buffer.avail_in;
return Py_BuildValue("Onn", result == RS_DONE ? Py_True : Py_False, unused_input, output_size);
}
PyErr_SetString(RsyncError, rs_strerror(result));
return NULL;
}
static PyObject*
begin_load_signature(PyObject *self UNUSED, PyObject *args UNUSED) {
rs_signature_t *sig = NULL;
CREATE_JOB(rs_loadsig_begin, NULL, &sig);
if (!job_capsule) { rs_free_sumset(sig); return NULL; }
PyObject *sc = PyCapsule_New(sig, SIGNATURE_CAPSULE, free_sig_capsule);
if (!sc) { Py_CLEAR(job_capsule); rs_free_sumset(sig); return NULL; }
return Py_BuildValue("NN", job_capsule, sc);
}
#define GET_SIG_FROM_CAPSULE \
rs_signature_t *sig = PyCapsule_GetPointer(sig_capsule, SIGNATURE_CAPSULE); \
if (!sig) { PyErr_SetString(PyExc_TypeError, "Not a sig capsule"); return NULL; }
static PyObject*
build_hash_table(PyObject *self UNUSED, PyObject *args) {
PyObject *sig_capsule;
if (!PyArg_ParseTuple(args, "O!", &PyCapsule_Type, &sig_capsule)) return NULL;
GET_SIG_FROM_CAPSULE;
rs_result res = rs_build_hash_table(sig);
if (res != RS_DONE) {
PyErr_SetString(RsyncError, rs_strerror(res));
return NULL;
}
Py_RETURN_NONE;
}
static PyObject*
begin_create_delta(PyObject *self UNUSED, PyObject *args) {
PyObject *sig_capsule;
if (!PyArg_ParseTuple(args, "O!", &PyCapsule_Type, &sig_capsule)) return NULL;
GET_SIG_FROM_CAPSULE;
CREATE_JOB(rs_delta_begin, NULL, sig);
return job_capsule;
}
static rs_result
copy_callback(void *opaque, rs_long_t pos, size_t *len, void **buf) {
PyObject *callback = opaque;
long long p = pos;
PyObject *mem = PyMemoryView_FromMemory(*buf, *len, PyBUF_WRITE);
if (!mem) { PyErr_Clear(); return RS_MEM_ERROR; }
PyObject *res = PyObject_CallFunction(callback, "OL", mem, p);
Py_DECREF(mem);
if (res == NULL) { PyErr_Print(); return RS_IO_ERROR; }
rs_result r = RS_DONE;
if (PyLong_Check(res)) { *len = PyLong_AsSize_t(res); }
else { r = RS_INTERNAL_ERROR; }
Py_DECREF(res);
return r;
}
static PyObject*
begin_patch(PyObject *self UNUSED, PyObject *callback) {
if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be a callable"); return NULL; }
CREATE_JOB(rs_patch_begin, callback, copy_callback, callback);
return job_capsule;
}
static bool
call_ftc_callback(PyObject *callback, char *src, Py_ssize_t key_start, Py_ssize_t key_length, Py_ssize_t val_start, Py_ssize_t val_length) {
while(src[key_start] == ';' && key_length > 0 ) { key_start++; key_length--; }
DECREF_AFTER_FUNCTION PyObject *k = PyMemoryView_FromMemory(src + key_start, key_length, PyBUF_READ);
if (!k) return false;
DECREF_AFTER_FUNCTION PyObject *v = PyMemoryView_FromMemory(src + val_start, val_length, PyBUF_READ);
if (!v) return false;
DECREF_AFTER_FUNCTION PyObject *ret = PyObject_CallFunctionObjArgs(callback, k, v, NULL);
return ret != NULL;
}
static PyObject*
decode_utf8_buffer(PyObject *self UNUSED, PyObject *args) {
FREE_BUFFER_AFTER_FUNCTION Py_buffer buf = {0};
if (!PyArg_ParseTuple(args, "s*", &buf)) return NULL;
return PyUnicode_FromStringAndSize(buf.buf, buf.len);
}
static PyObject*
parse_ftc(PyObject *self UNUSED, PyObject *args) {
FREE_BUFFER_AFTER_FUNCTION Py_buffer buf = {0};
PyObject *callback;
size_t i = 0, key_start = 0, key_length = 0, val_start = 0, val_length = 0;
if (!PyArg_ParseTuple(args, "s*O", &buf, &callback)) return NULL;
char *src = buf.buf;
size_t sz = buf.len;
if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be callable"); return NULL; }
for (i = 0; i < sz; i++) {
char ch = src[i];
if (key_length == 0) {
if (ch == '=') {
key_length = i - key_start;
val_start = i + 1;
}
} else {
if (ch == ';') {
val_length = i - val_start;
if (!call_ftc_callback(callback, src, key_start, key_length, val_start, val_length)) return NULL;
key_length = 0; key_start = i + 1; val_start = 0;
}
}
}
if (key_length && val_start) {
val_length = sz - val_start;
if (!call_ftc_callback(callback, src, key_start, key_length, val_start, val_length)) return NULL;
}
Py_RETURN_NONE;
}
static PyMethodDef module_methods[] = {
{"begin_create_signature", begin_create_signature, METH_VARARGS, ""},
{"begin_load_signature", begin_load_signature, METH_NOARGS, ""},
{"build_hash_table", build_hash_table, METH_VARARGS, ""},
{"begin_patch", begin_patch, METH_O, ""},
{"begin_create_delta", begin_create_delta, METH_VARARGS, ""},
{"iter_job", iter_job, METH_VARARGS, ""},
{"parse_ftc", parse_ftc, METH_VARARGS, ""},
{"decode_utf8_buffer", decode_utf8_buffer, METH_VARARGS, ""},
{NULL, NULL, 0, NULL} /* Sentinel */
};
static int
exec_module(PyObject *m) {
RsyncError = PyErr_NewException("rsync.RsyncError", NULL, NULL);
if (RsyncError == NULL) return -1;
PyModule_AddObject(m, "RsyncError", RsyncError);
PyModule_AddIntMacro(m, IO_BUFFER_SIZE);
return 0;
}
IGNORE_PEDANTIC_WARNINGS
static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
END_IGNORE_PEDANTIC_WARNINGS
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "rsync", /* name of module */
.m_doc = NULL,
.m_slots = slots,
.m_methods = module_methods
};
EXPORTED PyMODINIT_FUNC
PyInit_rsync(void) {
return PyModuleDef_Init(&module);
}

View File

@@ -1,47 +1,3 @@
from typing import Callable, Tuple, Union
IO_BUFFER_SIZE: int
class JobCapsule:
pass
class SignatureCapsule:
pass
class RsyncError(Exception):
pass
def begin_create_signature(file_size: int = -1, strong_len: int = 0) -> Tuple[JobCapsule, int, int]:
pass
def begin_load_signature() -> Tuple[JobCapsule, SignatureCapsule]:
pass
def build_hash_table(sig: SignatureCapsule) -> None:
pass
def begin_create_delta(sig: SignatureCapsule) -> JobCapsule:
pass
def begin_patch(callback: Callable[[memoryview, int], int]) -> JobCapsule:
pass
def iter_job(job_capsule: JobCapsule, input_data: bytes, output_buf: bytearray) -> Tuple[bool, int, int]:
pass
def parse_ftc(src: Union[str, bytes, memoryview], callback: Callable[[memoryview, memoryview], None]) -> None:
pass
def decode_utf8_buffer(src: Union[str, bytes, memoryview]) -> str:
pass

View File

@@ -299,24 +299,6 @@ def set_arches(flags: List[str], arches: Iterable[str] = ('x86_64', 'arm64')) ->
flags.extend(('-arch', arch))
def detect_librsync(cc: List[str], cflags: List[str], ldflags: List[str]) -> str:
if not test_compile(
cc, *cflags, libraries=('rsync',), ldflags=ldflags, show_stderr=True,
src='#include <librsync.h>\nint main(void) { rs_strerror(0); return 0; }'):
raise SystemExit('The librsync library is required')
# check for rs_sig_args() which was added to librsync in Apr 2020 version 2.3.0
if test_compile(cc, *cflags, libraries=('rsync',), ldflags=ldflags, src='''
#include <librsync.h>
int main(void) {
rs_magic_number magic_number = 0;
size_t block_len = 0, strong_len = 0;
rs_sig_args(1024, &magic_number, &block_len, &strong_len);
return 0;
}'''):
return '-DKITTY_HAS_RS_SIG_ARGS'
return ''
def is_gcc(cc: Iterable[str]) -> bool:
@lru_cache()
@@ -434,10 +416,6 @@ def add_lpath(which: str, name: str, val: Optional[str]) -> None:
cflags.insert(0, f'-I{os.environ["DEVELOP_ROOT"]}/include')
ldpaths.insert(0, f'-L{os.environ["DEVELOP_ROOT"]}/lib')
rs_cflag = detect_librsync(cc, cflags, ldflags + ldpaths)
if rs_cflag:
cflags.append(rs_cflag)
if build_universal_binary:
set_arches(cflags)
set_arches(ldflags)
@@ -830,7 +808,7 @@ def files(
return kitten, sources, headers, f'kittens/{kitten}/{output}', includes, libraries
for kitten, sources, all_headers, dest, includes, libraries in (
files('transfer', 'rsync', libraries=('rsync',)),
files('transfer', 'rsync', libraries=('xxhash',)),
):
final_env = kenv.copy()
final_env.cflags.extend(f'-I{x}' for x in includes)

View File

@ -226,7 +226,7 @@ type signature_iterator struct {
index uint64
}
// ans is valid only iff err == nil
// ans is valid iff err == nil
func (self *signature_iterator) next() (ans BlockHash, err error) {
n, err := io.ReadAtLeast(self.src, self.buffer, cap(self.buffer))
switch err {