mirror of
https://github.com/kovidgoyal/kitty.git
synced 2024-09-19 18:47:26 +03:00
Start work on porting Go rsync implementation to C
This commit is contained in:
parent
85df15bfc3
commit
fabb6bd8cc
@ -85,7 +85,7 @@ Run-time dependencies:
|
||||
* ``zlib``
|
||||
* ``libpng``
|
||||
* ``liblcms2``
|
||||
* ``librsync``
|
||||
* ``libxxhash``
|
||||
* ``openssl``
|
||||
* ``freetype`` (not needed on macOS)
|
||||
* ``fontconfig`` (not needed on macOS)
|
||||
@ -114,7 +114,7 @@ Build-time dependencies:
|
||||
- ``liblcms2-dev``
|
||||
- ``libssl-dev``
|
||||
- ``libpython3-dev``
|
||||
- ``librsync-dev``
|
||||
- ``libxxhash-dev``
|
||||
|
||||
|
||||
Build and run from source with Nix
|
||||
|
276
kittens/transfer/algorithm.c
Normal file
276
kittens/transfer/algorithm.c
Normal file
@ -0,0 +1,276 @@
|
||||
/*
|
||||
* algorithm.c
|
||||
* Copyright (C) 2023 Kovid Goyal <kovid at kovidgoyal.net>
|
||||
*
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#include "data-types.h"
|
||||
#define XXH_INLINE_ALL
|
||||
#include <xxhash.h>
|
||||
|
||||
/* Module-level exception object; created in exec_module(). */
static PyObject *RsyncError = NULL;

/* Function-pointer types forming the pluggable hasher vtable below. */
typedef void*(*new_hash_t)(void);
typedef void(*delete_hash_t)(void*);
typedef bool(*reset_hash_t)(void*);
typedef bool(*update_hash_t)(void*, const void *input, size_t length);
typedef void(*digest_hash_t)(const void*, void *output);
typedef uint64_t(*digest_hash64_t)(const void*);

/* A polymorphic streaming hash: size metadata, an opaque state pointer,
 * and the operations acting on it. digest64 may be NULL for hashes
 * whose output does not fit in 64 bits (see xxh128_hasher()). */
typedef struct hasher_t {
    size_t hash_size, block_size;  /* digest length and internal block size, in bytes */
    void *state;                   /* opaque hash state, owned by the holder of this struct */
    new_hash_t new;
    delete_hash_t delete;
    reset_hash_t reset;
    update_hash_t update;
    digest_hash_t digest;
    digest_hash64_t digest64;
} hasher_t;
|
||||
|
||||
static void xxh64_delete(void* s) { XXH3_freeState(s); }
|
||||
static bool xxh64_reset(void* s) { return XXH3_64bits_reset(s) == XXH_OK; }
|
||||
static void* xxh64_create(void) { void *ans = XXH3_createState(); if (ans != NULL) xxh64_reset(ans); return ans; }
|
||||
static bool xxh64_update(void* s, const void *input, size_t length) { return XXH3_64bits_update(s, input, length) == XXH_OK; }
|
||||
static uint64_t xxh64_digest64(const void* s) { return XXH3_64bits_digest(s); }
|
||||
static void xxh64_digest(const void* s, void *output) {
|
||||
XXH64_hash_t ans = XXH3_64bits_digest(s);
|
||||
XXH64_canonical_t c;
|
||||
XXH64_canonicalFromHash(&c, ans);
|
||||
memcpy(output, c.digest, sizeof(c.digest));
|
||||
}
|
||||
|
||||
static hasher_t
|
||||
xxh64_hasher(void) {
|
||||
hasher_t ans = {
|
||||
.hash_size=sizeof(XXH64_hash_t), .block_size = 64,
|
||||
.new=xxh64_create, .delete=xxh64_delete, .reset=xxh64_reset, .update=xxh64_update, .digest=xxh64_digest, .digest64=xxh64_digest64
|
||||
};
|
||||
return ans;
|
||||
}
|
||||
|
||||
static bool xxh128_reset(void* s) { return XXH3_128bits_reset(s) == XXH_OK; }
|
||||
static void* xxh128_create(void) { void *ans = XXH3_createState(); if (ans != NULL) xxh128_reset(ans); return ans; }
|
||||
static bool xxh128_update(void* s, const void *input, size_t length) { return XXH3_128bits_update(s, input, length) == XXH_OK; }
|
||||
static void xxh128_digest(const void* s, void *output) {
|
||||
XXH128_hash_t ans = XXH3_128bits_digest(s);
|
||||
XXH128_canonical_t c;
|
||||
XXH128_canonicalFromHash(&c, ans);
|
||||
memcpy(output, c.digest, sizeof(c.digest));
|
||||
}
|
||||
|
||||
static hasher_t
|
||||
xxh128_hasher(void) {
|
||||
hasher_t ans = {
|
||||
.hash_size=sizeof(XXH128_hash_t), .block_size = 64,
|
||||
.new=xxh128_create, .delete=xxh64_delete, .reset=xxh128_reset, .update=xxh128_update, .digest=xxh128_digest,
|
||||
};
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
||||
/* Factory producing a hasher_t vtable (state not yet allocated). */
typedef hasher_t(*hasher_constructor_t)(void);

/* State for one rsync-style delta computation. */
typedef struct Rsync {
    size_t block_size;  /* size of the blocks the file is split into */

    hasher_constructor_t hasher_constructor, checksummer_constructor;
    hasher_t hasher, checksummer;  /* strong whole-file hash and per-block checksum */

    void *buffer; size_t buffer_cap, buffer_sz;  /* scratch buffer, block_size bytes */
} Rsync;
|
||||
|
||||
static void
|
||||
free_rsync(Rsync* r) {
|
||||
if (r->hasher.state) { r->hasher.delete(r->hasher.state); r->hasher.state = NULL; }
|
||||
if (r->checksummer.state) { r->checksummer.delete(r->checksummer.state); r->checksummer.state = NULL; }
|
||||
if (r->buffer) { free(r->buffer); r->buffer = NULL; }
|
||||
free(r);
|
||||
}
|
||||
|
||||
static Rsync*
|
||||
new_rsync(size_t block_size, int strong_hash_type, int checksum_type) {
|
||||
Rsync *ans = calloc(1, sizeof(Rsync));
|
||||
if (ans != NULL) {
|
||||
ans->block_size = block_size;
|
||||
if (strong_hash_type == 0) ans->hasher_constructor = xxh64_hasher;
|
||||
if (checksum_type == 0) ans->checksummer_constructor = xxh128_hasher;
|
||||
if (ans->hasher_constructor == NULL) { free_rsync(ans); return NULL; }
|
||||
if (ans->checksummer_constructor == NULL) { free_rsync(ans); return NULL; }
|
||||
ans->hasher = ans->hasher_constructor();
|
||||
ans->checksummer = ans->checksummer_constructor();
|
||||
ans->buffer = malloc(block_size);
|
||||
if (ans->buffer == NULL) { free(ans); return NULL; }
|
||||
ans->buffer_cap = block_size;
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
// Rolling (Adler-style) checksum over a sliding window, as used by rsync.
typedef struct rolling_checksum {
    uint32_t alpha, beta, val, l, first_byte_of_previous_window;
} rolling_checksum;

// Modulus for both checksum components.
// NOTE(review): a leading underscore + uppercase identifier is reserved
// for the implementation in C; kept because sibling functions reference it.
static const uint32_t _M = (1 << 16);

// Compute the checksum of a full window of len bytes and prime self so
// the window can subsequently be rolled one byte at a time with
// rolling_checksum_add_one_byte(). Returns the combined checksum:
// alpha in the low 16 bits, beta in the high 16 bits.
static uint32_t
rolling_checksum_full(rolling_checksum *self, uint8_t *data, uint32_t len) {
    uint32_t alpha = 0, beta = 0;
    self->l = len;
    for (uint32_t i = 0; i < len; i++) {
        alpha += data[i];
        beta += (self->l - i) * data[i];
    }
    // Guard the empty window: the original read data[0] unconditionally,
    // which is out of bounds when len == 0.
    self->first_byte_of_previous_window = len ? data[0] : 0;
    self->alpha = alpha % _M;
    self->beta = beta % _M;
    self->val = self->alpha + _M * self->beta;
    return self->val;
}
|
||||
|
||||
// Roll the checksum window one byte: drop the byte remembered in
// first_byte_of_previous_window, append last_byte, then record
// first_byte (the first byte of the *new* window) for the next roll.
// Unsigned wraparound is harmless here: 2^32 is a multiple of _M (2^16),
// so taking % _M after a wrapping subtraction yields the correct residue.
static void
rolling_checksum_add_one_byte(rolling_checksum *self, uint8_t first_byte, uint8_t last_byte) {
    self->alpha = (self->alpha - self->first_byte_of_previous_window + last_byte) % _M;
    // NOTE: uses the freshly updated alpha above -- statement order matters.
    self->beta = (self->beta - (self->l)*self->first_byte_of_previous_window + self->alpha) % _M;
    self->val = self->alpha + _M*self->beta;
    self->first_byte_of_previous_window = first_byte;
}
|
||||
|
||||
// Python interface {{{

// Python object wrapping a hasher_t, exposed as rsync.Hasher with a
// hashlib-like API (update/digest/hexdigest plus size/name properties).
typedef struct {
    PyObject_HEAD
    hasher_t h;        // vtable + owned hash state
    const char *name;  // algorithm name, e.g. "xxh3-64" (points at a string literal)
} Hasher;
|
||||
|
||||
static int
|
||||
Hasher_init(PyObject *s, PyObject *args, PyObject *kwds) {
|
||||
Hasher *self = (Hasher*)s;
|
||||
static char *kwlist[] = {"which", "data", NULL};
|
||||
const char *which = "xxh3-64";
|
||||
FREE_BUFFER_AFTER_FUNCTION Py_buffer data = {0};
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|sy*", kwlist, &which, &data)) return -1;
|
||||
if (strcmp(which, "xxh3-64") == 0) {
|
||||
self->h = xxh64_hasher();
|
||||
self->name = "xxh3-64";
|
||||
} else if (strcmp(which, "xxh3-128") == 0) {
|
||||
self->h = xxh128_hasher();
|
||||
self->name = "xxh3-128";
|
||||
} else {
|
||||
PyErr_Format(PyExc_KeyError, "Unknown hash type: %s", which);
|
||||
return -1;
|
||||
}
|
||||
self->h.state = self->h.new();
|
||||
if (self->h.state == NULL) { PyErr_NoMemory(); return -1; }
|
||||
if (data.buf && data.len > 0) {
|
||||
self->h.update(self->h.state, data.buf, data.len);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
Hasher_dealloc(PyObject *self) {
|
||||
Hasher *h = (Hasher*)self;
|
||||
if (h->h.state) { h->h.delete(h->h.state); h->h.state = NULL; }
|
||||
Py_TYPE(self)->tp_free(self);
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
update(Hasher *self, PyObject *o) {
|
||||
FREE_BUFFER_AFTER_FUNCTION Py_buffer data = {0};
|
||||
if (PyObject_GetBuffer(o, &data, PyBUF_SIMPLE) == -1) return NULL;
|
||||
if (data.buf && data.len > 0) {
|
||||
self->h.update(self->h.state, data.buf, data.len);
|
||||
}
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
digest(Hasher *self) {
|
||||
PyObject *ans = PyBytes_FromStringAndSize(NULL, self->h.hash_size);
|
||||
if (ans) self->h.digest(self->h.state, PyBytes_AS_STRING(ans));
|
||||
return ans;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
hexdigest(Hasher *self) {
|
||||
uint8_t digest[64]; char hexdigest[128];
|
||||
self->h.digest(self->h.state, digest);
|
||||
static const char * hex = "0123456789abcdef";
|
||||
char *pout = hexdigest; const uint8_t *pin = digest;
|
||||
for (; pin < digest + self->h.hash_size; pin++) {
|
||||
*pout++ = hex[(*pin>>4) & 0xF];
|
||||
*pout++ = hex[ *pin & 0xF];
|
||||
}
|
||||
return PyUnicode_FromStringAndSize(hexdigest, self->h.hash_size * 2);
|
||||
}
|
||||
|
||||
|
||||
static PyObject*
|
||||
Hasher_digest_size(Hasher* self, void* closure UNUSED) { return PyLong_FromSize_t(self->h.hash_size); }
|
||||
static PyObject*
|
||||
Hasher_block_size(Hasher* self, void* closure UNUSED) { return PyLong_FromSize_t(self->h.block_size); }
|
||||
static PyObject*
|
||||
Hasher_name(Hasher* self, void* closure UNUSED) { return PyUnicode_FromString(self->name); }
|
||||
|
||||
// Methods exposed on rsync.Hasher.
static PyMethodDef Hasher_methods[] = {
    METHODB(update, METH_O),
    METHODB(digest, METH_NOARGS),
    METHODB(hexdigest, METH_NOARGS),
    {NULL}  /* Sentinel */
};

// Read-only properties exposed on rsync.Hasher.
PyGetSetDef Hasher_getsets[] = {
    {"digest_size", (getter)Hasher_digest_size, NULL, NULL, NULL},
    {"block_size", (getter)Hasher_block_size, NULL, NULL, NULL},
    {"name", (getter)Hasher_name, NULL, NULL, NULL},
    {NULL}
};
|
||||
|
||||
|
||||
// Type object for rsync.Hasher; registered on the module in exec_module().
PyTypeObject Hasher_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "rsync.Hasher",
    .tp_basicsize = sizeof(Hasher),
    .tp_dealloc = Hasher_dealloc,
    .tp_flags = Py_TPFLAGS_DEFAULT,
    .tp_doc = "Hasher",
    .tp_methods = Hasher_methods,
    .tp_new = PyType_GenericNew,
    .tp_init = Hasher_init,
    .tp_getset = Hasher_getsets,
};
|
||||
|
||||
// Module-level functions (none yet; the Go port is in progress).
static PyMethodDef module_methods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
};
|
||||
|
||||
static int
|
||||
exec_module(PyObject *m) {
|
||||
RsyncError = PyErr_NewException("rsync.RsyncError", NULL, NULL);
|
||||
if (RsyncError == NULL) return -1;
|
||||
PyModule_AddObject(m, "RsyncError", RsyncError);
|
||||
if (PyType_Ready(&Hasher_Type) < 0) return -1;
|
||||
Py_INCREF(&Hasher_Type);
|
||||
if (PyModule_AddObject(m, "Hasher", (PyObject *) &Hasher_Type) < 0) return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Multi-phase module initialization (PEP 489) boilerplate. */
IGNORE_PEDANTIC_WARNINGS
static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
END_IGNORE_PEDANTIC_WARNINGS

static struct PyModuleDef module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "rsync", /* name of module */
    .m_doc = NULL,
    .m_slots = slots,
    .m_methods = module_methods
};

/* Module entry point: hand the definition to the interpreter; the real
 * setup happens later in exec_module() via the slots table. */
EXPORTED PyMODINIT_FUNC
PyInit_rsync(void) {
    return PyModuleDef_Init(&module);
}
|
||||
// }}}
|
@ -1,163 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from typing import IO, TYPE_CHECKING, Iterator, Union
|
||||
|
||||
from .rsync import IO_BUFFER_SIZE, RsyncError, begin_create_delta, begin_create_signature, begin_load_signature, begin_patch, build_hash_table, iter_job
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .rsync import JobCapsule, SignatureCapsule
|
||||
|
||||
|
||||
class StreamingJob:
    """Drive a librsync job incrementally: feed input chunks in, yield
    output chunks as memoryviews into a reusable buffer.

    The yielded memoryviews alias ``self.output_buf`` and are only valid
    until the next iteration step — callers must copy or consume them
    immediately.
    """

    # Hint for callers: how much data to feed per call.
    expected_input_size = IO_BUFFER_SIZE

    def __init__(self, job: 'JobCapsule', output_buf_size: int = IO_BUFFER_SIZE):
        self.job = job
        self.finished = False
        self.calls_with_no_data = 0
        self.output_buf = bytearray(output_buf_size)
        # NOTE(review): attribute name is misspelled ("uncomsumed");
        # kept as-is since other code may reference it by this name.
        self.uncomsumed_data = b''

    def __call__(self, input_data: Union[memoryview, bytes] = b'') -> Iterator[memoryview]:
        # Generator: run iter_job until it stops making progress, yielding
        # each output chunk. Leftover input is stashed for the next call.
        if self.finished:
            if input_data:
                raise RsyncError('There was too much input data')
            # NOTE(review): this is a generator, so this return value
            # becomes StopIteration.value, not a normal return -- verify
            # callers expect that.
            return memoryview(self.output_buf)[:0]
        if self.uncomsumed_data:
            # Prepend input left over from the previous call.
            input_data = self.uncomsumed_data + bytes(input_data)
            self.uncomsumed_data = b''
        while True:
            self.finished, sz_of_unused_input, output_size = iter_job(self.job, input_data, self.output_buf)
            if output_size:
                yield memoryview(self.output_buf)[:output_size]
            consumed_some_input = sz_of_unused_input < len(input_data)
            produced_some_output = output_size > 0
            # Stop when done, when all input was consumed, or when the job
            # made no progress at all (neither consumed nor produced).
            if self.finished or (not sz_of_unused_input and len(input_data)) or (not consumed_some_input and not produced_some_output):
                break
            input_data = memoryview(input_data)[-sz_of_unused_input:]
        if sz_of_unused_input:
            self.uncomsumed_data = bytes(input_data[-sz_of_unused_input:])

    def get_remaining_output(self) -> Iterator[memoryview]:
        # Flush any output the job still has buffered; the job must be
        # able to finish without further input.
        if not self.finished:
            yield from self()
        if not self.finished:
            raise RsyncError('Insufficient input data')
        if self.uncomsumed_data:
            raise RsyncError(f'{len(self.uncomsumed_data)} bytes of unconsumed input data')
|
||||
|
||||
|
||||
def drive_job_on_file(f: IO[bytes], job: 'JobCapsule', input_buf_size: int = IO_BUFFER_SIZE, output_buf_size: int = IO_BUFFER_SIZE) -> Iterator[memoryview]:
    """Feed the whole of ``f`` through ``job``, yielding output chunks.

    Yielded memoryviews alias the job's output buffer and must be
    consumed before the next iteration.
    """
    sj = StreamingJob(job, output_buf_size=output_buf_size)
    input_buf = bytearray(input_buf_size)
    while not sj.finished:
        sz = f.readinto(input_buf)  # type: ignore
        if not sz:
            # EOF: flush whatever output the job still has buffered.
            del input_buf
            yield from sj.get_remaining_output()
            break
        yield from sj(memoryview(input_buf)[:sz])
||||
|
||||
def signature_of_file(path: str) -> Iterator[memoryview]:
    """Yield the librsync signature of the file at ``path`` as chunks."""
    with open(path, 'rb') as f:
        # File size determines the block/strong-sum lengths chosen by librsync.
        f.seek(0, os.SEEK_END)
        fsz = f.tell()
        job, block_len, strong_len = begin_create_signature(fsz)
        strong_len = max(0, strong_len)
        f.seek(0)
        # see whole.c in librsync source for size calculations
        yield from drive_job_on_file(f, job, input_buf_size=4 * block_len, output_buf_size=12 + 4 * (4 + (strong_len or IO_BUFFER_SIZE)))
|
||||
|
||||
|
||||
class LoadSignature(StreamingJob):
    """Consume signature data chunk by chunk, building ``self.signature``
    (a SignatureCapsule) for later delta generation."""

    # see whole.c in librsync source for size calculations
    expected_input_size = 16 * 1024
    autocommit = True

    def __init__(self) -> None:
        job, self.signature = begin_load_signature()
        # Loading a signature produces no output, hence the zero-size buffer.
        super().__init__(job, output_buf_size=0)

    def add_chunk(self, chunk: bytes) -> None:
        # Drain the iterator; no output is expected while loading.
        for ignored in self(chunk):
            pass

    def commit(self) -> None:
        # Finish loading and build the hash table required before the
        # signature can be used with begin_create_delta().
        for ignored in self.get_remaining_output():
            pass
        build_hash_table(self.signature)
||||
|
||||
|
||||
def delta_for_file(path: str, sig: 'SignatureCapsule') -> Iterator[memoryview]:
    """Yield the delta of the file at ``path`` against signature ``sig``.

    ``sig`` must have had its hash table built (see LoadSignature.commit).
    """
    job = begin_create_delta(sig)
    with open(path, 'rb') as f:
        # see whole.c in librsync source for size calculations
        yield from drive_job_on_file(f, job, input_buf_size=8 * IO_BUFFER_SIZE, output_buf_size=4 * IO_BUFFER_SIZE)
||||
|
||||
|
||||
class PatchFile(StreamingJob):
    """Apply a delta to ``src_path``, writing the result either to
    ``output_path`` or, when none is given, atomically replacing
    ``src_path`` via a temp file. Feed delta chunks with write();
    usable as a context manager."""

    # see whole.c in librsync source for size calculations
    expected_input_size = IO_BUFFER_SIZE

    def __init__(self, src_path: str, output_path: str = ''):
        self.overwrite_src = not output_path
        self.src_file = open(src_path, 'rb')
        if self.overwrite_src:
            # Temp file in the same directory so the final os.replace()
            # stays on one filesystem and is atomic.
            self.dest_file: IO[bytes] = tempfile.NamedTemporaryFile(mode='wb', dir=os.path.dirname(os.path.abspath(os.path.realpath(src_path))), delete=False)
        else:
            self.dest_file = open(output_path, 'wb')
        job = begin_patch(self.read_from_src)
        super().__init__(job, output_buf_size=4 * IO_BUFFER_SIZE)

    def tell(self) -> int:
        # Bytes written so far; falls back to on-disk size once closed.
        if self.dest_file.closed:
            return os.path.getsize(self.src_file.name if self.overwrite_src else self.dest_file.name)
        return self.dest_file.tell()

    def read_from_src(self, b: memoryview, pos: int) -> int:
        # librsync copy-callback: fill b from position pos of the basis file.
        self.src_file.seek(pos)
        return self.src_file.readinto(b)

    def close(self) -> None:
        if not self.src_file.closed:
            # NOTE(review): this generator call is never iterated, so it
            # performs no work -- looks like a latent bug; verify intent.
            self.get_remaining_output()
            self.src_file.close()
            count = 100
            while not self.finished and count > 0:
                # NOTE(review): same issue -- self() returns an iterator
                # that is discarded without being consumed.
                self()
                count -= 1
            self.dest_file.close()
            if self.overwrite_src:
                os.replace(self.dest_file.name, self.src_file.name)

    def write(self, data: bytes) -> None:
        # Feed one delta chunk; flush resulting output to the destination.
        for output in self(data):
            self.dest_file.write(output)

    def __enter__(self) -> 'PatchFile':
        return self

    def __exit__(self, *a: object) -> None:
        self.close()
||||
|
||||
|
||||
def develop() -> None:
    # Manual smoke test: for the file named by the last command-line
    # argument, write its signature, a self-delta, and a patched copy.
    import sys
    src = sys.argv[-1]
    sig_loader = LoadSignature()
    with open(f'{src}.sig', 'wb') as f:
        for chunk in signature_of_file(src):
            sig_loader.add_chunk(chunk)
            f.write(chunk)
    sig_loader.commit()
    with open(f'{src}.delta', 'wb') as f, PatchFile(src, f'{src}.output') as patcher:
        for chunk in delta_for_file(src, sig_loader.signature):
            f.write(chunk)
            patcher.write(chunk)
|
@ -1,251 +0,0 @@
|
||||
//go:build exclude
/*
 * rsync.c
 * Copyright (C) 2021 Kovid Goyal <kovid at kovidgoyal.net>
 *
 * Distributed under terms of the GPL3 license.
 */

#include "data-types.h"
#include <librsync.h>

#define SIGNATURE_CAPSULE "rs_signature_t"
#define JOB_WITH_CALLBACK_CAPSULE "rs_callback_job_t"
// See whole.c in the librsync source code for estimating IO_BUFFER_SIZE
#define IO_BUFFER_SIZE (64u * 1024u)
static PyObject *RsyncError = NULL;

// Capsule destructor: frees the rs_job_t and drops the reference to the
// Python callback stored as the capsule context (if any). RsyncError is
// used as a sentinel pointer meaning "no job attached yet".
static void
free_job_with_callback_capsule(PyObject *capsule) {
    if (PyCapsule_IsValid(capsule, JOB_WITH_CALLBACK_CAPSULE)) {
        void *job = PyCapsule_GetPointer(capsule, JOB_WITH_CALLBACK_CAPSULE);
        if (job && job != RsyncError) rs_job_free(job);
        PyObject *callback = PyCapsule_GetContext(capsule);
        Py_CLEAR(callback);
    }
}

// Capsule destructor for signature capsules.
static void
free_sig_capsule(PyObject *capsule) {
    rs_signature_t *sig = PyCapsule_GetPointer(capsule, SIGNATURE_CAPSULE);
    if (sig) rs_free_sumset(sig);
}

// Declare a local job_capsule wrapping func(__VA_ARGS__), optionally
// retaining a Python callback as capsule context. Leaves job_capsule
// NULL (with a Python exception set) on failure. The capsule initially
// stores the RsyncError sentinel because PyCapsule_New rejects NULL.
#define CREATE_JOB(func, cb, ...) \
    PyObject *job_capsule = PyCapsule_New(RsyncError, JOB_WITH_CALLBACK_CAPSULE, free_job_with_callback_capsule); \
    if (job_capsule) { \
        rs_job_t *job = func(__VA_ARGS__); \
        if (job) { \
            if (PyCapsule_SetPointer(job_capsule, job) == 0) { \
                if (cb) { \
                    if (PyCapsule_SetContext(job_capsule, cb) == 0) { Py_INCREF(cb); } \
                    else { Py_CLEAR(job_capsule); } \
                } \
            } else { \
                rs_job_free(job); Py_CLEAR(job_capsule); \
            } \
        } else { \
            Py_CLEAR(job_capsule); \
        } \
    }
|
||||
|
||||
|
||||
// begin_create_signature(file_size=-1, strong_len=0)
//   -> (job_capsule, block_len, strong_len)
// Start a signature-generation job. With librsync >= 2.3.0 the block
// and strong-sum lengths come from rs_sig_args(); older versions fall
// back to MD4-era defaults.
static PyObject*
begin_create_signature(PyObject *self UNUSED, PyObject *args) {
    long long file_size = -1;
    long sl = 0;
    if (!PyArg_ParseTuple(args, "|Ll", &file_size, &sl)) return NULL;
    rs_magic_number magic_number = 0;
    size_t block_len = 0, strong_len = sl;
#ifdef KITTY_HAS_RS_SIG_ARGS
    rs_result res = rs_sig_args(file_size, &magic_number, &block_len, &strong_len);
    if (res != RS_DONE) {
        PyErr_SetString(PyExc_ValueError, rs_strerror(res));
        return NULL;
    }
#else
    // librsync < 2.3.0: no rs_sig_args(); use fixed defaults.
    block_len = RS_DEFAULT_BLOCK_LEN;
    strong_len = 8;
    magic_number = RS_MD4_SIG_MAGIC;
#endif
    CREATE_JOB(rs_sig_begin, NULL, block_len, strong_len, magic_number);
    return Py_BuildValue("Nnn", job_capsule, (Py_ssize_t)block_len, (Py_ssize_t)strong_len);
}

// Extract the rs_job_t from a local named job_capsule, raising
// TypeError if it is not a job capsule.
#define GET_JOB_FROM_CAPSULE \
    rs_job_t *job = PyCapsule_GetPointer(job_capsule, JOB_WITH_CALLBACK_CAPSULE); \
    if (!job) { PyErr_SetString(PyExc_TypeError, "Not a job capsule"); return NULL; } \

// iter_job(job_capsule, input_data, output_bytearray)
//   -> (finished, sz_of_unused_input, output_size)
// Run one iteration of the job. Empty input signals EOF to librsync.
static PyObject*
iter_job(PyObject *self UNUSED, PyObject *args) {
    FREE_BUFFER_AFTER_FUNCTION Py_buffer input_buf = {0};
    FREE_BUFFER_AFTER_FUNCTION Py_buffer output_buf = {0};
    PyObject *job_capsule, *output_array;
    if (!PyArg_ParseTuple(args, "O!y*O!", &PyCapsule_Type, &job_capsule, &input_buf, &PyByteArray_Type, &output_array)) return NULL;
    GET_JOB_FROM_CAPSULE;
    if (PyObject_GetBuffer(output_array, &output_buf, PyBUF_WRITE) != 0) return NULL;
    int eof = input_buf.len > 0 ? 0 : 1;
    rs_buffers_t buffer = {
        .avail_in=input_buf.len, .next_in=input_buf.buf, .eof_in=eof,
        .avail_out=output_buf.len, .next_out=output_buf.buf
    };
    size_t before = buffer.avail_out;
    rs_result result = rs_job_iter(job, &buffer);
    Py_ssize_t output_size = before - buffer.avail_out;
    if (result == RS_DONE || result == RS_BLOCKED) {
        Py_ssize_t unused_input = buffer.avail_in;
        return Py_BuildValue("Onn", result == RS_DONE ? Py_True : Py_False, unused_input, output_size);
    }
    PyErr_SetString(RsyncError, rs_strerror(result));
    return NULL;
}

// begin_load_signature() -> (job_capsule, signature_capsule)
static PyObject*
begin_load_signature(PyObject *self UNUSED, PyObject *args UNUSED) {
    rs_signature_t *sig = NULL;
    CREATE_JOB(rs_loadsig_begin, NULL, &sig);
    if (!job_capsule) { rs_free_sumset(sig); return NULL; }
    PyObject *sc = PyCapsule_New(sig, SIGNATURE_CAPSULE, free_sig_capsule);
    if (!sc) { Py_CLEAR(job_capsule); rs_free_sumset(sig); return NULL; }
    return Py_BuildValue("NN", job_capsule, sc);
}
|
||||
|
||||
// Extract the rs_signature_t from a local named sig_capsule, raising
// TypeError if it is not a signature capsule.
#define GET_SIG_FROM_CAPSULE \
    rs_signature_t *sig = PyCapsule_GetPointer(sig_capsule, SIGNATURE_CAPSULE); \
    if (!sig) { PyErr_SetString(PyExc_TypeError, "Not a sig capsule"); return NULL; }

// build_hash_table(sig_capsule) -> None
// Must be called after a signature is fully loaded, before it can be
// used to generate deltas.
static PyObject*
build_hash_table(PyObject *self UNUSED, PyObject *args) {
    PyObject *sig_capsule;
    if (!PyArg_ParseTuple(args, "O!", &PyCapsule_Type, &sig_capsule)) return NULL;
    GET_SIG_FROM_CAPSULE;
    rs_result res = rs_build_hash_table(sig);
    if (res != RS_DONE) {
        PyErr_SetString(RsyncError, rs_strerror(res));
        return NULL;
    }
    Py_RETURN_NONE;
}

// begin_create_delta(sig_capsule) -> job_capsule
static PyObject*
begin_create_delta(PyObject *self UNUSED, PyObject *args) {
    PyObject *sig_capsule;
    if (!PyArg_ParseTuple(args, "O!", &PyCapsule_Type, &sig_capsule)) return NULL;
    GET_SIG_FROM_CAPSULE;
    CREATE_JOB(rs_delta_begin, NULL, sig);
    return job_capsule;
}

// librsync copy callback: asks the stored Python callback to fill *buf
// with *len bytes from position pos of the basis file; the callback
// returns the number of bytes actually read.
static rs_result
copy_callback(void *opaque, rs_long_t pos, size_t *len, void **buf) {
    PyObject *callback = opaque;
    long long p = pos;
    PyObject *mem = PyMemoryView_FromMemory(*buf, *len, PyBUF_WRITE);
    if (!mem) { PyErr_Clear(); return RS_MEM_ERROR; }
    PyObject *res = PyObject_CallFunction(callback, "OL", mem, p);
    Py_DECREF(mem);
    // A Python exception cannot propagate through librsync; print it
    // and report an I/O error instead.
    if (res == NULL) { PyErr_Print(); return RS_IO_ERROR; }
    rs_result r = RS_DONE;
    if (PyLong_Check(res)) { *len = PyLong_AsSize_t(res); }
    else { r = RS_INTERNAL_ERROR; }
    Py_DECREF(res);
    return r;
}

// begin_patch(callback) -> job_capsule
// callback(memoryview, pos) reads from the basis file; see copy_callback.
static PyObject*
begin_patch(PyObject *self UNUSED, PyObject *callback) {
    if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be a callable"); return NULL; }
    CREATE_JOB(rs_patch_begin, callback, copy_callback, callback);
    return job_capsule;
}
|
||||
|
||||
// Invoke callback(key, value) with read-only memoryviews into src for
// one key=value pair; leading ';' separators before the key are skipped.
// Returns false if a Python exception was raised.
static bool
call_ftc_callback(PyObject *callback, char *src, Py_ssize_t key_start, Py_ssize_t key_length, Py_ssize_t val_start, Py_ssize_t val_length) {
    while(src[key_start] == ';' && key_length > 0 ) { key_start++; key_length--; }
    DECREF_AFTER_FUNCTION PyObject *k = PyMemoryView_FromMemory(src + key_start, key_length, PyBUF_READ);
    if (!k) return false;
    DECREF_AFTER_FUNCTION PyObject *v = PyMemoryView_FromMemory(src + val_start, val_length, PyBUF_READ);
    if (!v) return false;
    DECREF_AFTER_FUNCTION PyObject *ret = PyObject_CallFunctionObjArgs(callback, k, v, NULL);
    return ret != NULL;
}

// decode_utf8_buffer(src) -> str
static PyObject*
decode_utf8_buffer(PyObject *self UNUSED, PyObject *args) {
    FREE_BUFFER_AFTER_FUNCTION Py_buffer buf = {0};
    if (!PyArg_ParseTuple(args, "s*", &buf)) return NULL;
    return PyUnicode_FromStringAndSize(buf.buf, buf.len);
}

// parse_ftc(src, callback) -> None
// Parse a "key=value;key=value;..." file-transfer-command string,
// invoking callback(key, value) for each pair.
static PyObject*
parse_ftc(PyObject *self UNUSED, PyObject *args) {
    FREE_BUFFER_AFTER_FUNCTION Py_buffer buf = {0};
    PyObject *callback;
    size_t i = 0, key_start = 0, key_length = 0, val_start = 0, val_length = 0;
    if (!PyArg_ParseTuple(args, "s*O", &buf, &callback)) return NULL;
    char *src = buf.buf;
    size_t sz = buf.len;
    if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be callable"); return NULL; }
    for (i = 0; i < sz; i++) {
        char ch = src[i];
        if (key_length == 0) {
            // Still scanning the key; '=' terminates it.
            if (ch == '=') {
                key_length = i - key_start;
                val_start = i + 1;
            }
        } else {
            // Scanning the value; ';' terminates the pair.
            if (ch == ';') {
                val_length = i - val_start;
                if (!call_ftc_callback(callback, src, key_start, key_length, val_start, val_length)) return NULL;
                key_length = 0; key_start = i + 1; val_start = 0;
            }
        }
    }
    // Handle a trailing pair without a terminating ';'.
    if (key_length && val_start) {
        val_length = sz - val_start;
        if (!call_ftc_callback(callback, src, key_start, key_length, val_start, val_length)) return NULL;
    }
    Py_RETURN_NONE;
}
|
||||
|
||||
// Module-level function table for the librsync-backed rsync module.
static PyMethodDef module_methods[] = {
    {"begin_create_signature", begin_create_signature, METH_VARARGS, ""},
    {"begin_load_signature", begin_load_signature, METH_NOARGS, ""},
    {"build_hash_table", build_hash_table, METH_VARARGS, ""},
    {"begin_patch", begin_patch, METH_O, ""},
    {"begin_create_delta", begin_create_delta, METH_VARARGS, ""},
    {"iter_job", iter_job, METH_VARARGS, ""},
    {"parse_ftc", parse_ftc, METH_VARARGS, ""},
    {"decode_utf8_buffer", decode_utf8_buffer, METH_VARARGS, ""},
    {NULL, NULL, 0, NULL} /* Sentinel */
};

// Module exec slot: create RsyncError and export IO_BUFFER_SIZE.
// NOTE(review): PyModule_AddObject steals a reference on success,
// leaving the static RsyncError pointer borrowed, and its return value
// is unchecked -- confirm this is acceptable.
static int
exec_module(PyObject *m) {
    RsyncError = PyErr_NewException("rsync.RsyncError", NULL, NULL);
    if (RsyncError == NULL) return -1;
    PyModule_AddObject(m, "RsyncError", RsyncError);

    PyModule_AddIntMacro(m, IO_BUFFER_SIZE);
    return 0;
}

/* Multi-phase module initialization (PEP 489) boilerplate. */
IGNORE_PEDANTIC_WARNINGS
static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
END_IGNORE_PEDANTIC_WARNINGS

static struct PyModuleDef module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "rsync", /* name of module */
    .m_doc = NULL,
    .m_slots = slots,
    .m_methods = module_methods
};

EXPORTED PyMODINIT_FUNC
PyInit_rsync(void) {
    return PyModuleDef_Init(&module);
}
|
@ -1,47 +1,3 @@
|
||||
"""Type stubs for the librsync-based ``rsync`` C extension module."""

from typing import Callable, Tuple, Union

IO_BUFFER_SIZE: int


# Opaque capsule wrapping a librsync job.
class JobCapsule:
    pass


# Opaque capsule wrapping a loaded librsync signature.
class SignatureCapsule:
    pass


class RsyncError(Exception):
    pass


def begin_create_signature(file_size: int = -1, strong_len: int = 0) -> Tuple[JobCapsule, int, int]:
    pass


def begin_load_signature() -> Tuple[JobCapsule, SignatureCapsule]:
    pass


def build_hash_table(sig: SignatureCapsule) -> None:
    pass


def begin_create_delta(sig: SignatureCapsule) -> JobCapsule:
    pass


def begin_patch(callback: Callable[[memoryview, int], int]) -> JobCapsule:
    pass


def iter_job(job_capsule: JobCapsule, input_data: bytes, output_buf: bytearray) -> Tuple[bool, int, int]:
    pass


def parse_ftc(src: Union[str, bytes, memoryview], callback: Callable[[memoryview, memoryview], None]) -> None:
    pass


def decode_utf8_buffer(src: Union[str, bytes, memoryview]) -> str:
    pass
|
||||
|
24
setup.py
24
setup.py
@ -299,24 +299,6 @@ def set_arches(flags: List[str], arches: Iterable[str] = ('x86_64', 'arm64')) ->
|
||||
flags.extend(('-arch', arch))
|
||||
|
||||
|
||||
def detect_librsync(cc: List[str], cflags: List[str], ldflags: List[str]) -> str:
    """Verify librsync is linkable, aborting the build if not, and return
    the extra cflag needed when rs_sig_args() is available ('' otherwise)."""
    if not test_compile(
        cc, *cflags, libraries=('rsync',), ldflags=ldflags, show_stderr=True,
        src='#include <librsync.h>\nint main(void) { rs_strerror(0); return 0; }'):
        raise SystemExit('The librsync library is required')
    # check for rs_sig_args() which was added to librsync in Apr 2020 version 2.3.0
    if test_compile(cc, *cflags, libraries=('rsync',), ldflags=ldflags, src='''
#include <librsync.h>
int main(void) {
    rs_magic_number magic_number = 0;
    size_t block_len = 0, strong_len = 0;
    rs_sig_args(1024, &magic_number, &block_len, &strong_len);
    return 0;
}'''):
        return '-DKITTY_HAS_RS_SIG_ARGS'
    return ''
|
||||
|
||||
|
||||
def is_gcc(cc: Iterable[str]) -> bool:
|
||||
|
||||
@lru_cache()
|
||||
@ -434,10 +416,6 @@ def add_lpath(which: str, name: str, val: Optional[str]) -> None:
|
||||
cflags.insert(0, f'-I{os.environ["DEVELOP_ROOT"]}/include')
|
||||
ldpaths.insert(0, f'-L{os.environ["DEVELOP_ROOT"]}/lib')
|
||||
|
||||
rs_cflag = detect_librsync(cc, cflags, ldflags + ldpaths)
|
||||
if rs_cflag:
|
||||
cflags.append(rs_cflag)
|
||||
|
||||
if build_universal_binary:
|
||||
set_arches(cflags)
|
||||
set_arches(ldflags)
|
||||
@ -830,7 +808,7 @@ def files(
|
||||
return kitten, sources, headers, f'kittens/{kitten}/{output}', includes, libraries
|
||||
|
||||
for kitten, sources, all_headers, dest, includes, libraries in (
|
||||
files('transfer', 'rsync', libraries=('rsync',)),
|
||||
files('transfer', 'rsync', libraries=('xxhash',)),
|
||||
):
|
||||
final_env = kenv.copy()
|
||||
final_env.cflags.extend(f'-I{x}' for x in includes)
|
||||
|
@ -226,7 +226,7 @@ type signature_iterator struct {
|
||||
index uint64
|
||||
}
|
||||
|
||||
// ans is valid only iff err == nil
|
||||
// ans is valid iff err == nil
|
||||
func (self *signature_iterator) next() (ans BlockHash, err error) {
|
||||
n, err := io.ReadAtLeast(self.src, self.buffer, cap(self.buffer))
|
||||
switch err {
|
||||
|
Loading…
Reference in New Issue
Block a user