py3: fix flat dirstate parsing/packing

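Summary: Updates the C dirstate parsing and packing code to support unicode filenames and state characters on Python 3. Also uses integer division when presizing the dirstate map and removes the py2-only requirement from the affected test.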

Reviewed By: simpkins

Differential Revision: D19786275

fbshipit-source-id: e7aeb029b792818b1b1a9c5d3028640b56522235
This commit is contained in:
Durham Goode 2020-02-26 12:45:42 -08:00 committed by Facebook Github Bot
parent febb8abb2b
commit 430f047eda
3 changed files with 73 additions and 4 deletions

View File

@ -147,8 +147,13 @@ dirstate_tuple_new(PyTypeObject* subtype, PyObject* args, PyObject* kwds) {
dirstateTupleObject* t; dirstateTupleObject* t;
char state; char state;
int size, mode, mtime; int size, mode, mtime;
#ifdef IS_PY3K
if (!PyArg_ParseTuple(args, "Ciii", &state, &mode, &size, &mtime))
return NULL;
#else
if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
return NULL; return NULL;
#endif
t = (dirstateTupleObject*)subtype->tp_alloc(subtype, 1); t = (dirstateTupleObject*)subtype->tp_alloc(subtype, 1);
if (!t) if (!t)
@ -173,7 +178,11 @@ static PyObject* dirstate_tuple_item(PyObject* o, Py_ssize_t i) {
dirstateTupleObject* t = (dirstateTupleObject*)o; dirstateTupleObject* t = (dirstateTupleObject*)o;
switch (i) { switch (i) {
case 0: case 0:
#ifdef IS_PY3K
return PyUnicode_FromStringAndSize(&t->state, 1);
#else
return PyBytes_FromStringAndSize(&t->state, 1); return PyBytes_FromStringAndSize(&t->state, 1);
#endif
case 1: case 1:
return PyInt_FromLong(t->mode); return PyInt_FromLong(t->mode);
case 2: case 2:
@ -265,8 +274,14 @@ static PyObject* parse_dirstate(PyObject* self, PyObject* args) {
goto quit; goto quit;
} }
#ifdef IS_PY3K
parents =
Py_BuildValue("y#y#", str, (Py_ssize_t)20, str + 20, (Py_ssize_t)20);
#else
parents = parents =
Py_BuildValue("s#s#", str, (Py_ssize_t)20, str + 20, (Py_ssize_t)20); Py_BuildValue("s#s#", str, (Py_ssize_t)20, str + 20, (Py_ssize_t)20);
#endif
if (!parents) if (!parents)
goto quit; goto quit;
@ -293,14 +308,23 @@ static PyObject* parse_dirstate(PyObject* self, PyObject* args) {
entry = (PyObject*)make_dirstate_tuple(state, mode, size, mtime); entry = (PyObject*)make_dirstate_tuple(state, mode, size, mtime);
cpos = memchr(cur, 0, flen); cpos = memchr(cur, 0, flen);
if (cpos) { if (cpos) {
#ifdef IS_PY3K
fname = PyUnicode_FromStringAndSize(cur, cpos - cur);
cname = PyUnicode_FromStringAndSize(cpos + 1, flen - (cpos - cur) - 1);
#else
fname = PyBytes_FromStringAndSize(cur, cpos - cur); fname = PyBytes_FromStringAndSize(cur, cpos - cur);
cname = PyBytes_FromStringAndSize(cpos + 1, flen - (cpos - cur) - 1); cname = PyBytes_FromStringAndSize(cpos + 1, flen - (cpos - cur) - 1);
#endif
if (!fname || !cname || PyDict_SetItem(cmap, fname, cname) == -1 || if (!fname || !cname || PyDict_SetItem(cmap, fname, cname) == -1 ||
PyDict_SetItem(dmap, fname, entry) == -1) PyDict_SetItem(dmap, fname, entry) == -1)
goto quit; goto quit;
Py_DECREF(cname); Py_DECREF(cname);
} else { } else {
#ifdef IS_PY3K
fname = PyUnicode_FromStringAndSize(cur, flen);
#else
fname = PyBytes_FromStringAndSize(cur, flen); fname = PyBytes_FromStringAndSize(cur, flen);
#endif
if (!fname || PyDict_SetItem(dmap, fname, entry) == -1) if (!fname || PyDict_SetItem(dmap, fname, entry) == -1)
goto quit; goto quit;
} }
@ -382,6 +406,10 @@ static PyObject* pack_dirstate(PyObject* self, PyObject* args) {
Py_ssize_t nbytes, pos, l; Py_ssize_t nbytes, pos, l;
PyObject *k, *v = NULL, *pn; PyObject *k, *v = NULL, *pn;
char *p, *s; char *p, *s;
#ifdef IS_PY3K
const char *utf8c = NULL;
const char *utf8k = NULL;
#endif
int now; int now;
if (!PyArg_ParseTuple( if (!PyArg_ParseTuple(
@ -403,6 +431,33 @@ static PyObject* pack_dirstate(PyObject* self, PyObject* args) {
/* Figure out how much we need to allocate. */ /* Figure out how much we need to allocate. */
for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) { for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
PyObject* c; PyObject* c;
#ifdef IS_PY3K
if (!PyUnicode_Check(k)) {
PyErr_SetString(PyExc_TypeError, "expected string key");
goto bail;
}
Py_ssize_t utf8k_size = 0;
utf8k = PyUnicode_AsUTF8AndSize(k, &utf8k_size);
if (!utf8k) {
goto bail;
}
nbytes += utf8k_size + 17;
c = PyDict_GetItem(copymap, k);
if (c) {
if (!PyUnicode_Check(c)) {
PyErr_SetString(PyExc_TypeError, "expected string key");
goto bail;
}
Py_ssize_t utf8c_size = 0;
utf8c = PyUnicode_AsUTF8AndSize(c, &utf8c_size);
if (!utf8c) {
goto bail;
}
nbytes += utf8c_size + 1;
}
#else
if (!PyBytes_Check(k)) { if (!PyBytes_Check(k)) {
PyErr_SetString(PyExc_TypeError, "expected string key"); PyErr_SetString(PyExc_TypeError, "expected string key");
goto bail; goto bail;
@ -416,6 +471,7 @@ static PyObject* pack_dirstate(PyObject* self, PyObject* args) {
} }
nbytes += PyBytes_GET_SIZE(c) + 1; nbytes += PyBytes_GET_SIZE(c) + 1;
} }
#endif
} }
packobj = PyBytes_FromStringAndSize(NULL, nbytes); packobj = PyBytes_FromStringAndSize(NULL, nbytes);
@ -475,14 +531,30 @@ static PyObject* pack_dirstate(PyObject* self, PyObject* args) {
putbe32((uint32_t)mtime, p + 8); putbe32((uint32_t)mtime, p + 8);
t = p + 12; t = p + 12;
p += 16; p += 16;
#ifdef IS_PY3K
utf8k = PyUnicode_AsUTF8AndSize(k, &len);
if (!utf8k) {
goto bail;
}
memcpy(p, utf8k, len);
#else
len = PyBytes_GET_SIZE(k); len = PyBytes_GET_SIZE(k);
memcpy(p, PyBytes_AS_STRING(k), len); memcpy(p, PyBytes_AS_STRING(k), len);
#endif
p += len; p += len;
o = PyDict_GetItem(copymap, k); o = PyDict_GetItem(copymap, k);
if (o) { if (o) {
*p++ = '\0'; *p++ = '\0';
#ifdef IS_PY3K
utf8c = PyUnicode_AsUTF8AndSize(o, &l1);
if (!utf8c) {
goto bail;
}
memcpy(p, utf8c, l1);
#else
l1 = PyBytes_GET_SIZE(o); l1 = PyBytes_GET_SIZE(o);
memcpy(p, PyBytes_AS_STRING(o), l1); memcpy(p, PyBytes_AS_STRING(o), l1);
#endif
p += l1; p += l1;
len += l1 + 1; len += l1 + 1;
} }

View File

@ -1515,7 +1515,7 @@ class dirstatemap(object):
# This heuristic is imperfect in many ways, so in a future dirstate # This heuristic is imperfect in many ways, so in a future dirstate
# format update it makes sense to just record the number of entries # format update it makes sense to just record the number of entries
# on write. # on write.
self._map = parsers.dict_new_presized(len(st) / 71) self._map = parsers.dict_new_presized(len(st) // 71)
# Python's garbage collector triggers a GC each time a certain number # Python's garbage collector triggers a GC each time a certain number
# of container objects (the number being defined by # of container objects (the number being defined by

View File

@ -8,9 +8,6 @@ from __future__ import absolute_import
from testutil.dott import feature, sh, testtmp # noqa: F401 from testutil.dott import feature, sh, testtmp # noqa: F401
feature.require(["py2"])
feature.require(["symlink"]) feature.require(["symlink"])
for testcase in ["v0", "v1", "v2"]: for testcase in ["v0", "v1", "v2"]: