2008-03-26 20:12:10 +03:00
|
|
|
/*
|
|
|
|
parsers.c - efficient content parsing
|
|
|
|
|
|
|
|
Copyright 2008 Matt Mackall <mpm@selenic.com> and others
|
|
|
|
|
|
|
|
This software may be used and distributed according to the terms of
|
|
|
|
the GNU General Public License, incorporated herein by reference.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <Python.h>
|
|
|
|
#include <ctype.h>
|
2012-08-14 01:04:52 +04:00
|
|
|
#include <stddef.h>
|
2008-03-26 20:12:10 +03:00
|
|
|
#include <string.h>
|
|
|
|
|
2018-01-26 07:45:12 +03:00
|
|
|
#include "mercurial/bitmanipulation.h"
|
|
|
|
#include "mercurial/cext/charencode.h"
|
|
|
|
#include "mercurial/cext/util.h"
|
2010-06-16 02:49:56 +04:00
|
|
|
|
2016-10-09 14:50:53 +03:00
|
|
|
#ifdef IS_PY3K
|
|
|
|
/* The mapping of Python types is meant to be temporary to get Python
|
|
|
|
* 3 to compile. We should remove this once Python 3 is fully
|
|
|
|
* supported and proper types are used in the extensions themselves. */
|
2016-10-13 14:22:40 +03:00
|
|
|
#define PyInt_Check PyLong_Check
|
2016-10-09 14:50:53 +03:00
|
|
|
#define PyInt_FromLong PyLong_FromLong
|
2016-10-13 14:22:40 +03:00
|
|
|
#define PyInt_FromSsize_t PyLong_FromSsize_t
|
2016-10-09 14:50:53 +03:00
|
|
|
#define PyInt_AsLong PyLong_AsLong
|
|
|
|
#endif
|
|
|
|
|
2017-05-21 07:41:01 +03:00
|
|
|
/* Leading text of the ImportError raised when the running Python's minor
 * version differs from the one the extension was compiled against. */
static const char *const versionerrortext = "Python minor version mismatch";
|
parsers: fail fast if Python has wrong minor version (issue4110)
This change causes an informative ImportError to be raised when importing
the parsers extension module if the minor version of the currently-running
Python interpreter doesn't match that of the Python used when compiling
the extension module.
This change also exposes a parsers.versionerrortext constant in the
C implementation of the module. Its presence can be used to determine
whether this behavior is present in a version of the module. The value
of the constant is the leading text of the ImportError raised and is set
to "Python minor version mismatch".
Here is an example of what the new error looks like:
Traceback (most recent call last):
File "test.py", line 1, in <module>
import mercurial.parsers
ImportError: Python minor version mismatch: The Mercurial extension
modules were compiled with Python 2.7.6, but Mercurial is currently using
Python with sys.hexversion=33883888: Python 2.5.6
(r256:88840, Nov 18 2012, 05:37:10)
[GCC 4.2.1 Compatible Apple Clang 4.1 ((tags/Apple/clang-421.11.66))]
at: /opt/local/Library/Frameworks/Python.framework/Versions/2.5/Resources/
Python.app/Contents/MacOS/Python
The reason for raising an error in this scenario is that Python's C API
is known not to be compatible from minor version to minor version, even
if sys.api_version is the same. See for example this Python bug report
about incompatibilities between 2.5 and 2.6+:
http://bugs.python.org/issue8118
These incompatibilities can cause Mercurial to break in mysterious,
unforeseen ways. For example, when Mercurial compiled with Python 2.7 was
run with 2.5, the following crash occurred when running "hg status":
http://bz.selenic.com/show_bug.cgi?id=4110
After this crash was fixed, running with Python 2.5 no longer crashes, but
the following puzzling behavior still occurs:
$ hg status
...
File ".../mercurial/changelog.py", line 123, in __init__
revlog.revlog.__init__(self, opener, "00changelog.i")
File ".../mercurial/revlog.py", line 251, in __init__
d = self._io.parseindex(i, self._inline)
File ".../mercurial/revlog.py", line 158, in parseindex
index, cache = parsers.parse_index2(data, inline)
TypeError: data is not a string
which can be reproduced more simply with:
import mercurial.parsers as parsers
parsers.parse_index2("", True)
Both the crash and the TypeError occurred because the Python C API's
PyString_Check() returns the wrong value when the C header files from
Python 2.7 are run with Python 2.5. This is an example of an
incompatibility of the sort mentioned in the Python bug report above.
Failing fast with an informative error message results in a better user
experience in cases like the above. The information in the ImportError
also simplifies troubleshooting for those on Mercurial mailing lists, the
bug tracker, etc.
This patch only adds the version check to parsers.c, which is sufficient
to affect command-line commands like "hg status" and "hg summary".
An idea for a future improvement is to move the version-checking C code
to a more central location, and have it run when importing all
Mercurial extension modules and not just parsers.c.
2013-12-05 08:38:27 +04:00
|
|
|
|
2015-06-16 08:41:30 +03:00
|
|
|
/*
 * Python-callable wrapper around _dict_new_presized().
 *
 * Expects a single ssize_t argument (the expected number of entries) and
 * returns whatever _dict_new_presized() returns for it. Returns NULL with
 * the exception set by PyArg_ParseTuple() when the argument is invalid.
 */
static PyObject *dict_new_presized(PyObject *self, PyObject *args)
{
	Py_ssize_t expected_size;

	if (PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
		return _dict_new_presized(expected_size);

	return NULL;
}
|
|
|
|
|
2008-03-26 20:12:10 +03:00
|
|
|
/*
|
|
|
|
* This code assumes that a manifest is stitched together with newline
|
|
|
|
* ('\n') characters.
|
|
|
|
*/
|
|
|
|
/*
 * Parse manifest text into two dicts.
 *
 * Python arguments: (mfdict: dict, fdict: dict, data).
 *
 * Each entry has the form "<file>\0<hex node>[<flags>]\n". For every entry,
 * mfdict[file] is set to the unhexlified first 40 hex digits (or fewer, if
 * the field is shorter) and, when more than 40 bytes follow the NUL,
 * fdict[file] is set to the remaining bytes.
 *
 * Returns None on success. Returns NULL with ValueError set when an entry
 * has no NUL separator or no terminating newline, or with whatever
 * exception the failing helper set otherwise.
 *
 * NOTE(review): "s#" is read into an int here, which is only correct when
 * PY_SSIZE_T_CLEAN is not defined for this file -- confirm.
 */
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
	PyObject *mfdict, *fdict;
	char *str, *cursor, *limit;
	int len;

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", &PyDict_Type,
			      &mfdict, &PyDict_Type, &fdict, &str, &len))
		return NULL;

	limit = str + len;
	for (cursor = str; cursor < limit;) {
		PyObject *file = NULL, *node = NULL, *flags = NULL;
		char *sep, *eol;
		ptrdiff_t nlen;
		int failed = 1;

		/* the file name runs up to the first NUL byte */
		sep = memchr(cursor, '\0', limit - cursor);
		if (sep == NULL) {
			PyErr_SetString(PyExc_ValueError,
					"manifest entry has no separator");
			return NULL;
		}

		/* node (and optional flags) run up to the next newline */
		eol = memchr(sep + 1, '\n', limit - (sep + 1));
		if (eol == NULL) {
			PyErr_SetString(PyExc_ValueError,
					"manifest contains trailing garbage");
			return NULL;
		}

		file = PyBytes_FromStringAndSize(cursor, sep - cursor);
		if (file == NULL)
			goto release;

		nlen = eol - sep - 1;

		/* at most 40 hex digits form the node */
		node = unhexlify(sep + 1, nlen > 40 ? 40 : (Py_ssize_t)nlen);
		if (node == NULL)
			goto release;

		if (nlen > 40) {
			/* anything past the 40 hex digits is the flags field */
			flags = PyBytes_FromStringAndSize(sep + 41, nlen - 40);
			if (flags == NULL)
				goto release;

			if (PyDict_SetItem(fdict, file, flags) == -1)
				goto release;
		}

		if (PyDict_SetItem(mfdict, file, node) == -1)
			goto release;

		cursor = eol + 1;
		failed = 0;

	release:
		/* the dicts hold their own references; drop ours either way */
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		if (failed)
			return NULL;
	}

	Py_RETURN_NONE;
}
|
|
|
|
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
|
2017-10-16 21:53:57 +03:00
|
|
|
int size, int mtime)
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
{
|
2017-10-16 21:53:57 +03:00
|
|
|
dirstateTupleObject *t =
|
|
|
|
PyObject_New(dirstateTupleObject, &dirstateTupleType);
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
if (!t)
|
|
|
|
return NULL;
|
|
|
|
t->state = state;
|
|
|
|
t->mode = mode;
|
|
|
|
t->size = size;
|
|
|
|
t->mtime = mtime;
|
|
|
|
return t;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * tp_new slot for dirstate_tuple.
 *
 * Python arguments: (state: 1-char, mode: int, size: int, mtime: int).
 * kwds is not consulted. Returns the new object, or NULL if argument
 * parsing or allocation fails.
 */
static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
				    PyObject *kwds)
{
	/* All initialization happens here rather than in a tp_init function
	 * because dirstate_tuple is immutable. */
	char state;
	int size, mode, mtime;
	dirstateTupleObject *obj;

	if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
		return NULL;

	obj = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
	if (obj != NULL) {
		obj->state = state;
		obj->mode = mode;
		obj->size = size;
		obj->mtime = mtime;
	}

	return (PyObject *)obj;
}
|
|
|
|
|
|
|
|
/*
 * tp_dealloc slot for dirstate_tuple: the object holds only plain C
 * fields, so releasing the object's own memory is all that is needed.
 */
static void dirstate_tuple_dealloc(PyObject *o)
{
	PyObject_Del(o);
}
|
|
|
|
|
|
|
|
/*
 * sq_length slot for dirstate_tuple: always 4, matching the four items
 * (state, mode, size, mtime) served by dirstate_tuple_item().
 */
static Py_ssize_t dirstate_tuple_length(PyObject *o)
{
	(void)o; /* length does not depend on the instance */
	return 4;
}
|
|
|
|
|
|
|
|
/*
 * sq_item slot for dirstate_tuple.
 *
 * Index 0 yields the state as a 1-byte bytes object; indexes 1, 2 and 3
 * yield mode, size and mtime as integers. Each access creates a new
 * object. Any other index sets IndexError and returns NULL.
 */
static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
{
	dirstateTupleObject *t = (dirstateTupleObject *)o;

	if (i == 0)
		return PyBytes_FromStringAndSize(&t->state, 1);
	if (i == 1)
		return PyInt_FromLong(t->mode);
	if (i == 2)
		return PyInt_FromLong(t->size);
	if (i == 3)
		return PyInt_FromLong(t->mtime);

	PyErr_SetString(PyExc_IndexError, "index out of range");
	return NULL;
}
|
|
|
|
|
|
|
|
static PySequenceMethods dirstate_tuple_sq = {
|
2017-10-16 21:53:57 +03:00
|
|
|
dirstate_tuple_length, /* sq_length */
|
|
|
|
0, /* sq_concat */
|
|
|
|
0, /* sq_repeat */
|
|
|
|
dirstate_tuple_item, /* sq_item */
|
|
|
|
0, /* sq_ass_item */
|
|
|
|
0, /* sq_contains */
|
|
|
|
0, /* sq_inplace_concat */
|
|
|
|
0 /* sq_inplace_repeat */
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
PyTypeObject dirstateTupleType = {
|
2017-10-16 21:53:57 +03:00
|
|
|
PyVarObject_HEAD_INIT(NULL, 0) /* header */
|
|
|
|
"dirstate_tuple", /* tp_name */
|
|
|
|
sizeof(dirstateTupleObject), /* tp_basicsize */
|
|
|
|
0, /* tp_itemsize */
|
|
|
|
(destructor)dirstate_tuple_dealloc, /* tp_dealloc */
|
|
|
|
0, /* tp_print */
|
|
|
|
0, /* tp_getattr */
|
|
|
|
0, /* tp_setattr */
|
|
|
|
0, /* tp_compare */
|
|
|
|
0, /* tp_repr */
|
|
|
|
0, /* tp_as_number */
|
|
|
|
&dirstate_tuple_sq, /* tp_as_sequence */
|
|
|
|
0, /* tp_as_mapping */
|
|
|
|
0, /* tp_hash */
|
|
|
|
0, /* tp_call */
|
|
|
|
0, /* tp_str */
|
|
|
|
0, /* tp_getattro */
|
|
|
|
0, /* tp_setattro */
|
|
|
|
0, /* tp_as_buffer */
|
|
|
|
Py_TPFLAGS_DEFAULT, /* tp_flags */
|
|
|
|
"dirstate tuple", /* tp_doc */
|
|
|
|
0, /* tp_traverse */
|
|
|
|
0, /* tp_clear */
|
|
|
|
0, /* tp_richcompare */
|
|
|
|
0, /* tp_weaklistoffset */
|
|
|
|
0, /* tp_iter */
|
|
|
|
0, /* tp_iternext */
|
|
|
|
0, /* tp_methods */
|
|
|
|
0, /* tp_members */
|
|
|
|
0, /* tp_getset */
|
|
|
|
0, /* tp_base */
|
|
|
|
0, /* tp_dict */
|
|
|
|
0, /* tp_descr_get */
|
|
|
|
0, /* tp_descr_set */
|
|
|
|
0, /* tp_dictoffset */
|
|
|
|
0, /* tp_init */
|
|
|
|
0, /* tp_alloc */
|
|
|
|
dirstate_tuple_new, /* tp_new */
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
};
|
|
|
|
|
2008-10-13 00:21:08 +04:00
|
|
|
/*
 * Parse a serialized dirstate into two dicts.
 *
 * Python arguments: (dmap: dict, cmap: dict, data).
 *
 * Layout consumed from data:
 *   - bytes 0..39: two 20-byte parent ids;
 *   - then zero or more records, each a 17-byte header (1-byte state
 *     followed by four big-endian 32-bit fields: mode, size, mtime and
 *     name length) followed by "name length" bytes of name data.
 *
 * For each record a dirstate tuple is stored in dmap under the file name.
 * If the name data contains a NUL byte, the part before it is the file
 * name and the part after it is stored in cmap under that file name
 * (presumably the copy source -- confirm against callers).
 *
 * Returns a 2-tuple with the two parents, or NULL with an exception set:
 * ValueError when data is truncated, otherwise whatever the failing
 * helper raised.
 */
static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
	char state, *cur, *str, *cpos;
	int mode, size, mtime;
	unsigned int flen, len, pos = 40;
	int readlen;

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate", &PyDict_Type,
			      &dmap, &PyDict_Type, &cmap, &str, &readlen))
		goto quit;

	len = readlen;

	/* read parents */
	if (len < 40) {
		PyErr_SetString(PyExc_ValueError,
				"too little data for parents");
		goto quit;
	}

	parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
	if (!parents)
		goto quit;

	/* read filenames */
	while (pos >= 40 && pos < len) {
		/* a full 17-byte header must be available */
		if (pos + 17 > len) {
			PyErr_SetString(PyExc_ValueError,
					"overflow in dirstate");
			goto quit;
		}
		cur = str + pos;
		/* unpack header */
		state = *cur;
		mode = getbe32(cur + 1);
		size = getbe32(cur + 5);
		mtime = getbe32(cur + 9);
		flen = getbe32(cur + 13);
		pos += 17;
		cur += 17;
		/* compare against the remaining length so the check itself
		 * cannot overflow */
		if (flen > len - pos) {
			PyErr_SetString(PyExc_ValueError,
					"overflow in dirstate");
			goto quit;
		}

		entry =
		    (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
		/* allocation can fail; a NULL value must never reach
		 * PyDict_SetItem() */
		if (!entry)
			goto quit;
		cpos = memchr(cur, 0, flen);
		if (cpos) {
			/* "name\0other": split at the NUL */
			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
			cname = PyBytes_FromStringAndSize(
			    cpos + 1, flen - (cpos - cur) - 1);
			if (!fname || !cname ||
			    PyDict_SetItem(cmap, fname, cname) == -1 ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
			Py_DECREF(cname);
		} else {
			fname = PyBytes_FromStringAndSize(cur, flen);
			if (!fname || PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
		}
		Py_DECREF(fname);
		Py_DECREF(entry);
		/* reset so the cleanup at quit does not double-release */
		fname = cname = entry = NULL;
		pos += flen;
	}

	ret = parents;
	Py_INCREF(ret);
quit:
	Py_XDECREF(fname);
	Py_XDECREF(cname);
	Py_XDECREF(entry);
	Py_XDECREF(parents);
	return ret;
}
|
|
|
|
|
2015-12-22 03:27:16 +03:00
|
|
|
/*
|
2017-03-09 04:35:20 +03:00
|
|
|
* Build a set of non-normal and other parent entries from the dirstate dmap
|
2017-10-16 21:53:57 +03:00
|
|
|
*/
|
2017-10-02 21:28:41 +03:00
|
|
|
/*
 * Python arguments: (dmap: dict mapping file name -> dirstate tuple).
 *
 * Walks dmap once and returns a 2-tuple of sets (nonnormal, otherparent):
 *   - otherparent gets every name whose entry has state 'n' and size -2;
 *   - nonnormal gets every name whose entry is not in state 'n', or is in
 *     state 'n' with mtime -1.
 *
 * Returns NULL with TypeError set if a value is not a dirstate tuple, or
 * with whatever exception the failing set/tuple call raised.
 */
static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
{
	PyObject *dmap, *fname, *v;
	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
	Py_ssize_t pos = 0;

	if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type, &dmap))
		goto done;

	nonnset = PySet_New(NULL);
	if (!nonnset)
		goto done;

	otherpset = PySet_New(NULL);
	if (!otherpset)
		goto done;

	while (PyDict_Next(dmap, &pos, &fname, &v)) {
		dirstateTupleObject *t;
		int isnormal;

		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
					"expected a dirstate tuple");
			goto done;
		}
		t = (dirstateTupleObject *)v;
		isnormal = (t->state == 'n');

		if (isnormal && t->size == -2 &&
		    PySet_Add(otherpset, fname) == -1)
			goto done;

		/* skip entries that are 'n' with a real mtime */
		if (!isnormal || t->mtime == -1) {
			if (PySet_Add(nonnset, fname) == -1)
				goto done;
		}
	}

	/* "(OO)" takes its own references; ours are dropped below */
	result = Py_BuildValue("(OO)", nonnset, otherpset);

done:
	/* result is still NULL on every failure path */
	Py_XDECREF(nonnset);
	Py_XDECREF(otherpset);
	return result;
}
|
|
|
|
|
2012-05-30 23:55:33 +04:00
|
|
|
/*
|
|
|
|
* Efficiently pack a dirstate object into its on-disk format.
|
|
|
|
*/
|
|
|
|
static PyObject *pack_dirstate(PyObject *self, PyObject *args)
|
|
|
|
{
|
|
|
|
PyObject *packobj = NULL;
|
2014-05-28 02:17:38 +04:00
|
|
|
PyObject *map, *copymap, *pl, *mtime_unset = NULL;
|
2012-05-30 23:55:33 +04:00
|
|
|
Py_ssize_t nbytes, pos, l;
|
2015-01-23 23:48:18 +03:00
|
|
|
PyObject *k, *v = NULL, *pn;
|
2012-05-30 23:55:33 +04:00
|
|
|
char *p, *s;
|
parsers: make pack_dirstate take now in integer for consistency
On recent OS, 'stat.st_mtime' has a double precision floating point
value to represent nano seconds, but it is not wide enough for actual
file timestamp: nowadays, only 52 - 32 = 20 bit width is available for
decimal places in sec.
Therefore, casting it to 'int' may cause unexpected result. See also
changeset 8102a3981272 fixing issue4836 for detail.
For example, changed file A may be treated as "clean" unexpectedly in
steps below. "rounded now" is the value gotten by rounding via
'int(st.st_mtime)' or so.
---------------------+--------------------+------------------------
"now" | | timestamp of A (time_t)
float rounded time_t| action | FS dirstate
------ ------- ------+--------------------+-------- ---------------
N+.nnn N N | | --- ---
| update file A | N
| dirstate.normal(A) | N
N+.999 N+1 N | |
| dirstate.write() | N (*1)
| : |
| change file A | N
| : |
N+1.00 N+1 N+1 | |
| "hg status" (*2) | N N
------ ------- ------+--------------------+-------- ---------------
Timestamp N of A in dirstate isn't dropped at (*1), because "rounded
now" is N+1 at that time, even if 'st_mtime' in 'time_t' is still N.
Then, file A is unexpectedly treated as "clean" at (*2) in this case.
For consistent handling of 'stat.st_mtime', this patch makes
'pack_dirstate()' take 'now' argument not in floating point but in
integer.
This patch makes 'PyArg_ParseTuple()' in 'pack_dirstate()' use format
'i' (= checking type mismatch or overflow), even though it is ensured
that 'now' is in the range of 32bit signed integer by masking with
'_rangemask' (= 0x7fffffff) on caller side.
It should be cheap enough compared to packing itself, and useful to
detect that legacy code invokes 'pack_dirstate()' with 'now' in
floating point value.
2015-10-13 20:40:04 +03:00
|
|
|
int now;
|
2012-05-30 23:55:33 +04:00
|
|
|
|
2017-10-16 21:53:57 +03:00
|
|
|
if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate", &PyDict_Type, &map,
|
|
|
|
&PyDict_Type, ©map, &pl, &now))
|
2012-05-30 23:55:33 +04:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
|
|
|
|
PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Figure out how much we need to allocate. */
|
|
|
|
for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
|
|
|
|
PyObject *c;
|
2016-10-08 23:02:29 +03:00
|
|
|
if (!PyBytes_Check(k)) {
|
2012-05-30 23:55:33 +04:00
|
|
|
PyErr_SetString(PyExc_TypeError, "expected string key");
|
|
|
|
goto bail;
|
|
|
|
}
|
2016-10-08 23:02:29 +03:00
|
|
|
nbytes += PyBytes_GET_SIZE(k) + 17;
|
2012-05-30 23:55:33 +04:00
|
|
|
c = PyDict_GetItem(copymap, k);
|
|
|
|
if (c) {
|
2016-10-08 23:02:29 +03:00
|
|
|
if (!PyBytes_Check(c)) {
|
2012-05-30 23:55:33 +04:00
|
|
|
PyErr_SetString(PyExc_TypeError,
|
2017-10-16 21:53:57 +03:00
|
|
|
"expected string key");
|
2012-05-30 23:55:33 +04:00
|
|
|
goto bail;
|
|
|
|
}
|
2016-10-08 23:02:29 +03:00
|
|
|
nbytes += PyBytes_GET_SIZE(c) + 1;
|
2012-05-30 23:55:33 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-10-08 23:02:29 +03:00
|
|
|
packobj = PyBytes_FromStringAndSize(NULL, nbytes);
|
2012-05-30 23:55:33 +04:00
|
|
|
if (packobj == NULL)
|
|
|
|
goto bail;
|
|
|
|
|
2016-10-08 23:02:29 +03:00
|
|
|
p = PyBytes_AS_STRING(packobj);
|
2012-05-30 23:55:33 +04:00
|
|
|
|
|
|
|
pn = PySequence_ITEM(pl, 0);
|
2016-10-08 23:02:29 +03:00
|
|
|
if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
|
2012-05-30 23:55:33 +04:00
|
|
|
PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
memcpy(p, s, l);
|
|
|
|
p += 20;
|
|
|
|
pn = PySequence_ITEM(pl, 1);
|
2016-10-08 23:02:29 +03:00
|
|
|
if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
|
2012-05-30 23:55:33 +04:00
|
|
|
PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
memcpy(p, s, l);
|
|
|
|
p += 20;
|
|
|
|
|
2017-10-16 21:53:57 +03:00
|
|
|
for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
dirstateTupleObject *tuple;
|
|
|
|
char state;
|
2015-10-17 17:14:13 +03:00
|
|
|
int mode, size, mtime;
|
2018-01-26 07:45:12 +03:00
|
|
|
Py_ssize_t len, l1;
|
2012-05-30 23:55:33 +04:00
|
|
|
PyObject *o;
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
char *t;
|
2012-05-30 23:55:33 +04:00
|
|
|
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
if (!dirstate_tuple_check(v)) {
|
|
|
|
PyErr_SetString(PyExc_TypeError,
|
2017-10-16 21:53:57 +03:00
|
|
|
"expected a dirstate tuple");
|
2012-05-30 23:55:33 +04:00
|
|
|
goto bail;
|
|
|
|
}
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
tuple = (dirstateTupleObject *)v;
|
|
|
|
|
|
|
|
state = tuple->state;
|
|
|
|
mode = tuple->mode;
|
|
|
|
size = tuple->size;
|
|
|
|
mtime = tuple->mtime;
|
parsers: make pack_dirstate take now in integer for consistency
On recent OS, 'stat.st_mtime' has a double precision floating point
value to represent nano seconds, but it is not wide enough for actual
file timestamp: nowadays, only 52 - 32 = 20 bit width is available for
decimal places in sec.
Therefore, casting it to 'int' may cause unexpected result. See also
changeset 8102a3981272 fixing issue4836 for detail.
For example, changed file A may be treated as "clean" unexpectedly in
steps below. "rounded now" is the value gotten by rounding via
'int(st.st_mtime)' or so.
---------------------+--------------------+------------------------
"now" | | timestamp of A (time_t)
float rounded time_t| action | FS dirstate
------ ------- ------+--------------------+-------- ---------------
N+.nnn N N | | --- ---
| update file A | N
| dirstate.normal(A) | N
N+.999 N+1 N | |
| dirstate.write() | N (*1)
| : |
| change file A | N
| : |
N+1.00 N+1 N+1 | |
| "hg status" (*2) | N N
------ ------- ------+--------------------+-------- ---------------
Timestamp N of A in dirstate isn't dropped at (*1), because "rounded
now" is N+1 at that time, even if 'st_mtime' in 'time_t' is still N.
Then, file A is unexpectedly treated as "clean" at (*2) in this case.
For consistent handling of 'stat.st_mtime', this patch makes
'pack_dirstate()' take 'now' argument not in floating point but in
integer.
This patch makes 'PyArg_ParseTuple()' in 'pack_dirstate()' use format
'i' (= checking type mismatch or overflow), even though it is ensured
that 'now' is in the range of 32bit signed integer by masking with
'_rangemask' (= 0x7fffffff) on caller side.
This check should be cheap compared to the packing itself, and it is useful
for detecting legacy code that invokes 'pack_dirstate()' with 'now' as a
floating point value.
2015-10-13 20:40:04 +03:00
|
|
|
if (state == 'n' && mtime == now) {
|
2013-01-18 11:46:08 +04:00
|
|
|
/* See pure/parsers.py:pack_dirstate for why we do
|
|
|
|
* this. */
|
2013-08-18 07:48:49 +04:00
|
|
|
mtime = -1;
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
mtime_unset = (PyObject *)make_dirstate_tuple(
|
2017-10-16 21:53:57 +03:00
|
|
|
state, mode, size, mtime);
|
2014-05-28 02:17:38 +04:00
|
|
|
if (!mtime_unset)
|
|
|
|
goto bail;
|
|
|
|
if (PyDict_SetItem(map, k, mtime_unset) == -1)
|
|
|
|
goto bail;
|
|
|
|
Py_DECREF(mtime_unset);
|
|
|
|
mtime_unset = NULL;
|
2012-05-30 23:55:33 +04:00
|
|
|
}
|
parsers: inline fields of dirstate values in C version
Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:
- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all
In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.
This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.
The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.
parse_dirstate becomes significantly faster:
$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after: wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)
and as a result, several commands benefit:
$ time hg status # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after: 0.34s user 0.12s system 99% cpu 0.471 total
$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after: 0.76s user 0.17s system 99% cpu 0.931 total
There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.
2014-05-28 01:27:41 +04:00
|
|
|
*p++ = state;
|
2015-10-17 17:14:13 +03:00
|
|
|
putbe32((uint32_t)mode, p);
|
|
|
|
putbe32((uint32_t)size, p + 4);
|
|
|
|
putbe32((uint32_t)mtime, p + 8);
|
2012-05-30 23:55:33 +04:00
|
|
|
t = p + 12;
|
|
|
|
p += 16;
|
2016-10-08 23:02:29 +03:00
|
|
|
len = PyBytes_GET_SIZE(k);
|
|
|
|
memcpy(p, PyBytes_AS_STRING(k), len);
|
2012-05-30 23:55:33 +04:00
|
|
|
p += len;
|
|
|
|
o = PyDict_GetItem(copymap, k);
|
|
|
|
if (o) {
|
|
|
|
*p++ = '\0';
|
2018-01-26 07:45:12 +03:00
|
|
|
l1 = PyBytes_GET_SIZE(o);
|
|
|
|
memcpy(p, PyBytes_AS_STRING(o), l1);
|
|
|
|
p += l1;
|
|
|
|
len += l1 + 1;
|
2012-05-30 23:55:33 +04:00
|
|
|
}
|
|
|
|
putbe32((uint32_t)len, t);
|
|
|
|
}
|
|
|
|
|
2016-10-08 23:02:29 +03:00
|
|
|
pos = p - PyBytes_AS_STRING(packobj);
|
2012-05-30 23:55:33 +04:00
|
|
|
if (pos != nbytes) {
|
|
|
|
PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
|
2017-10-16 21:53:57 +03:00
|
|
|
(long)pos, (long)nbytes);
|
2012-05-30 23:55:33 +04:00
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
|
|
|
|
return packobj;
|
|
|
|
bail:
|
2014-05-28 02:17:38 +04:00
|
|
|
Py_XDECREF(mtime_unset);
|
2012-05-30 23:55:33 +04:00
|
|
|
Py_XDECREF(packobj);
|
2015-01-23 23:48:18 +03:00
|
|
|
Py_XDECREF(v);
|
2012-05-30 23:55:33 +04:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
/* NOTE(review): presumably a bit in the marker flags field, like
 * USING_SHA_256 below; it is not referenced by the code visible here. */
#define BUMPED_FIX 1
|
|
|
|
/* Bit tested against the marker flags in fm1readmarker(); when set, hashes
 * in that marker are read as 32 bytes wide instead of 20. */
#define USING_SHA_256 2
|
|
|
|
/* Size in bytes of the fixed part of a marker as read by fm1readmarker():
 * 4 (marker size) + 8 (mtime) + 2 (tz) + 2 (flags) + 1 (nsuccs)
 * + 1 (nparents) + 1 (nmetadata). */
#define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
|
2012-04-13 01:05:59 +04:00
|
|
|
|
2017-10-16 21:53:57 +03:00
|
|
|
static PyObject *readshas(const char *source, unsigned char num,
|
|
|
|
Py_ssize_t hashwidth)
|
2012-04-06 00:00:35 +04:00
|
|
|
{
|
2017-05-21 00:01:05 +03:00
|
|
|
int i;
|
|
|
|
PyObject *list = PyTuple_New(num);
|
|
|
|
if (list == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
for (i = 0; i < num; i++) {
|
|
|
|
PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
|
|
|
|
if (hash == NULL) {
|
|
|
|
Py_DECREF(list);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
PyTuple_SET_ITEM(list, i, hash);
|
|
|
|
source += hashwidth;
|
|
|
|
}
|
|
|
|
return list;
|
2012-04-06 00:00:35 +04:00
|
|
|
}
|
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
static PyObject *fm1readmarker(const char *databegin, const char *dataend,
|
2017-10-16 21:53:57 +03:00
|
|
|
uint32_t *msize)
|
2017-05-21 00:01:05 +03:00
|
|
|
{
|
|
|
|
const char *data = databegin;
|
|
|
|
const char *meta;
|
2012-04-06 00:00:35 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
double mtime;
|
|
|
|
int16_t tz;
|
|
|
|
uint16_t flags;
|
|
|
|
unsigned char nsuccs, nparents, nmetadata;
|
|
|
|
Py_ssize_t hashwidth = 20;
|
2008-10-17 03:03:38 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
PyObject *prec = NULL, *parents = NULL, *succs = NULL;
|
|
|
|
PyObject *metadata = NULL, *ret = NULL;
|
|
|
|
int i;
|
2012-06-02 02:19:08 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
if (data + FM1_HEADER_SIZE > dataend) {
|
|
|
|
goto overflow;
|
2012-04-13 01:05:59 +04:00
|
|
|
}
|
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
*msize = getbe32(data);
|
|
|
|
data += 4;
|
|
|
|
mtime = getbefloat64(data);
|
|
|
|
data += 8;
|
|
|
|
tz = getbeint16(data);
|
|
|
|
data += 2;
|
|
|
|
flags = getbeuint16(data);
|
|
|
|
data += 2;
|
2012-04-13 01:05:59 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
if (flags & USING_SHA_256) {
|
|
|
|
hashwidth = 32;
|
2015-07-16 17:36:08 +03:00
|
|
|
}
|
2015-05-28 03:00:28 +03:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
nsuccs = (unsigned char)(*data++);
|
|
|
|
nparents = (unsigned char)(*data++);
|
|
|
|
nmetadata = (unsigned char)(*data++);
|
2015-05-28 03:00:28 +03:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
if (databegin + *msize > dataend) {
|
|
|
|
goto overflow;
|
2012-04-06 00:00:35 +04:00
|
|
|
}
|
2017-10-16 21:53:57 +03:00
|
|
|
dataend = databegin + *msize; /* narrow down to marker size */
|
2008-10-20 17:19:05 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
if (data + hashwidth > dataend) {
|
|
|
|
goto overflow;
|
|
|
|
}
|
|
|
|
prec = PyBytes_FromStringAndSize(data, hashwidth);
|
|
|
|
data += hashwidth;
|
|
|
|
if (prec == NULL) {
|
|
|
|
goto bail;
|
2012-04-06 00:00:35 +04:00
|
|
|
}
|
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
if (data + nsuccs * hashwidth > dataend) {
|
|
|
|
goto overflow;
|
2012-04-06 00:00:35 +04:00
|
|
|
}
|
2017-05-21 00:01:05 +03:00
|
|
|
succs = readshas(data, nsuccs, hashwidth);
|
|
|
|
if (succs == NULL) {
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
data += nsuccs * hashwidth;
|
2012-04-06 00:00:35 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
if (nparents == 1 || nparents == 2) {
|
|
|
|
if (data + nparents * hashwidth > dataend) {
|
|
|
|
goto overflow;
|
2012-04-06 00:00:35 +04:00
|
|
|
}
|
2017-05-21 00:01:05 +03:00
|
|
|
parents = readshas(data, nparents, hashwidth);
|
|
|
|
if (parents == NULL) {
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
data += nparents * hashwidth;
|
2012-04-06 00:00:35 +04:00
|
|
|
} else {
|
2017-05-21 00:01:05 +03:00
|
|
|
parents = Py_None;
|
|
|
|
Py_INCREF(parents);
|
2012-04-06 00:00:35 +04:00
|
|
|
}
|
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
if (data + 2 * nmetadata > dataend) {
|
|
|
|
goto overflow;
|
2012-04-06 00:00:35 +04:00
|
|
|
}
|
2017-05-21 00:01:05 +03:00
|
|
|
meta = data + (2 * nmetadata);
|
|
|
|
metadata = PyTuple_New(nmetadata);
|
|
|
|
if (metadata == NULL) {
|
|
|
|
goto bail;
|
2013-09-16 23:12:37 +04:00
|
|
|
}
|
2017-05-21 00:01:05 +03:00
|
|
|
for (i = 0; i < nmetadata; i++) {
|
|
|
|
PyObject *tmp, *left = NULL, *right = NULL;
|
|
|
|
Py_ssize_t leftsize = (unsigned char)(*data++);
|
|
|
|
Py_ssize_t rightsize = (unsigned char)(*data++);
|
|
|
|
if (meta + leftsize + rightsize > dataend) {
|
|
|
|
goto overflow;
|
|
|
|
}
|
|
|
|
left = PyBytes_FromStringAndSize(meta, leftsize);
|
|
|
|
meta += leftsize;
|
|
|
|
right = PyBytes_FromStringAndSize(meta, rightsize);
|
|
|
|
meta += rightsize;
|
|
|
|
tmp = PyTuple_New(2);
|
|
|
|
if (!left || !right || !tmp) {
|
|
|
|
Py_XDECREF(left);
|
|
|
|
Py_XDECREF(right);
|
|
|
|
Py_XDECREF(tmp);
|
|
|
|
goto bail;
|
|
|
|
}
|
|
|
|
PyTuple_SET_ITEM(tmp, 0, left);
|
|
|
|
PyTuple_SET_ITEM(tmp, 1, right);
|
|
|
|
PyTuple_SET_ITEM(metadata, i, tmp);
|
2012-04-13 01:05:59 +04:00
|
|
|
}
|
2017-10-16 21:53:57 +03:00
|
|
|
ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
|
|
|
|
(int)tz * 60, parents);
|
|
|
|
goto bail; /* return successfully */
|
2012-04-13 01:05:59 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
overflow:
|
|
|
|
PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
|
|
|
|
bail:
|
|
|
|
Py_XDECREF(prec);
|
|
|
|
Py_XDECREF(succs);
|
|
|
|
Py_XDECREF(metadata);
|
|
|
|
Py_XDECREF(parents);
|
|
|
|
return ret;
|
2012-04-13 01:05:59 +04:00
|
|
|
}
|
|
|
|
|
2017-10-02 21:28:41 +03:00
|
|
|
/*
 * fm1readmarkers(data, offset, stop) -> list
 *
 * Python entry point: parse consecutive version-1 obsolescence markers from
 * the buffer `data`, starting at byte `offset` and continuing for as long as
 * the running offset is below `stop`. Each marker is decoded by
 * fm1readmarker() and appended to the returned list.
 *
 * Returns a new list (empty when offset >= stop), or NULL with an exception
 * set. ValueError("overflow in obsstore") is raised when a marker does not
 * fit in the buffer, and also when a non-empty range starts outside the
 * buffer; the latter used to produce an out-of-range pointer instead of a
 * clean error.
 */
static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
{
	const char *data, *dataend;
	/* NOTE(review): the C type of the "s#" length depends on whether
	 * PY_SSIZE_T_CLEAN is defined before Python.h is included, which is
	 * not visible from here; `int` is kept as in the original. */
	int datalen;
	Py_ssize_t offset, stop;
	PyObject *markers = NULL;

	if (!PyArg_ParseTuple(args, "s#nn", &data, &datalen, &offset, &stop)) {
		return NULL;
	}
	markers = PyList_New(0);
	if (!markers) {
		return NULL;
	}
	if (offset >= stop) {
		/* empty range: nothing to parse, no pointer into data needed */
		return markers;
	}
	if (offset < 0 || offset > datalen) {
		/* never form a pointer outside the buffer */
		PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
		goto bail;
	}
	dataend = data + datalen;
	data += offset;
	while (offset < stop) {
		uint32_t msize;
		int error;
		PyObject *record = fm1readmarker(data, dataend, &msize);
		if (!record) {
			goto bail;
		}
		error = PyList_Append(markers, record);
		/* the list holds its own reference on success */
		Py_DECREF(record);
		if (error) {
			goto bail;
		}
		/* a successfully parsed marker lies entirely within the
		 * buffer, so advancing by its size stays in range */
		data += msize;
		offset += msize;
	}
	return markers;
bail:
	Py_DECREF(markers);
	return NULL;
}
|
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
/* One-line description of this module.
 * NOTE(review): presumably used as the module docstring when the module is
 * created; the use site is not visible here. */
static char parsers_doc[] = "Efficient content parsing.";
|
2012-05-20 07:21:48 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
/* Forward declarations of functions that are listed in the methods table
 * below but have no definition in the visible part of this file. */
PyObject *encodedir(PyObject *self, PyObject *args);
|
|
|
|
PyObject *pathencode(PyObject *self, PyObject *args);
|
|
|
|
PyObject *lowerencode(PyObject *self, PyObject *args);
|
|
|
|
PyObject *parse_index2(PyObject *self, PyObject *args);
|
2012-05-20 07:21:48 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
static PyMethodDef methods[] = {
|
2017-10-16 21:53:57 +03:00
|
|
|
{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
|
|
|
|
{"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
|
|
|
|
"create a set containing non-normal and other parent entries of given "
|
|
|
|
"dirstate\n"},
|
|
|
|
{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
|
|
|
|
{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
|
|
|
|
{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
|
|
|
|
{"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
|
|
|
|
{"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
|
|
|
|
{"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
|
|
|
|
{"dict_new_presized", dict_new_presized, METH_VARARGS,
|
|
|
|
"construct a dict with an expected size\n"},
|
|
|
|
{"make_file_foldmap", make_file_foldmap, METH_VARARGS,
|
|
|
|
"make file foldmap\n"},
|
|
|
|
{"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
|
|
|
|
"escape a UTF-8 byte string to JSON (fast path)\n"},
|
|
|
|
{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
|
|
|
|
{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
|
|
|
|
{"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
|
|
|
|
{"fm1readmarkers", fm1readmarkers, METH_VARARGS,
|
|
|
|
"parse v1 obsolete markers\n"},
|
|
|
|
{NULL, NULL}};
|
2012-05-20 07:21:48 +04:00
|
|
|
|
2017-05-21 00:01:05 +03:00
|
|
|
/* Initialisation hooks called from module_init() with the module object;
 * their definitions are not in the visible part of this file. */
void dirs_module_init(PyObject *mod);
|
|
|
|
void manifest_module_init(PyObject *mod);
|
|
|
|
void revlog_module_init(PyObject *mod);
|
2014-09-17 03:03:21 +04:00
|
|
|
|
2017-12-06 17:46:41 +03:00
|
|
|
/* Exposed to Python as the module-level integer constant "version" by
 * module_init(). */
static const int version = 4;
|
2017-04-26 03:43:30 +03:00
|
|
|
|
2012-04-06 00:00:35 +04:00
|
|
|
/*
 * Populate the freshly created module object `mod`: add the "version" and
 * "versionerrortext" constants, run the dirs/manifest/revlog initialisation
 * hooks, then ready the dirstate tuple type and publish it as
 * "dirstatetuple".
 *
 * The function returns nothing; if PyType_Ready() fails it returns early
 * without adding "dirstatetuple".
 */
static void module_init(PyObject *mod)
{
	PyModule_AddIntConstant(mod, "version", version);

	/* This module constant has two purposes.  First, it lets us unit test
	 * the ImportError raised without hard-coding any error text.  This
	 * means we can change the text in the future without breaking tests,
	 * even across changesets without a recompile.  Second, its presence
	 * can be used to determine whether the version-checking logic is
	 * present, which also helps in testing across changesets without a
	 * recompile.  Note that this means the pure-Python version of parsers
	 * should not have this module constant. */
	PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);

	dirs_module_init(mod);
	manifest_module_init(mod);
	revlog_module_init(mod);

	if (PyType_Ready(&dirstateTupleType) < 0)
		return;
	Py_INCREF(&dirstateTupleType);
	if (PyModule_AddObject(mod, "dirstatetuple",
	                       (PyObject *)&dirstateTupleType) < 0) {
		/* PyModule_AddObject() takes over the reference only on
		 * success, so give it back on failure */
		Py_DECREF(&dirstateTupleType);
	}
}
|
|
|
|
|
parsers: fail fast if Python has wrong minor version (issue4110)
This change causes an informative ImportError to be raised when importing
the parsers extension module if the minor version of the currently-running
Python interpreter doesn't match that of the Python used when compiling
the extension module.
This change also exposes a parsers.versionerrortext constant in the
C implementation of the module. Its presence can be used to determine
whether this behavior is present in a version of the module. The value
of the constant is the leading text of the ImportError raised and is set
to "Python minor version mismatch".
Here is an example of what the new error looks like:
Traceback (most recent call last):
File "test.py", line 1, in <module>
import mercurial.parsers
ImportError: Python minor version mismatch: The Mercurial extension
modules were compiled with Python 2.7.6, but Mercurial is currently using
Python with sys.hexversion=33883888: Python 2.5.6
(r256:88840, Nov 18 2012, 05:37:10)
[GCC 4.2.1 Compatible Apple Clang 4.1 ((tags/Apple/clang-421.11.66))]
at: /opt/local/Library/Frameworks/Python.framework/Versions/2.5/Resources/
Python.app/Contents/MacOS/Python
The reason for raising an error in this scenario is that Python's C API
is known not to be compatible from minor version to minor version, even
if sys.api_version is the same. See for example this Python bug report
about incompatibilities between 2.5 and 2.6+:
http://bugs.python.org/issue8118
These incompatibilities can cause Mercurial to break in mysterious,
unforeseen ways. For example, when Mercurial compiled with Python 2.7 was
run with 2.5, the following crash occurred when running "hg status":
http://bz.selenic.com/show_bug.cgi?id=4110
After this crash was fixed, running with Python 2.5 no longer crashes, but
the following puzzling behavior still occurs:
$ hg status
...
File ".../mercurial/changelog.py", line 123, in __init__
revlog.revlog.__init__(self, opener, "00changelog.i")
File ".../mercurial/revlog.py", line 251, in __init__
d = self._io.parseindex(i, self._inline)
File ".../mercurial/revlog.py", line 158, in parseindex
index, cache = parsers.parse_index2(data, inline)
TypeError: data is not a string
which can be reproduced more simply with:
import mercurial.parsers as parsers
parsers.parse_index2("", True)
Both the crash and the TypeError occurred because the Python C API's
PyString_Check() returns the wrong value when the C header files from
Python 2.7 are run with Python 2.5. This is an example of an
incompatibility of the sort mentioned in the Python bug report above.
Failing fast with an informative error message results in a better user
experience in cases like the above. The information in the ImportError
also simplifies troubleshooting for those on Mercurial mailing lists, the
bug tracker, etc.
This patch only adds the version check to parsers.c, which is sufficient
to affect command-line commands like "hg status" and "hg summary".
An idea for a future improvement is to move the version-checking C code
to a more central location, and have it run when importing all
Mercurial extension modules and not just parsers.c.
2013-12-05 08:38:27 +04:00
|
|
|
static int check_python_version(void)
|
|
|
|
{
|
2015-01-23 23:30:21 +03:00
|
|
|
PyObject *sys = PyImport_ImportModule("sys"), *ver;
|
|
|
|
long hexversion;
|
|
|
|
if (!sys)
|
|
|
|
return -1;
|
|
|
|
ver = PyObject_GetAttrString(sys, "hexversion");
|
|
|
|
Py_DECREF(sys);
|
|
|
|
if (!ver)
|
|
|
|
return -1;
|
|
|
|
hexversion = PyInt_AsLong(ver);
|
|
|
|
Py_DECREF(ver);
|
parsers: fail fast if Python has wrong minor version (issue4110)
This change causes an informative ImportError to be raised when importing
the parsers extension module if the minor version of the currently-running
Python interpreter doesn't match that of the Python used when compiling
the extension module.
This change also exposes a parsers.versionerrortext constant in the
C implementation of the module. Its presence can be used to determine
whether this behavior is present in a version of the module. The value
of the constant is the leading text of the ImportError raised and is set
to "Python minor version mismatch".
Here is an example of what the new error looks like:
Traceback (most recent call last):
File "test.py", line 1, in <module>
import mercurial.parsers
ImportError: Python minor version mismatch: The Mercurial extension
modules were compiled with Python 2.7.6, but Mercurial is currently using
Python with sys.hexversion=33883888: Python 2.5.6
(r256:88840, Nov 18 2012, 05:37:10)
[GCC 4.2.1 Compatible Apple Clang 4.1 ((tags/Apple/clang-421.11.66))]
at: /opt/local/Library/Frameworks/Python.framework/Versions/2.5/Resources/
Python.app/Contents/MacOS/Python
The reason for raising an error in this scenario is that Python's C API
is known not to be compatible from minor version to minor version, even
if sys.api_version is the same. See for example this Python bug report
about incompatibilities between 2.5 and 2.6+:
http://bugs.python.org/issue8118
These incompatibilities can cause Mercurial to break in mysterious,
unforeseen ways. For example, when Mercurial compiled with Python 2.7 was
run with 2.5, the following crash occurred when running "hg status":
http://bz.selenic.com/show_bug.cgi?id=4110
After this crash was fixed, running with Python 2.5 no longer crashes, but
the following puzzling behavior still occurs:
$ hg status
...
File ".../mercurial/changelog.py", line 123, in __init__
revlog.revlog.__init__(self, opener, "00changelog.i")
File ".../mercurial/revlog.py", line 251, in __init__
d = self._io.parseindex(i, self._inline)
File ".../mercurial/revlog.py", line 158, in parseindex
index, cache = parsers.parse_index2(data, inline)
TypeError: data is not a string
which can be reproduced more simply with:
import mercurial.parsers as parsers
parsers.parse_index2("", True)
Both the crash and the TypeError occurred because the Python C API's
PyString_Check() returns the wrong value when the C header files from
Python 2.7 are run with Python 2.5. This is an example of an
incompatibility of the sort mentioned in the Python bug report above.
Failing fast with an informative error message results in a better user
experience in cases like the above. The information in the ImportError
also simplifies troubleshooting for those on Mercurial mailing lists, the
bug tracker, etc.
This patch only adds the version check to parsers.c, which is sufficient
to affect command-line commands like "hg status" and "hg summary".
An idea for a future improvement is to move the version-checking C code
to a more central location, and have it run when importing all
Mercurial extension modules and not just parsers.c.
2013-12-05 08:38:27 +04:00
|
|
|
/* sys.hexversion is a 32-bit number by default, so the -1 case
|
|
|
|
* should only occur in unusual circumstances (e.g. if sys.hexversion
|
|
|
|
* is manually set to an invalid value). */
|
|
|
|
if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
|
2017-10-16 21:53:57 +03:00
|
|
|
PyErr_Format(PyExc_ImportError,
|
|
|
|
"%s: The Mercurial extension "
|
|
|
|
"modules were compiled with Python " PY_VERSION
|
|
|
|
", but "
|
|
|
|
"Mercurial is currently using Python with "
|
|
|
|
"sys.hexversion=%ld: "
|
|
|
|
"Python %s\n at: %s",
|
|
|
|
versionerrortext, hexversion, Py_GetVersion(),
|
|
|
|
Py_GetProgramFullPath());
|
parsers: fail fast if Python has wrong minor version (issue4110)
This change causes an informative ImportError to be raised when importing
the parsers extension module if the minor version of the currently-running
Python interpreter doesn't match that of the Python used when compiling
the extension module.
This change also exposes a parsers.versionerrortext constant in the
C implementation of the module. Its presence can be used to determine
whether this behavior is present in a version of the module. The value
of the constant is the leading text of the ImportError raised and is set
to "Python minor version mismatch".
Here is an example of what the new error looks like:
Traceback (most recent call last):
File "test.py", line 1, in <module>
import mercurial.parsers
ImportError: Python minor version mismatch: The Mercurial extension
modules were compiled with Python 2.7.6, but Mercurial is currently using
Python with sys.hexversion=33883888: Python 2.5.6
(r256:88840, Nov 18 2012, 05:37:10)
[GCC 4.2.1 Compatible Apple Clang 4.1 ((tags/Apple/clang-421.11.66))]
at: /opt/local/Library/Frameworks/Python.framework/Versions/2.5/Resources/
Python.app/Contents/MacOS/Python
The reason for raising an error in this scenario is that Python's C API
is known not to be compatible from minor version to minor version, even
if sys.api_version is the same. See for example this Python bug report
about incompatibilities between 2.5 and 2.6+:
http://bugs.python.org/issue8118
These incompatibilities can cause Mercurial to break in mysterious,
unforeseen ways. For example, when Mercurial compiled with Python 2.7 was
run with 2.5, the following crash occurred when running "hg status":
http://bz.selenic.com/show_bug.cgi?id=4110
After this crash was fixed, running with Python 2.5 no longer crashes, but
the following puzzling behavior still occurs:
$ hg status
...
File ".../mercurial/changelog.py", line 123, in __init__
revlog.revlog.__init__(self, opener, "00changelog.i")
File ".../mercurial/revlog.py", line 251, in __init__
d = self._io.parseindex(i, self._inline)
File ".../mercurial/revlog.py", line 158, in parseindex
index, cache = parsers.parse_index2(data, inline)
TypeError: data is not a string
which can be reproduced more simply with:
import mercurial.parsers as parsers
parsers.parse_index2("", True)
Both the crash and the TypeError occurred because the Python C API's
PyString_Check() returns the wrong value when the C header files from
Python 2.7 are run with Python 2.5. This is an example of an
incompatibility of the sort mentioned in the Python bug report above.
Failing fast with an informative error message results in a better user
experience in cases like the above. The information in the ImportError
also simplifies troubleshooting for those on Mercurial mailing lists, the
bug tracker, etc.
This patch only adds the version check to parsers.c, which is sufficient
to affect command-line commands like "hg status" and "hg summary".
An idea for a future improvement is to move the version-checking C code
to a more central location, and have it run when importing all
Mercurial extension modules and not just parsers.c.
2013-12-05 08:38:27 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-06-16 02:49:56 +04:00
|
|
|
#ifdef IS_PY3K
|
2017-10-16 21:53:57 +03:00
|
|
|
static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
|
|
|
|
parsers_doc, -1, methods};
|
2010-06-16 02:49:56 +04:00
|
|
|
|
|
|
|
PyMODINIT_FUNC PyInit_parsers(void)
|
|
|
|
{
|
2014-03-20 08:01:59 +04:00
|
|
|
PyObject *mod;
|
|
|
|
|
parsers: fail fast if Python has wrong minor version (issue4110)
This change causes an informative ImportError to be raised when importing
the parsers extension module if the minor version of the currently-running
Python interpreter doesn't match that of the Python used when compiling
the extension module.
This change also exposes a parsers.versionerrortext constant in the
C implementation of the module. Its presence can be used to determine
whether this behavior is present in a version of the module. The value
of the constant is the leading text of the ImportError raised and is set
to "Python minor version mismatch".
Here is an example of what the new error looks like:
Traceback (most recent call last):
File "test.py", line 1, in <module>
import mercurial.parsers
ImportError: Python minor version mismatch: The Mercurial extension
modules were compiled with Python 2.7.6, but Mercurial is currently using
Python with sys.hexversion=33883888: Python 2.5.6
(r256:88840, Nov 18 2012, 05:37:10)
[GCC 4.2.1 Compatible Apple Clang 4.1 ((tags/Apple/clang-421.11.66))]
at: /opt/local/Library/Frameworks/Python.framework/Versions/2.5/Resources/
Python.app/Contents/MacOS/Python
The reason for raising an error in this scenario is that Python's C API
is known not to be compatible from minor version to minor version, even
if sys.api_version is the same. See for example this Python bug report
about incompatibilities between 2.5 and 2.6+:
http://bugs.python.org/issue8118
These incompatibilities can cause Mercurial to break in mysterious,
unforeseen ways. For example, when Mercurial compiled with Python 2.7 was
run with 2.5, the following crash occurred when running "hg status":
http://bz.selenic.com/show_bug.cgi?id=4110
After this crash was fixed, running with Python 2.5 no longer crashes, but
the following puzzling behavior still occurs:
$ hg status
...
File ".../mercurial/changelog.py", line 123, in __init__
revlog.revlog.__init__(self, opener, "00changelog.i")
File ".../mercurial/revlog.py", line 251, in __init__
d = self._io.parseindex(i, self._inline)
File ".../mercurial/revlog.py", line 158, in parseindex
index, cache = parsers.parse_index2(data, inline)
TypeError: data is not a string
which can be reproduced more simply with:
import mercurial.parsers as parsers
parsers.parse_index2("", True)
Both the crash and the TypeError occurred because the Python C API's
PyString_Check() returns the wrong value when the C header files from
Python 2.7 are run with Python 2.5. This is an example of an
incompatibility of the sort mentioned in the Python bug report above.
Failing fast with an informative error message results in a better user
experience in cases like the above. The information in the ImportError
also simplifies troubleshooting for those on Mercurial mailing lists, the
bug tracker, etc.
This patch only adds the version check to parsers.c, which is sufficient
to affect command-line commands like "hg status" and "hg summary".
An idea for a future improvement is to move the version-checking C code
to a more central location, and have it run when importing all
Mercurial extension modules and not just parsers.c.
2013-12-05 08:38:27 +04:00
|
|
|
if (check_python_version() == -1)
|
2016-10-08 18:51:29 +03:00
|
|
|
return NULL;
|
2014-03-20 08:01:59 +04:00
|
|
|
mod = PyModule_Create(&parsers_module);
|
2012-04-06 00:00:35 +04:00
|
|
|
module_init(mod);
|
|
|
|
return mod;
|
2010-06-16 02:49:56 +04:00
|
|
|
}
|
|
|
|
#else
|
2008-03-26 20:12:10 +03:00
|
|
|
PyMODINIT_FUNC initparsers(void)
|
|
|
|
{
|
2014-03-20 08:01:59 +04:00
|
|
|
PyObject *mod;
|
|
|
|
|
parsers: fail fast if Python has wrong minor version (issue4110)
This change causes an informative ImportError to be raised when importing
the parsers extension module if the minor version of the currently-running
Python interpreter doesn't match that of the Python used when compiling
the extension module.
This change also exposes a parsers.versionerrortext constant in the
C implementation of the module. Its presence can be used to determine
whether this behavior is present in a version of the module. The value
of the constant is the leading text of the ImportError raised and is set
to "Python minor version mismatch".
Here is an example of what the new error looks like:
Traceback (most recent call last):
File "test.py", line 1, in <module>
import mercurial.parsers
ImportError: Python minor version mismatch: The Mercurial extension
modules were compiled with Python 2.7.6, but Mercurial is currently using
Python with sys.hexversion=33883888: Python 2.5.6
(r256:88840, Nov 18 2012, 05:37:10)
[GCC 4.2.1 Compatible Apple Clang 4.1 ((tags/Apple/clang-421.11.66))]
at: /opt/local/Library/Frameworks/Python.framework/Versions/2.5/Resources/
Python.app/Contents/MacOS/Python
The reason for raising an error in this scenario is that Python's C API
is known not to be compatible from minor version to minor version, even
if sys.api_version is the same. See for example this Python bug report
about incompatibilities between 2.5 and 2.6+:
http://bugs.python.org/issue8118
These incompatibilities can cause Mercurial to break in mysterious,
unforeseen ways. For example, when Mercurial compiled with Python 2.7 was
run with 2.5, the following crash occurred when running "hg status":
http://bz.selenic.com/show_bug.cgi?id=4110
After this crash was fixed, running with Python 2.5 no longer crashes, but
the following puzzling behavior still occurs:
$ hg status
...
File ".../mercurial/changelog.py", line 123, in __init__
revlog.revlog.__init__(self, opener, "00changelog.i")
File ".../mercurial/revlog.py", line 251, in __init__
d = self._io.parseindex(i, self._inline)
File ".../mercurial/revlog.py", line 158, in parseindex
index, cache = parsers.parse_index2(data, inline)
TypeError: data is not a string
which can be reproduced more simply with:
import mercurial.parsers as parsers
parsers.parse_index2("", True)
Both the crash and the TypeError occurred because the Python C API's
PyString_Check() returns the wrong value when the C header files from
Python 2.7 are run with Python 2.5. This is an example of an
incompatibility of the sort mentioned in the Python bug report above.
Failing fast with an informative error message results in a better user
experience in cases like the above. The information in the ImportError
also simplifies troubleshooting for those on Mercurial mailing lists, the
bug tracker, etc.
This patch only adds the version check to parsers.c, which is sufficient
to affect command-line commands like "hg status" and "hg summary".
An idea for a future improvement is to move the version-checking C code
to a more central location, and have it run when importing all
Mercurial extension modules and not just parsers.c.
2013-12-05 08:38:27 +04:00
|
|
|
if (check_python_version() == -1)
|
|
|
|
return;
|
2014-03-20 08:01:59 +04:00
|
|
|
mod = Py_InitModule3("parsers", methods, parsers_doc);
|
2012-04-06 00:00:35 +04:00
|
|
|
module_init(mod);
|
2008-03-26 20:12:10 +03:00
|
|
|
}
|
2010-06-16 02:49:56 +04:00
|
|
|
#endif
|