dirs.addpath: rework algorithm to search forward

This improves performance because it uses strchr rather than a loop.

For LLVM/clang version "Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM
3.5svn)" on OS X, for a repo with over 200,000 files, this improves perfdirs
from 0.248 seconds to 0.230 (7.3%)

For gcc 4.4.6 on Linux, for a test repo with over 500,000 files, this improves
perfdirs from 0.704 seconds to 0.658 (6.5%).
This commit is contained in:
Siddharth Agarwal 2015-03-27 01:03:06 -07:00
parent 0e68ae9536
commit 0a19443756

View File

@ -9,6 +9,7 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <string.h>
#include "util.h"
/*
@ -32,23 +33,19 @@ static inline Py_ssize_t _finddir(PyObject *path, Py_ssize_t pos)
{
const char *s = PyString_AS_STRING(path);
while (pos != -1) {
if (s[pos] == '/')
break;
pos -= 1;
}
return pos;
const char *ret = strchr(s + pos, '/');
return (ret != NULL) ? (ret - s) : -1;
}
static int _addpath(PyObject *dirs, PyObject *path)
{
const char *cpath = PyString_AS_STRING(path);
Py_ssize_t pos = PyString_GET_SIZE(path);
char *cpath = PyString_AS_STRING(path);
Py_ssize_t len = PyString_GET_SIZE(path);
Py_ssize_t pos = -1;
PyObject *key = NULL;
int ret = -1;
while ((pos = _finddir(path, pos - 1)) != -1) {
while ((pos = _finddir(path, pos + 1)) != -1) {
PyObject *val;
/* It's likely that every prefix already has an entry
@ -56,10 +53,18 @@ static int _addpath(PyObject *dirs, PyObject *path)
deallocating a string for each prefix we check. */
if (key != NULL)
((PyStringObject *)key)->ob_shash = -1;
else {
/* Force Python to not reuse a small shared string. */
key = PyString_FromStringAndSize(cpath,
pos < 2 ? 2 : pos);
else if (pos != 0) {
/* pos >= 1, which means that len >= 2. This is
guaranteed to produce a non-interned string. */
key = PyString_FromStringAndSize(cpath, len);
if (key == NULL)
goto bail;
} else {
/* pos == 0, which means we need to increment the dir
count for the empty string. We need to make sure we
don't muck around with interned strings, so throw it
away later. */
key = PyString_FromString("");
if (key == NULL)
goto bail;
}
@ -69,6 +74,10 @@ static int _addpath(PyObject *dirs, PyObject *path)
val = PyDict_GetItem(dirs, key);
if (val != NULL) {
PyInt_AS_LONG(val) += 1;
if (pos != 0)
PyString_AS_STRING(key)[pos] = '/';
else
key = NULL;
continue;
}
@ -83,6 +92,11 @@ static int _addpath(PyObject *dirs, PyObject *path)
Py_DECREF(val);
if (ret == -1)
goto bail;
if (pos != 0)
PyString_AS_STRING(key)[pos] = '/';
else
key = NULL;
Py_CLEAR(key);
}
ret = 0;
@ -95,11 +109,11 @@ bail:
static int _delpath(PyObject *dirs, PyObject *path)
{
Py_ssize_t pos = PyString_GET_SIZE(path);
Py_ssize_t pos = -1;
PyObject *key = NULL;
int ret = -1;
while ((pos = _finddir(path, pos - 1)) != -1) {
while ((pos = _finddir(path, pos + 1)) != -1) {
PyObject *val;
key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);