cstore: uniondatapackstore support for python stores

Summary:
Now uniondatapackstore can also hold python data stores. PythonDataStore
wrapper simply passes function calls to underlying python objects and marshals
the output.

Test Plan:
* Added test case
* Tested on fbsource with treemanifest.usecunionstore=True

Reviewers: durham, simonfar, ryanmce, #fbhgext

Reviewed By: durham, simonfar, #fbhgext

Differential Revision: https://phab.mercurial-scm.org/D631
This commit is contained in:
Mihails Smolins 2017-09-11 09:23:10 -07:00
parent 5d730d772d
commit bbbb8e96dd
10 changed files with 311 additions and 70 deletions

View File

@ -4,7 +4,7 @@
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
// deltachain.h - c++ implementation of deltachain and related classes
// deltachain.cpp - c++ implementation of deltachain and related classes
// no-check-code
#include "cstore/deltachain.h"

View File

@ -160,39 +160,6 @@ class CDeltaChain : public DeltaChain {
}
};
/*
* Wrapper around python delta chain
*/
class PyDeltaChain : public DeltaChain {
private:
std::shared_ptr< std::vector<DeltaChainLink> > _chain;
public:
PyDeltaChain(std::shared_ptr< std::vector<DeltaChainLink> > chain) :
_chain(chain) {}
// Default destructor is used, because the destructor of _chain object
// will free the allocated memory automatically.
~PyDeltaChain() {}
const DeltaChainLink getlink(const size_t idx) {
return _chain->at(idx);
}
size_t linkcount() {
return _chain->size();
}
get_delta_chain_code_t status() {
if (_chain->size()) {
return GET_DELTA_CHAIN_OK;
} else {
return GET_DELTA_CHAIN_NOT_FOUND;
}
}
};
class DeltaChainIterator {
private:
size_t _index;

View File

@ -25,8 +25,9 @@ extern "C" {
#include "cstore/datastore.h"
#include "cstore/key.h"
#include "cstore/py-structs.h"
#include "cstore/pythonutil.h"
#include "cstore/pythondatastore.h"
#include "cstore/pythonkeyiterator.h"
#include "cstore/pythonutil.h"
#include "cstore/uniondatapackstore.h"
// --------- DatapackStore Implementation ---------
@ -205,7 +206,8 @@ static int uniondatapackstore_init(py_uniondatapackstore *self, PyObject *args)
try {
std::vector<DataStore*> stores;
std::vector<PythonObj> pySubStores;
std::vector<PythonObj> cSubStores;
std::vector< std::shared_ptr<PythonDataStore> > pySubStores;
PyObject *item;
PythonObj inputIterator = PyObject_GetIter(storeList);
@ -213,26 +215,38 @@ static int uniondatapackstore_init(py_uniondatapackstore *self, PyObject *args)
// Record the substore references, so:
// A) We can decref them in case of an error.
// B) They don't get GC'd while the uniondatapackstore holds on to them.
pySubStores.push_back(PythonObj(item));
int iscdatapack = PyObject_IsInstance(item, (PyObject*)&datapackstoreType);
int isinstance = PyObject_IsInstance(item, (PyObject*)&datapackstoreType);
if (isinstance == 0) {
PyErr_SetString(PyExc_RuntimeError, "cuniondatapackstore only accepts cdatapackstore");
return -1;
} else if (isinstance != 1) {
// Error
return -1;
PythonObj store(item);
switch (iscdatapack) {
case 1:
// Store is C datapack
cSubStores.push_back(store);
py_datapackstore *subStore = (py_datapackstore*)item;
stores.push_back(&subStore->datapackstore);
break;
case 0:
// Store is PythonDataStore, it's memory management
// is performed by py_uniondatapackstore
std::shared_ptr<PythonDataStore> pystore =
std::make_shared<PythonDataStore>(store);
pySubStores.push_back(pystore);
stores.push_back(pystore.get());
break;
default:
// Error
return -1;
}
py_datapackstore *pySubStore = (py_datapackstore*)item;
stores.push_back(&pySubStore->datapackstore);
}
// We have to manually call the member constructor, since the provided 'self'
// is just zerod out memory.
new(&self->uniondatapackstore) std::shared_ptr<UnionDatapackStore>(new UnionDatapackStore(stores));
new(&self->substores) std::vector<PythonObj>();
self->substores = pySubStores;
new(&self->cstores) std::vector<PythonObj>();
new(&self->pystores) std::vector< std::shared_ptr<PythonDataStore> >();
self->cstores = cSubStores;
self->pystores = pySubStores;
} catch (const std::exception &ex) {
PyErr_SetString(PyExc_RuntimeError, ex.what());
return -1;
@ -243,7 +257,8 @@ static int uniondatapackstore_init(py_uniondatapackstore *self, PyObject *args)
static void uniondatapackstore_dealloc(py_uniondatapackstore *self) {
self->uniondatapackstore.~shared_ptr<UnionDatapackStore>();
self->substores.~vector<PythonObj>();
self->cstores.~vector<PythonObj>();
self->pystores.~vector< std::shared_ptr<PythonDataStore> >();
PyObject_Del(self);
}

View File

@ -13,6 +13,7 @@
#include <memory>
#include "cstore/datapackstore.h"
#include "cstore/pythondatastore.h"
#include "cstore/pythonutil.h"
#include "cstore/uniondatapackstore.h"
@ -28,7 +29,8 @@ struct py_uniondatapackstore {
std::shared_ptr<UnionDatapackStore> uniondatapackstore;
// Keep a reference to the python objects so we can decref them later.
std::vector<PythonObj> substores;
std::vector<PythonObj> cstores;
std::vector< std::shared_ptr<PythonDataStore> > pystores;
};
#endif // FBHGEXT_CSTORE_PY_STRUCTS_H

116
cstore/pythondatastore.cpp Normal file
View File

@ -0,0 +1,116 @@
// pythondatastore.cpp - implementation of a python data store
//
// Copyright 2017 Facebook, Inc.
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//
// no-check-code
#include "cstore/pythondatastore.h"
#include "cstore/pythonkeyiterator.h"
PythonDataStore::PythonDataStore(PythonObj store) : _store(store) {}
DeltaChainIterator PythonDataStore::getDeltaChain(const Key &key) {
std::shared_ptr<DeltaChain> chain = getDeltaChainRaw(key);
return DeltaChainIterator(chain);
}
std::shared_ptr<DeltaChain> PythonDataStore::getDeltaChainRaw(const Key &key) {
// Extract the delta chain from the list of tuples
// and build a DeltaChain object from them
std::shared_ptr< std::vector<DeltaChainLink> > links =
std::make_shared< std::vector<DeltaChainLink> >();
std::shared_ptr< std::vector<PythonObj> > tuples =
std::make_shared< std::vector<PythonObj> >();
// Build (name, node) tuple and call getdeltachain
// method of the underlying store
PythonObj pyKey = Py_BuildValue("(s#s#)", (key.name).c_str(),
(key.name).size(), key.node, 20);
PythonObj list;
try {
list = _store.callmethod("getdeltachain", pyKey);
} catch (const pyexception &ex) {
if (PyErr_ExceptionMatches(PyExc_KeyError)) {
// Clear the exception, otherwise next method call will exit immediately
PyErr_Clear();
// Return empty Delta Chain which status is GET_DELTA_CHAIN_NOT_FOUND
return std::make_shared<PyDeltaChain>(links, tuples);
} else {
// If this is not a KeyError exception then rethrow it
throw;
}
}
PythonObj iter = PyObject_GetIter(list);
PyObject *item;
while ((item = PyIter_Next(iter)) != NULL) {
PythonObj tuple(item);
const char *filename, *deltabasefilename;
const uint8_t *node, *deltabasenode, *delta;
uint16_t filenamesz, deltabasefilenamesz;
uint64_t deltasz, nodesz, deltabasenodesz;
if (!PyArg_ParseTuple(tuple, "s#z#s#z#z#",
&filename, &filenamesz,
&node, &nodesz,
&deltabasefilename, &deltabasefilenamesz,
&deltabasenode, &deltabasenodesz,
&delta, &deltasz)) {
throw pyexception();
}
links->push_back(DeltaChainLink(filename, deltabasefilename, node,
deltabasenode, delta, filenamesz,
deltabasefilenamesz, deltasz));
tuples->push_back(tuple);
}
return std::make_shared< PyDeltaChain >(links, tuples);
}
std::shared_ptr<KeyIterator> PythonDataStore::getMissing(KeyIterator &missing) {
PythonObj list = PyList_New(0);
Key *key;
while ((key = missing.next()) != NULL) {
PythonObj pyKey = Py_BuildValue("(s#s#)", key->name.c_str(),
key->name.size(), key->node, 20);
if (PyList_Append(list, (PyObject*)pyKey)) {
throw pyexception();
}
}
PythonObj arg = Py_BuildValue("(O)", (PyObject*)list);
PythonObj keys = _store.callmethod("getmissing", arg);
PythonObj iter = PyObject_GetIter((PyObject*)keys);
return std::make_shared<PythonKeyIterator>(iter);
}
void PythonDataStore::markForRefresh() {
PythonObj args = Py_BuildValue("");
_store.callmethod("markforrefresh", args);
}
class Single : public KeyIterator {
public:
Key *_k;
Single(Key *k) : _k(k) {}
Key *next() {
Key *tmp = _k;
_k = NULL;
return tmp;
}
};
bool PythonDataStore::contains(const Key &key) {
Single iter((Key*)&key);
std::shared_ptr<KeyIterator> it = getMissing(iter);
return (!it->next());
}

80
cstore/pythondatastore.h Normal file
View File

@ -0,0 +1,80 @@
// pythondatastore.h - c++ declarations for a python data store
//
// Copyright 2017 Facebook, Inc.
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//
// no-check-code
// The PY_SSIZE_T_CLEAN define must be defined before the Python.h include,
// as per the documentation.
#ifndef FBHGEXT_PYTHONDATASTORE_H
#define FBHGEXT_PYTHONDATASTORE_H
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <memory>
#include "cstore/datastore.h"
#include "cstore/key.h"
#include "cstore/pythonutil.h"
/*
* Wrapper around python delta chain
*/
class PyDeltaChain : public DeltaChain {
private:
std::shared_ptr< std::vector<DeltaChainLink> > _chain;
std::shared_ptr< std::vector<PythonObj> > _pythonChainLinks;
public:
PyDeltaChain(std::shared_ptr< std::vector<DeltaChainLink> > chain,
std::shared_ptr< std::vector<PythonObj> > pythonChainLinks) :
_chain(chain),
_pythonChainLinks(pythonChainLinks) {}
// Default destructor is used, because the destructor of _chain
// and _tuples objects will free the allocated memory automatically.
~PyDeltaChain() {}
const DeltaChainLink getlink(const size_t idx) {
return _chain->at(idx);
}
size_t linkcount() {
return _chain->size();
}
get_delta_chain_code_t status() {
if (_chain->size()) {
return GET_DELTA_CHAIN_OK;
} else {
return GET_DELTA_CHAIN_NOT_FOUND;
}
}
};
class PythonDataStore : public DataStore {
private:
PythonObj _store; // pointer to python object
public:
PythonDataStore(PythonObj store);
~PythonDataStore() = default;
DeltaChainIterator getDeltaChain(const Key &key);
std::shared_ptr<KeyIterator> getMissing(KeyIterator &missing);
std::shared_ptr<DeltaChain> getDeltaChainRaw(const Key &key);
bool contains(const Key &key);
void markForRefresh();
};
#endif //FBHGEXT_PYTHONDATASTORE_H

View File

@ -16,6 +16,7 @@ class PythonKeyIterator : public KeyIterator {
private:
PythonObj _input;
Key _current;
public:
PythonKeyIterator(PythonObj input) :
_input(input) {}
@ -23,7 +24,7 @@ class PythonKeyIterator : public KeyIterator {
Key *next() {
PyObject *item;
while ((item = PyIter_Next((PyObject*)_input)) != NULL) {
PythonObj itemObj = item;
PythonObj itemObj(item);
char *name;
Py_ssize_t namelen;

View File

@ -242,6 +242,7 @@ else:
'cstore/deltachain.cpp',
'cstore/py-cstore.cpp',
'cstore/pythonutil.cpp',
'cstore/pythondatastore.cpp',
'cstore/uniondatapackstore.cpp',
'ctreemanifest/manifest.cpp',
'ctreemanifest/manifest_entry.cpp',

View File

@ -20,7 +20,7 @@ from cstore import (
)
from remotefilelog.datapack import (
fastdatapack,
datapack,
mutabledatapack,
)
@ -48,7 +48,7 @@ class uniondatapackstoretests(unittest.TestCase):
def getFakeHash(self):
return ''.join(chr(random.randint(0, 255)) for _ in range(20))
def createPack(self, packdir, revisions=None):
def createPackStore(self, packdir, revisions=None):
if revisions is None:
revisions = [("filename", self.getFakeHash(), nullid, "content")]
@ -57,16 +57,16 @@ class uniondatapackstoretests(unittest.TestCase):
for filename, node, base, content in revisions:
packer.add(filename, node, base, content)
path = packer.close()
return fastdatapack(path)
packer.close()
return datapackstore(packdir)
def testGetFromSingleDelta(self):
packdir = self.makeTempDir()
revisions = [("foo", self.getFakeHash(), nullid, "content")]
self.createPack(packdir, revisions=revisions)
store = self.createPackStore(packdir, revisions=revisions)
unionstore = uniondatapackstore([datapackstore(packdir)])
unionstore = uniondatapackstore([store])
text = unionstore.get(revisions[0][0], revisions[0][1])
self.assertEquals("content", text)
@ -82,9 +82,9 @@ class uniondatapackstoretests(unittest.TestCase):
("foo", self.getFakeHash(), firsthash,
mdiff.textdiff(rev1, rev2)),
]
self.createPack(packdir, revisions=revisions)
store = self.createPackStore(packdir, revisions=revisions)
unionstore = uniondatapackstore([datapackstore(packdir)])
unionstore = uniondatapackstore([store])
text = unionstore.get(revisions[1][0], revisions[1][1])
self.assertEquals(rev2, text)
@ -94,9 +94,9 @@ class uniondatapackstoretests(unittest.TestCase):
packdir = self.makeTempDir()
revisions = [("foo", self.getFakeHash(), nullid, "content")]
self.createPack(packdir, revisions=revisions)
store = self.createPackStore(packdir, revisions=revisions)
unionstore = uniondatapackstore([datapackstore(packdir)])
unionstore = uniondatapackstore([store])
chain = unionstore.getdeltachain(revisions[0][0], revisions[0][1])
self.assertEquals(1, len(chain))
@ -111,9 +111,9 @@ class uniondatapackstoretests(unittest.TestCase):
("foo", firsthash, nullid, "content"),
("foo", self.getFakeHash(), firsthash, "content2"),
]
self.createPack(packdir, revisions=revisions)
store = self.createPackStore(packdir, revisions=revisions)
unionstore = uniondatapackstore([datapackstore(packdir)])
unionstore = uniondatapackstore([store])
chain = unionstore.getdeltachain(revisions[1][0], revisions[1][1])
self.assertEquals(2, len(chain))
@ -127,14 +127,14 @@ class uniondatapackstoretests(unittest.TestCase):
revisions1 = [
("foo", self.getFakeHash(), nullid, "content"),
]
self.createPack(packdir, revisions=revisions1)
store = self.createPackStore(packdir, revisions=revisions1)
revisions2 = [
("foo", self.getFakeHash(), revisions1[0][1], "content2"),
]
self.createPack(packdir, revisions=revisions2)
store = self.createPackStore(packdir, revisions=revisions2)
unionstore = uniondatapackstore([datapackstore(packdir)])
unionstore = uniondatapackstore([store])
chain = unionstore.getdeltachain(revisions2[0][0], revisions2[0][1])
self.assertEquals(2, len(chain))
@ -145,20 +145,77 @@ class uniondatapackstoretests(unittest.TestCase):
packdir = self.makeTempDir()
revisions = [("foo", self.getFakeHash(), nullid, "content")]
self.createPack(packdir, revisions=revisions)
store = self.createPackStore(packdir, revisions=revisions)
unionstore = uniondatapackstore([datapackstore(packdir)])
unionstore = uniondatapackstore([store])
missinghash1 = self.getFakeHash()
missinghash2 = self.getFakeHash()
missing = unionstore.getmissing([
(revisions[0][0], revisions[0][1]),
("foo", missinghash1),
("foo2", missinghash2),
("foo2", missinghash2)
])
self.assertEquals(2, len(missing))
self.assertEquals(set([("foo", missinghash1), ("foo2", missinghash2)]),
set(missing))
class uniondatastorepythontests(uniondatapackstoretests):
def createPackStore(self, packdir, revisions=None):
if revisions is None:
revisions = [("filename", self.getFakeHash(), nullid, "content")]
packer = mutabledatapack(mercurial.ui.ui(), packdir)
for filename, node, base, content in revisions:
packer.add(filename, node, base, content)
path = packer.close()
return datapack(path)
def testGetDeltaChainMultiPack(self):
"""Test getting chains from multiple packs."""
packdir = self.makeTempDir()
revisions1 = [
("foo", self.getFakeHash(), nullid, "content"),
]
pack1 = self.createPackStore(packdir, revisions=revisions1)
revisions2 = [
("foo", self.getFakeHash(), revisions1[0][1], "content2"),
]
pack2 = self.createPackStore(packdir, revisions=revisions2)
unionstore = uniondatapackstore([pack1, pack2])
chain = unionstore.getdeltachain(revisions2[0][0], revisions2[0][1])
self.assertEquals(2, len(chain))
self.assertEquals("content2", chain[0][4])
self.assertEquals("content", chain[1][4])
def testGetDeltaChainMultiPackPyAndC(self):
"""Test getting chains from multiple packs."""
packdir1 = self.makeTempDir()
packdir2 = self.makeTempDir()
revisions1 = [
("foo", self.getFakeHash(), nullid, "content"),
]
store = super(uniondatastorepythontests, self)\
.createPackStore(packdir1, revisions=revisions1)
revisions2 = [
("foo", self.getFakeHash(), revisions1[0][1], "content2"),
]
pack = self.createPackStore(packdir2, revisions=revisions2)
unionstore = uniondatapackstore([pack, store])
chain = unionstore.getdeltachain(revisions2[0][0], revisions2[0][1])
self.assertEquals(2, len(chain))
self.assertEquals("content2", chain[0][4])
self.assertEquals("content", chain[1][4])
if __name__ == '__main__':
silenttestrunner.main(__name__)

View File

@ -74,6 +74,8 @@ New errors are not allowed. Warnings are strongly discouraged.
Skipping cstore/py-datapackstore.h it has no-che?k-code (glob)
Skipping cstore/py-structs.h it has no-che?k-code (glob)
Skipping cstore/py-treemanifest.h it has no-che?k-code (glob)
Skipping cstore/pythondatastore.cpp it has no-che?k-code (glob)
Skipping cstore/pythondatastore.h it has no-che?k-code (glob)
Skipping cstore/pythonkeyiterator.h it has no-che?k-code (glob)
Skipping cstore/pythonutil.cpp it has no-che?k-code (glob)
Skipping cstore/pythonutil.h it has no-che?k-code (glob)