Renaming python PhraseTable to BinaryPhraseTable + better docs

This commit is contained in:
Wilker Aziz 2012-09-12 11:05:28 +02:00
parent 85dd58fed3
commit 92295911b1
4 changed files with 68 additions and 57 deletions

View File

@ -1,15 +1,26 @@
# Moses interface for python
# Python interface to Moses
The idea is to expose some of Moses' internals to Python (inspired by pycdec).
---
## What's been interfaced?
* Binary phrase table:
Moses::PhraseDictionaryTree.h
---
## Building
1. Compile the cython code
cython --cplus binpt/binpt.pyx
cython --cplus binpt/binpt.pyx
2. Build the python extension
python setup.py build_ext -i
python setup.py build_ext -i
3. Check the example code
echo '! " and "' | python example.py /media/Data/data/smt/sample/bin/sample.en-es 5 1
echo "casa" | python example.py /media/Data/data/smt/fapesp/bin/fapesp.br-en 5
echo '! " and "' | python example.py bin-ptable-stem 5 1
echo "casa" | python example.py bin-ptable-stem 5

View File

@ -1,4 +1,4 @@
/* Generated by Cython 0.16 on Wed Sep 12 00:44:55 2012 */
/* Generated by Cython 0.16 on Wed Sep 12 11:03:15 2012 */
#define PY_SSIZE_T_CLEAN
#include "Python.h"
@ -363,7 +363,7 @@ static const char *__pyx_f[] = {
/*--- Type declarations ---*/
struct __pyx_obj_5binpt_QueryResult;
struct __pyx_obj_5binpt_PhraseTable;
struct __pyx_obj_5binpt_BinaryPhraseTable;
struct __pyx_opt_args_5binpt_get_query_result;
/* "binpt.pxd":5
@ -405,11 +405,11 @@ struct __pyx_obj_5binpt_QueryResult {
/* "binpt.pyx":61
* return QueryResult(words, scores, wa)
*
* cdef class PhraseTable: # <<<<<<<<<<<<<<
* cdef class BinaryPhraseTable: # <<<<<<<<<<<<<<
* '''This class encapsulates a Moses::PhraseDictionaryTree for operations over
* binary phrase tables.'''
*/
struct __pyx_obj_5binpt_PhraseTable {
struct __pyx_obj_5binpt_BinaryPhraseTable {
PyObject_HEAD
Moses::PhraseDictionaryTree *tree;
};
@ -548,7 +548,7 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
/* Module declarations from 'binpt' */
static PyTypeObject *__pyx_ptype_5binpt_QueryResult = 0;
static PyTypeObject *__pyx_ptype_5binpt_PhraseTable = 0;
static PyTypeObject *__pyx_ptype_5binpt_BinaryPhraseTable = 0;
static PyObject *__pyx_f_5binpt_as_str(PyObject *); /*proto*/
static struct __pyx_obj_5binpt_QueryResult *__pyx_f_5binpt_get_query_result(Moses::StringTgtCand &, struct __pyx_opt_args_5binpt_get_query_result *__pyx_optional_args); /*proto*/
#define __Pyx_MODULE_NAME "binpt"
@ -564,9 +564,9 @@ static PyObject *__pyx_pf_5binpt_11QueryResult_4scores(struct __pyx_obj_5binpt_Q
static PyObject *__pyx_pf_5binpt_11QueryResult_6wa(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5binpt_11QueryResult_8__str__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5binpt_11QueryResult_10__repr__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
static int __pyx_pf_5binpt_11PhraseTable___cinit__(struct __pyx_obj_5binpt_PhraseTable *__pyx_v_self, char *__pyx_v_path, unsigned int __pyx_v_nscores, int __pyx_v_wa); /* proto */
static void __pyx_pf_5binpt_11PhraseTable_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_5binpt_PhraseTable *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5binpt_11PhraseTable_4query(struct __pyx_obj_5binpt_PhraseTable *__pyx_v_self, char *__pyx_v_line); /* proto */
static int __pyx_pf_5binpt_17BinaryPhraseTable___cinit__(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self, char *__pyx_v_path, unsigned int __pyx_v_nscores, int __pyx_v_wa); /* proto */
static void __pyx_pf_5binpt_17BinaryPhraseTable_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_4query(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self, char *__pyx_v_line); /* proto */
static char __pyx_k_1[] = " ||| ";
static char __pyx_k_2[] = " ";
static char __pyx_k_3[] = "UTF-8";
@ -1404,7 +1404,7 @@ static struct __pyx_obj_5binpt_QueryResult *__pyx_f_5binpt_get_query_result(Mose
* cdef tuple scores = tuple([cand.second[i] for i in range(cand.second.size())])
* return QueryResult(words, scores, wa) # <<<<<<<<<<<<<<
*
* cdef class PhraseTable:
* cdef class BinaryPhraseTable:
*/
__Pyx_XDECREF(((PyObject *)__pyx_r));
__pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@ -1441,8 +1441,8 @@ static struct __pyx_obj_5binpt_QueryResult *__pyx_f_5binpt_get_query_result(Mose
}
/* Python wrapper */
static int __pyx_pw_5binpt_11PhraseTable_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static int __pyx_pw_5binpt_11PhraseTable_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
static int __pyx_pw_5binpt_17BinaryPhraseTable_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static int __pyx_pw_5binpt_17BinaryPhraseTable_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
char *__pyx_v_path;
unsigned int __pyx_v_nscores;
int __pyx_v_wa;
@ -1523,16 +1523,16 @@ static int __pyx_pw_5binpt_11PhraseTable_1__cinit__(PyObject *__pyx_v_self, PyOb
__pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
__pyx_L3_error:;
__Pyx_AddTraceback("binpt.PhraseTable.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_AddTraceback("binpt.BinaryPhraseTable.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return -1;
__pyx_L4_argument_unpacking_done:;
__pyx_r = __pyx_pf_5binpt_11PhraseTable___cinit__(((struct __pyx_obj_5binpt_PhraseTable *)__pyx_v_self), __pyx_v_path, __pyx_v_nscores, __pyx_v_wa);
__pyx_r = __pyx_pf_5binpt_17BinaryPhraseTable___cinit__(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self), __pyx_v_path, __pyx_v_nscores, __pyx_v_wa);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static int __pyx_pf_5binpt_11PhraseTable___cinit__(struct __pyx_obj_5binpt_PhraseTable *__pyx_v_self, char *__pyx_v_path, unsigned int __pyx_v_nscores, int __pyx_v_wa) {
static int __pyx_pf_5binpt_17BinaryPhraseTable___cinit__(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self, char *__pyx_v_path, unsigned int __pyx_v_nscores, int __pyx_v_wa) {
int __pyx_r;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__cinit__", 0);
@ -1570,11 +1570,11 @@ static int __pyx_pf_5binpt_11PhraseTable___cinit__(struct __pyx_obj_5binpt_Phras
}
/* Python wrapper */
static void __pyx_pw_5binpt_11PhraseTable_3__dealloc__(PyObject *__pyx_v_self); /*proto*/
static void __pyx_pw_5binpt_11PhraseTable_3__dealloc__(PyObject *__pyx_v_self) {
static void __pyx_pw_5binpt_17BinaryPhraseTable_3__dealloc__(PyObject *__pyx_v_self); /*proto*/
static void __pyx_pw_5binpt_17BinaryPhraseTable_3__dealloc__(PyObject *__pyx_v_self) {
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0);
__pyx_pf_5binpt_11PhraseTable_2__dealloc__(((struct __pyx_obj_5binpt_PhraseTable *)__pyx_v_self));
__pyx_pf_5binpt_17BinaryPhraseTable_2__dealloc__(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self));
__Pyx_RefNannyFinishContext();
}
@ -1586,7 +1586,7 @@ static void __pyx_pw_5binpt_11PhraseTable_3__dealloc__(PyObject *__pyx_v_self) {
*
*/
static void __pyx_pf_5binpt_11PhraseTable_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_5binpt_PhraseTable *__pyx_v_self) {
static void __pyx_pf_5binpt_17BinaryPhraseTable_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self) {
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__dealloc__", 0);
@ -1603,9 +1603,9 @@ static void __pyx_pf_5binpt_11PhraseTable_2__dealloc__(CYTHON_UNUSED struct __py
}
/* Python wrapper */
static PyObject *__pyx_pw_5binpt_11PhraseTable_5query(PyObject *__pyx_v_self, PyObject *__pyx_arg_line); /*proto*/
static char __pyx_doc_5binpt_11PhraseTable_4query[] = "Queries the phrase table and returns a list of matches.\n Each match is a QueryResult.";
static PyObject *__pyx_pw_5binpt_11PhraseTable_5query(PyObject *__pyx_v_self, PyObject *__pyx_arg_line) {
static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_5query(PyObject *__pyx_v_self, PyObject *__pyx_arg_line); /*proto*/
static char __pyx_doc_5binpt_17BinaryPhraseTable_4query[] = "Queries the phrase table and returns a list of matches.\n Each match is a QueryResult.";
static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_5query(PyObject *__pyx_v_self, PyObject *__pyx_arg_line) {
char *__pyx_v_line;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
@ -1615,11 +1615,11 @@ static PyObject *__pyx_pw_5binpt_11PhraseTable_5query(PyObject *__pyx_v_self, Py
}
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
__Pyx_AddTraceback("binpt.PhraseTable.query", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_AddTraceback("binpt.BinaryPhraseTable.query", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
__pyx_r = __pyx_pf_5binpt_11PhraseTable_4query(((struct __pyx_obj_5binpt_PhraseTable *)__pyx_v_self), ((char *)__pyx_v_line));
__pyx_r = __pyx_pf_5binpt_17BinaryPhraseTable_4query(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self), ((char *)__pyx_v_line));
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
@ -1632,7 +1632,7 @@ static PyObject *__pyx_pw_5binpt_11PhraseTable_5query(PyObject *__pyx_v_self, Py
* Each match is a QueryResult.'''
*/
static PyObject *__pyx_pf_5binpt_11PhraseTable_4query(struct __pyx_obj_5binpt_PhraseTable *__pyx_v_self, char *__pyx_v_line) {
static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_4query(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self, char *__pyx_v_line) {
PyObject *__pyx_v_text = 0;
std::vector<std::string> __pyx_v_fphrase;
std::vector<Moses::StringTgtCand> *__pyx_v_rv;
@ -1812,7 +1812,7 @@ static PyObject *__pyx_pf_5binpt_11PhraseTable_4query(struct __pyx_obj_5binpt_Ph
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_XDECREF(__pyx_t_7);
__Pyx_AddTraceback("binpt.PhraseTable.query", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_AddTraceback("binpt.BinaryPhraseTable.query", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XDECREF(__pyx_v_text);
@ -2035,21 +2035,21 @@ static PyTypeObject __pyx_type_5binpt_QueryResult = {
#endif
};
static PyObject *__pyx_tp_new_5binpt_PhraseTable(PyTypeObject *t, PyObject *a, PyObject *k) {
static PyObject *__pyx_tp_new_5binpt_BinaryPhraseTable(PyTypeObject *t, PyObject *a, PyObject *k) {
PyObject *o = (*t->tp_alloc)(t, 0);
if (!o) return 0;
if (__pyx_pw_5binpt_11PhraseTable_1__cinit__(o, a, k) < 0) {
if (__pyx_pw_5binpt_17BinaryPhraseTable_1__cinit__(o, a, k) < 0) {
Py_DECREF(o); o = 0;
}
return o;
}
static void __pyx_tp_dealloc_5binpt_PhraseTable(PyObject *o) {
static void __pyx_tp_dealloc_5binpt_BinaryPhraseTable(PyObject *o) {
{
PyObject *etype, *eval, *etb;
PyErr_Fetch(&etype, &eval, &etb);
++Py_REFCNT(o);
__pyx_pw_5binpt_11PhraseTable_3__dealloc__(o);
__pyx_pw_5binpt_17BinaryPhraseTable_3__dealloc__(o);
if (PyErr_Occurred()) PyErr_WriteUnraisable(o);
--Py_REFCNT(o);
PyErr_Restore(etype, eval, etb);
@ -2057,12 +2057,12 @@ static void __pyx_tp_dealloc_5binpt_PhraseTable(PyObject *o) {
(*Py_TYPE(o)->tp_free)(o);
}
static PyMethodDef __pyx_methods_5binpt_PhraseTable[] = {
{__Pyx_NAMESTR("query"), (PyCFunction)__pyx_pw_5binpt_11PhraseTable_5query, METH_O, __Pyx_DOCSTR(__pyx_doc_5binpt_11PhraseTable_4query)},
static PyMethodDef __pyx_methods_5binpt_BinaryPhraseTable[] = {
{__Pyx_NAMESTR("query"), (PyCFunction)__pyx_pw_5binpt_17BinaryPhraseTable_5query, METH_O, __Pyx_DOCSTR(__pyx_doc_5binpt_17BinaryPhraseTable_4query)},
{0, 0, 0, 0}
};
static PyNumberMethods __pyx_tp_as_number_PhraseTable = {
static PyNumberMethods __pyx_tp_as_number_BinaryPhraseTable = {
0, /*nb_add*/
0, /*nb_subtract*/
0, /*nb_multiply*/
@ -2120,7 +2120,7 @@ static PyNumberMethods __pyx_tp_as_number_PhraseTable = {
#endif
};
static PySequenceMethods __pyx_tp_as_sequence_PhraseTable = {
static PySequenceMethods __pyx_tp_as_sequence_BinaryPhraseTable = {
0, /*sq_length*/
0, /*sq_concat*/
0, /*sq_repeat*/
@ -2133,13 +2133,13 @@ static PySequenceMethods __pyx_tp_as_sequence_PhraseTable = {
0, /*sq_inplace_repeat*/
};
static PyMappingMethods __pyx_tp_as_mapping_PhraseTable = {
static PyMappingMethods __pyx_tp_as_mapping_BinaryPhraseTable = {
0, /*mp_length*/
0, /*mp_subscript*/
0, /*mp_ass_subscript*/
};
static PyBufferProcs __pyx_tp_as_buffer_PhraseTable = {
static PyBufferProcs __pyx_tp_as_buffer_BinaryPhraseTable = {
#if PY_MAJOR_VERSION < 3
0, /*bf_getreadbuffer*/
#endif
@ -2160,12 +2160,12 @@ static PyBufferProcs __pyx_tp_as_buffer_PhraseTable = {
#endif
};
static PyTypeObject __pyx_type_5binpt_PhraseTable = {
static PyTypeObject __pyx_type_5binpt_BinaryPhraseTable = {
PyVarObject_HEAD_INIT(0, 0)
__Pyx_NAMESTR("binpt.PhraseTable"), /*tp_name*/
sizeof(struct __pyx_obj_5binpt_PhraseTable), /*tp_basicsize*/
__Pyx_NAMESTR("binpt.BinaryPhraseTable"), /*tp_name*/
sizeof(struct __pyx_obj_5binpt_BinaryPhraseTable), /*tp_basicsize*/
0, /*tp_itemsize*/
__pyx_tp_dealloc_5binpt_PhraseTable, /*tp_dealloc*/
__pyx_tp_dealloc_5binpt_BinaryPhraseTable, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
@ -2175,15 +2175,15 @@ static PyTypeObject __pyx_type_5binpt_PhraseTable = {
0, /*reserved*/
#endif
0, /*tp_repr*/
&__pyx_tp_as_number_PhraseTable, /*tp_as_number*/
&__pyx_tp_as_sequence_PhraseTable, /*tp_as_sequence*/
&__pyx_tp_as_mapping_PhraseTable, /*tp_as_mapping*/
&__pyx_tp_as_number_BinaryPhraseTable, /*tp_as_number*/
&__pyx_tp_as_sequence_BinaryPhraseTable, /*tp_as_sequence*/
&__pyx_tp_as_mapping_BinaryPhraseTable, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
&__pyx_tp_as_buffer_PhraseTable, /*tp_as_buffer*/
&__pyx_tp_as_buffer_BinaryPhraseTable, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, /*tp_flags*/
__Pyx_DOCSTR("This class encapsulates a Moses::PhraseDictionaryTree for operations over\n binary phrase tables."), /*tp_doc*/
0, /*tp_traverse*/
@ -2192,7 +2192,7 @@ static PyTypeObject __pyx_type_5binpt_PhraseTable = {
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
__pyx_methods_5binpt_PhraseTable, /*tp_methods*/
__pyx_methods_5binpt_BinaryPhraseTable, /*tp_methods*/
0, /*tp_members*/
0, /*tp_getset*/
0, /*tp_base*/
@ -2202,7 +2202,7 @@ static PyTypeObject __pyx_type_5binpt_PhraseTable = {
0, /*tp_dictoffset*/
0, /*tp_init*/
0, /*tp_alloc*/
__pyx_tp_new_5binpt_PhraseTable, /*tp_new*/
__pyx_tp_new_5binpt_BinaryPhraseTable, /*tp_new*/
0, /*tp_free*/
0, /*tp_is_gc*/
0, /*tp_bases*/
@ -2362,9 +2362,9 @@ PyMODINIT_FUNC PyInit_binpt(void)
if (PyType_Ready(&__pyx_type_5binpt_QueryResult) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_SetAttrString(__pyx_m, "QueryResult", (PyObject *)&__pyx_type_5binpt_QueryResult) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_ptype_5binpt_QueryResult = &__pyx_type_5binpt_QueryResult;
if (PyType_Ready(&__pyx_type_5binpt_PhraseTable) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_SetAttrString(__pyx_m, "PhraseTable", (PyObject *)&__pyx_type_5binpt_PhraseTable) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_ptype_5binpt_PhraseTable = &__pyx_type_5binpt_PhraseTable;
if (PyType_Ready(&__pyx_type_5binpt_BinaryPhraseTable) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_SetAttrString(__pyx_m, "BinaryPhraseTable", (PyObject *)&__pyx_type_5binpt_BinaryPhraseTable) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_ptype_5binpt_BinaryPhraseTable = &__pyx_type_5binpt_BinaryPhraseTable;
/*--- Type import code ---*/
/*--- Variable import code ---*/
/*--- Function import code ---*/

View File

@ -58,7 +58,7 @@ cdef QueryResult get_query_result(StringTgtCand& cand, wa = None):
cdef tuple scores = tuple([cand.second[i] for i in range(cand.second.size())])
return QueryResult(words, scores, wa)
cdef class PhraseTable:
cdef class BinaryPhraseTable:
'''This class encapsulates a Moses::PhraseDictionaryTree for operations over
binary phrase tables.'''

View File

@ -1,4 +1,4 @@
from binpt import PhraseTable
from binpt import BinaryPhraseTable
from binpt import QueryResult
import sys
@ -13,7 +13,7 @@ wa = len(sys.argv) == 4
print >> sys.stderr, "-ttable %s -nscores %d -alignment-info %s\n" %(pt_file, nscores, str(wa))
pt = PhraseTable(pt_file, nscores, wa)
pt = BinaryPhraseTable(pt_file, nscores, wa)
for line in sys.stdin:
f = line.strip()
matches = pt.query(f)