added suffix array phrase table updater service to moses server

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3232 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
leven101 2010-05-07 09:50:19 +00:00
parent 034528988d
commit e1ea1e8301
7 changed files with 100 additions and 17 deletions

View File

@ -25,6 +25,13 @@ namespace Moses {
// - currently Input/Output variable is not stored in class Word, but in the future???
return word;
}
// Look up the ID of a word given only its string form.
// The string is turned into a Word via Word::CreateFromString, as an
// Input-side token with a factor list containing just factor 0, and the
// actual lookup is delegated to GetWordID(const Word&).
wordID_t Vocab::GetWordID(const std::string& word_str) {
  // Factor list holding the single factor 0.
  FactorList factorOrder;
  factorOrder.push_back(0);

  // NOTE(review): the last argument is passed as true; its meaning is not
  // visible from here -- confirm against Word::CreateFromString.
  Word parsed;
  parsed.CreateFromString(Input, factorOrder, word_str, true);

  return GetWordID(parsed);
}
// get wordID_t index for word represented as string
wordID_t Vocab::GetWordID(const std::string& word_str,

View File

@ -53,6 +53,7 @@ class Vocab {
wordID_t GetWordID(const std::string& word, const FactorDirection& direction,
const FactorList& factors, bool isNonTerminal);
wordID_t GetWordID(const Word& word);
wordID_t GetWordID(const string& word);
Word& GetWord(wordID_t id);
inline const wordID_t GetkOOVWordID() { return m_kOOVWordID; }
inline const wordID_t GetBOSWordID() { return m_kBOSWordID; }

View File

@ -70,8 +70,8 @@ int DynSuffixArray::F_firstIdx(unsigned word) {
else return -1;
}
/* uses rank() and c() to obtain the LF function */
int DynSuffixArray::LF(unsigned L_idx) {
/* uses rank() and c() to obtain the LastFirstFunc function */
int DynSuffixArray::LastFirstFunc(unsigned L_idx) {
int fIdx(-1);
unsigned word = m_L->at(L_idx);
if((fIdx = F_firstIdx(word)) != -1)
@ -88,12 +88,12 @@ void DynSuffixArray::InsertFactor(vuint_t* newSent, unsigned newIndex) {
assert(newIndex <= m_SA->size());
int k(-1), kprime(-1);
k = (newIndex < m_SA->size() ? m_ISA->at(newIndex) : m_ISA->at(0)); // k is now index of the cycle that starts at newindex
int true_pos = LF(k); // track cycle shift (newIndex - 1)
int true_pos = LastFirstFunc(k); // track cycle shift (newIndex - 1)
int Ltmp = m_L->at(k);
m_L->at(k) = (*newSent)[newSent->size()-1]; // cycle k now ends with correct word
for(int j = newSent->size()-1; j > -1; --j) {
kprime = LF(k); // find cycle that starts with (newindex - 1)
kprime = LastFirstFunc(k); // find cycle that starts with (newindex - 1)
//kprime += ((m_L[k] == Ltmp) && (k > isa[k]) ? 1 : 0); // yada yada
// only terminal char can be 0 so add new vocab at end
kprime = (kprime > 0 ? kprime : m_SA->size());
@ -114,7 +114,7 @@ void DynSuffixArray::InsertFactor(vuint_t* newSent, unsigned newIndex) {
k = kprime;
}
// Begin stage 4
Reorder(true_pos, LF(kprime)); // actual position vs computed position of cycle (newIndex-1)
Reorder(true_pos, LastFirstFunc(kprime)); // actual position vs computed position of cycle (newIndex-1)
}
void DynSuffixArray::Reorder(unsigned j, unsigned jprime) {
@ -122,7 +122,7 @@ void DynSuffixArray::Reorder(unsigned j, unsigned jprime) {
while(j != jprime) {
printf("j=%d\tj'=%d\n", j, jprime);
int tmp, isaIdx(-1);
int new_j = LF(j);
int new_j = LastFirstFunc(j);
// for SA, L, and F, the element at pos j is moved to j'
tmp = m_L->at(j); // L
m_L->at(j) = m_L->at(jprime);
@ -141,13 +141,13 @@ void DynSuffixArray::Reorder(unsigned j, unsigned jprime) {
//isa[isaIdx] = jprime;
m_ISA->at(isaIdx) = jprime;
j = new_j;
jprime = LF(jprime);
jprime = LastFirstFunc(jprime);
}
}
void DynSuffixArray::DeleteFactor(unsigned index, unsigned num2del) {
int ltmp = m_L->at(m_ISA->at(index));
int true_pos = LF(m_ISA->at(index)); // track cycle shift (newIndex - 1)
int true_pos = LastFirstFunc(m_ISA->at(index)); // track cycle shift (newIndex - 1)
for(size_t q = 0; q < num2del; ++q) {
int row = m_ISA->at(index); // gives the position of index in SA and m_F
std::cerr << "row = " << row << std::endl;
@ -165,7 +165,7 @@ void DynSuffixArray::DeleteFactor(unsigned index, unsigned num2del) {
if(*itr > index) --(*itr);
}
m_L->at(m_ISA->at(index))= ltmp;
Reorder(LF(m_ISA->at(index)), true_pos);
Reorder(LastFirstFunc(m_ISA->at(index)), true_pos);
PrintAuxArrays();
}

View File

@ -22,6 +22,9 @@ public:
bool GetCorpusIndex(const vuint_t*, vuint_t*);
void Load(FILE*);
void Save(FILE*);
void InsertFactor(vuint_t*, unsigned);
void DeleteFactor(unsigned, unsigned);
void SubstituteFactor(vuint_t*, unsigned);
private:
vuint_t* m_SA;
@ -33,10 +36,7 @@ private:
void Qsort(int* array, int begin, int end);
int Compare(int, int, int);
void Reorder(unsigned, unsigned);
void InsertFactor(vuint_t*, unsigned);
void DeleteFactor(unsigned, unsigned);
void SubstituteFactor(vuint_t*, unsigned);
int LF(unsigned);
int LastFirstFunc(unsigned);
int Rank(unsigned, unsigned);
int F_firstIdx(unsigned);
void PrintAuxArrays() {

View File

@ -74,5 +74,11 @@ const ChartRuleCollection *PhraseDictionaryDynSuffixArray::GetChartRuleCollectio
assert(false);
return NULL;
}
// Adds one sentence pair to this phrase table.
// The source sentence, target sentence and alignment strings are forwarded
// unchanged to m_biSA->addSntPair(); no validation is done here.
void PhraseDictionaryDynSuffixArray::insertSnt(string& source, string& target, string& alignment) {
m_biSA->addSntPair(source, target, alignment);
}
// Placeholder for removing sentences from this phrase table.
// Not implemented yet: the body is empty, so both idx and num2Del are
// ignored and a call has no effect.
void PhraseDictionaryDynSuffixArray::deleteSnt(unsigned idx, unsigned num2Del) {
// need to implement --
}
}// end namespace

View File

@ -27,6 +27,8 @@ public:
void InitializeForInput(const InputType& i);
void AddEquivPhrase(const Phrase &, const TargetPhrase &){}
void CleanUp();
void insertSnt(string&, string&, string&);
void deleteSnt(unsigned, unsigned);
private:
BilingualDynSuffixArray *m_biSA;
std::vector<float> m_weight;

View File

@ -9,11 +9,63 @@
#include "Hypothesis.h"
#include "Manager.h"
#include "StaticData.h"
#include "PhraseDictionaryDynSuffixArray.h"
using namespace Moses;
using namespace std;
typedef std::map<std::string, xmlrpc_c::value> params_t;
class Updater: public xmlrpc_c::method {
public:
Updater() {
// signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and
// system.methodHelp RPC.
this->_signature = "S:S";
this->_help = "Updates stuff";
}
void
execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP) {
const params_t params = paramList.getStruct(0);
breakOutParams(params);
const StaticData &staticData = StaticData::Instance();
InputType* dummy=0;
PhraseDictionaryFeature* pdf = staticData.GetPhraseDictionaries()[0];
PhraseDictionaryDynSuffixArray* pdsa = (PhraseDictionaryDynSuffixArray*) pdf->GetDictionary(*dummy);
cerr << "Inserting into address " << pdsa << endl;
pdsa->insertSnt(source_, target_, alignment_);
cerr << "Done inserting\n";
//PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy);
map<string, xmlrpc_c::value> retData;
//*retvalP = xmlrpc_c::value_struct(retData);
pdf = 0;
pdsa = 0;
*retvalP = xmlrpc_c::value_string("Phrase table updated");
}
string source_, target_, alignment_;
bool bounded_;
void breakOutParams(const params_t& params) {
params_t::const_iterator si = params.find("source");
if(si == params.end())
throw xmlrpc_c::fault("Missing source sentence", xmlrpc_c::fault::CODE_PARSE);
source_ = xmlrpc_c::value_string(si->second);
cerr << "source = " << source_ << endl;
si = params.find("target");
if(si == params.end())
throw xmlrpc_c::fault("Missing target sentence", xmlrpc_c::fault::CODE_PARSE);
target_ = xmlrpc_c::value_string(si->second);
cerr << "target = " << target_ << endl;
si = params.find("alignment");
if(si == params.end())
throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
alignment_ = xmlrpc_c::value_string(si->second);
cerr << "alignment = " << alignment_ << endl;
si = params.find("bounded");
bounded_ = (si != params.end());
}
};
class Translator : public xmlrpc_c::method {
public:
@ -25,8 +77,6 @@ public:
this->_help = "Does translation";
}
typedef std::map<std::string, xmlrpc_c::value> params_t;
void
execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP) {
@ -47,6 +97,10 @@ public:
bool addAlignInfo = (si != params.end());
const StaticData &staticData = StaticData::Instance();
bool addGraphInfo = staticData.GetOutputSearchGraph();
cerr << "addGraphInfo: " << addGraphInfo << endl;
Sentence sentence(Input);
const vector<FactorType> &inputFactorOrder =
staticData.GetInputFactorOrder();
@ -57,7 +111,7 @@ public:
const Hypothesis* hypo = manager.GetBestHypothesis();
vector<xmlrpc_c::value> alignInfo;
stringstream out;
stringstream out, graphInfo, transCollOpts;
outputHypo(out,hypo,addAlignInfo,alignInfo);
map<string, xmlrpc_c::value> retData;
@ -69,6 +123,17 @@ public:
}
retData.insert(text);
if(addGraphInfo) {
vector<SearchGraphNode> searchGraph;
/*manager.GetSearchGraph(0, graphInfo);
manager.getSntTranslationOptions(transCollOpts);
pair<string, xmlrpc_c::value>
graphData("graph", xmlrpc_c::value_string(graphInfo.str()));
retData.insert(graphData);
pair<string, xmlrpc_c::value>
transOpts("transCollOpts", xmlrpc_c::value_string(transCollOpts.str()));
retData.insert(transOpts);*/
}
*retvalP = xmlrpc_c::value_struct(retData);
}
@ -150,8 +215,10 @@ int main(int argc, char** argv) {
xmlrpc_c::registry myRegistry;
xmlrpc_c::methodPtr const translator(new Translator);
xmlrpc_c::methodPtr const updater(new Updater);
myRegistry.addMethod("translate", translator);
myRegistry.addMethod("updater", updater);
xmlrpc_c::serverAbyss myAbyssServer(
myRegistry,