mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
added suffix array phrase table updater service to moses server
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3232 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
034528988d
commit
e1ea1e8301
@ -25,6 +25,13 @@ namespace Moses {
|
||||
// - currently Input/Output variable is not stored in class Word, but in the future???
|
||||
return word;
|
||||
}
|
||||
// Get the wordID_t index for a word given only as a plain string.
// The string is turned into a Word (direction Input, factor list {0}) and the
// lookup is delegated to the GetWordID overload that takes a Word.
wordID_t Vocab::GetWordID(const std::string& word_str) {
  FactorList factorOrder;
  factorOrder.push_back(0);

  Word lookup;
  lookup.CreateFromString(Input, factorOrder, word_str, true);

  const wordID_t id = GetWordID(lookup);
  return id;
}
|
||||
|
||||
// get wordID_t index for word represented as string
|
||||
wordID_t Vocab::GetWordID(const std::string& word_str,
|
||||
|
@ -53,6 +53,7 @@ class Vocab {
|
||||
wordID_t GetWordID(const std::string& word, const FactorDirection& direction,
|
||||
const FactorList& factors, bool isNonTerminal);
|
||||
wordID_t GetWordID(const Word& word);
|
||||
wordID_t GetWordID(const string& word);
|
||||
Word& GetWord(wordID_t id);
|
||||
inline const wordID_t GetkOOVWordID() { return m_kOOVWordID; }
|
||||
inline const wordID_t GetBOSWordID() { return m_kBOSWordID; }
|
||||
|
@ -70,8 +70,8 @@ int DynSuffixArray::F_firstIdx(unsigned word) {
|
||||
else return -1;
|
||||
}
|
||||
|
||||
/* uses rank() and c() to obtain the LF function */
|
||||
int DynSuffixArray::LF(unsigned L_idx) {
|
||||
/* uses rank() and c() to obtain the LastFirstFunc function */
|
||||
int DynSuffixArray::LastFirstFunc(unsigned L_idx) {
|
||||
int fIdx(-1);
|
||||
unsigned word = m_L->at(L_idx);
|
||||
if((fIdx = F_firstIdx(word)) != -1)
|
||||
@ -88,12 +88,12 @@ void DynSuffixArray::InsertFactor(vuint_t* newSent, unsigned newIndex) {
|
||||
assert(newIndex <= m_SA->size());
|
||||
int k(-1), kprime(-1);
|
||||
k = (newIndex < m_SA->size() ? m_ISA->at(newIndex) : m_ISA->at(0)); // k is now index of the cycle that starts at newindex
|
||||
int true_pos = LF(k); // track cycle shift (newIndex - 1)
|
||||
int true_pos = LastFirstFunc(k); // track cycle shift (newIndex - 1)
|
||||
int Ltmp = m_L->at(k);
|
||||
m_L->at(k) = (*newSent)[newSent->size()-1]; // cycle k now ends with correct word
|
||||
|
||||
for(int j = newSent->size()-1; j > -1; --j) {
|
||||
kprime = LF(k); // find cycle that starts with (newindex - 1)
|
||||
kprime = LastFirstFunc(k); // find cycle that starts with (newindex - 1)
|
||||
//kprime += ((m_L[k] == Ltmp) && (k > isa[k]) ? 1 : 0); // yada yada
|
||||
// only terminal char can be 0 so add new vocab at end
|
||||
kprime = (kprime > 0 ? kprime : m_SA->size());
|
||||
@ -114,7 +114,7 @@ void DynSuffixArray::InsertFactor(vuint_t* newSent, unsigned newIndex) {
|
||||
k = kprime;
|
||||
}
|
||||
// Begin stage 4
|
||||
Reorder(true_pos, LF(kprime)); // actual position vs computed position of cycle (newIndex-1)
|
||||
Reorder(true_pos, LastFirstFunc(kprime)); // actual position vs computed position of cycle (newIndex-1)
|
||||
}
|
||||
|
||||
void DynSuffixArray::Reorder(unsigned j, unsigned jprime) {
|
||||
@ -122,7 +122,7 @@ void DynSuffixArray::Reorder(unsigned j, unsigned jprime) {
|
||||
while(j != jprime) {
|
||||
printf("j=%d\tj'=%d\n", j, jprime);
|
||||
int tmp, isaIdx(-1);
|
||||
int new_j = LF(j);
|
||||
int new_j = LastFirstFunc(j);
|
||||
// for SA, L, and F, the element at pos j is moved to j'
|
||||
tmp = m_L->at(j); // L
|
||||
m_L->at(j) = m_L->at(jprime);
|
||||
@ -141,13 +141,13 @@ void DynSuffixArray::Reorder(unsigned j, unsigned jprime) {
|
||||
//isa[isaIdx] = jprime;
|
||||
m_ISA->at(isaIdx) = jprime;
|
||||
j = new_j;
|
||||
jprime = LF(jprime);
|
||||
jprime = LastFirstFunc(jprime);
|
||||
}
|
||||
}
|
||||
|
||||
void DynSuffixArray::DeleteFactor(unsigned index, unsigned num2del) {
|
||||
int ltmp = m_L->at(m_ISA->at(index));
|
||||
int true_pos = LF(m_ISA->at(index)); // track cycle shift (newIndex - 1)
|
||||
int true_pos = LastFirstFunc(m_ISA->at(index)); // track cycle shift (newIndex - 1)
|
||||
for(size_t q = 0; q < num2del; ++q) {
|
||||
int row = m_ISA->at(index); // gives the position of index in SA and m_F
|
||||
std::cerr << "row = " << row << std::endl;
|
||||
@ -165,7 +165,7 @@ void DynSuffixArray::DeleteFactor(unsigned index, unsigned num2del) {
|
||||
if(*itr > index) --(*itr);
|
||||
}
|
||||
m_L->at(m_ISA->at(index))= ltmp;
|
||||
Reorder(LF(m_ISA->at(index)), true_pos);
|
||||
Reorder(LastFirstFunc(m_ISA->at(index)), true_pos);
|
||||
PrintAuxArrays();
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,9 @@ public:
|
||||
bool GetCorpusIndex(const vuint_t*, vuint_t*);
|
||||
void Load(FILE*);
|
||||
void Save(FILE*);
|
||||
void InsertFactor(vuint_t*, unsigned);
|
||||
void DeleteFactor(unsigned, unsigned);
|
||||
void SubstituteFactor(vuint_t*, unsigned);
|
||||
|
||||
private:
|
||||
vuint_t* m_SA;
|
||||
@ -33,10 +36,7 @@ private:
|
||||
void Qsort(int* array, int begin, int end);
|
||||
int Compare(int, int, int);
|
||||
void Reorder(unsigned, unsigned);
|
||||
void InsertFactor(vuint_t*, unsigned);
|
||||
void DeleteFactor(unsigned, unsigned);
|
||||
void SubstituteFactor(vuint_t*, unsigned);
|
||||
int LF(unsigned);
|
||||
int LastFirstFunc(unsigned);
|
||||
int Rank(unsigned, unsigned);
|
||||
int F_firstIdx(unsigned);
|
||||
void PrintAuxArrays() {
|
||||
|
@ -74,5 +74,11 @@ const ChartRuleCollection *PhraseDictionaryDynSuffixArray::GetChartRuleCollectio
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
// Insert one sentence pair into the phrase table: the source sentence, the
// target sentence and their alignment are forwarded unchanged to
// m_biSA->addSntPair().
void PhraseDictionaryDynSuffixArray::insertSnt(string& source,
                                               string& target,
                                               string& alignment)
{
  m_biSA->addSntPair(source, target, alignment);
}
|
||||
// Placeholder for sentence deletion.
// TODO: not implemented yet -- calling this currently does nothing.
void PhraseDictionaryDynSuffixArray::deleteSnt(unsigned idx, unsigned num2Del)
{
  // Both arguments are intentionally ignored until deletion is implemented.
  (void) idx;
  (void) num2Del;
}
|
||||
|
||||
}// end namespace
|
||||
|
@ -27,6 +27,8 @@ public:
|
||||
void InitializeForInput(const InputType& i);
|
||||
void AddEquivPhrase(const Phrase &, const TargetPhrase &){}
|
||||
void CleanUp();
|
||||
void insertSnt(string&, string&, string&);
|
||||
void deleteSnt(unsigned, unsigned);
|
||||
private:
|
||||
BilingualDynSuffixArray *m_biSA;
|
||||
std::vector<float> m_weight;
|
||||
|
@ -9,11 +9,63 @@
|
||||
#include "Hypothesis.h"
|
||||
#include "Manager.h"
|
||||
#include "StaticData.h"
|
||||
|
||||
#include "PhraseDictionaryDynSuffixArray.h"
|
||||
|
||||
using namespace Moses;
|
||||
using namespace std;
|
||||
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
|
||||
class Updater: public xmlrpc_c::method {
|
||||
public:
|
||||
Updater() {
|
||||
// signature and help strings are documentation -- the client
|
||||
// can query this information with a system.methodSignature and
|
||||
// system.methodHelp RPC.
|
||||
this->_signature = "S:S";
|
||||
this->_help = "Updates stuff";
|
||||
}
|
||||
void
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP) {
|
||||
const params_t params = paramList.getStruct(0);
|
||||
breakOutParams(params);
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
InputType* dummy=0;
|
||||
PhraseDictionaryFeature* pdf = staticData.GetPhraseDictionaries()[0];
|
||||
PhraseDictionaryDynSuffixArray* pdsa = (PhraseDictionaryDynSuffixArray*) pdf->GetDictionary(*dummy);
|
||||
cerr << "Inserting into address " << pdsa << endl;
|
||||
pdsa->insertSnt(source_, target_, alignment_);
|
||||
cerr << "Done inserting\n";
|
||||
//PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy);
|
||||
map<string, xmlrpc_c::value> retData;
|
||||
//*retvalP = xmlrpc_c::value_struct(retData);
|
||||
pdf = 0;
|
||||
pdsa = 0;
|
||||
*retvalP = xmlrpc_c::value_string("Phrase table updated");
|
||||
}
|
||||
string source_, target_, alignment_;
|
||||
bool bounded_;
|
||||
void breakOutParams(const params_t& params) {
|
||||
params_t::const_iterator si = params.find("source");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing source sentence", xmlrpc_c::fault::CODE_PARSE);
|
||||
source_ = xmlrpc_c::value_string(si->second);
|
||||
cerr << "source = " << source_ << endl;
|
||||
si = params.find("target");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing target sentence", xmlrpc_c::fault::CODE_PARSE);
|
||||
target_ = xmlrpc_c::value_string(si->second);
|
||||
cerr << "target = " << target_ << endl;
|
||||
si = params.find("alignment");
|
||||
if(si == params.end())
|
||||
throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
|
||||
alignment_ = xmlrpc_c::value_string(si->second);
|
||||
cerr << "alignment = " << alignment_ << endl;
|
||||
si = params.find("bounded");
|
||||
bounded_ = (si != params.end());
|
||||
}
|
||||
};
|
||||
|
||||
class Translator : public xmlrpc_c::method {
|
||||
public:
|
||||
@ -25,8 +77,6 @@ public:
|
||||
this->_help = "Does translation";
|
||||
}
|
||||
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
|
||||
void
|
||||
execute(xmlrpc_c::paramList const& paramList,
|
||||
xmlrpc_c::value * const retvalP) {
|
||||
@ -47,6 +97,10 @@ public:
|
||||
bool addAlignInfo = (si != params.end());
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
bool addGraphInfo = staticData.GetOutputSearchGraph();
|
||||
cerr << "addGraphInfo: " << addGraphInfo << endl;
|
||||
|
||||
Sentence sentence(Input);
|
||||
const vector<FactorType> &inputFactorOrder =
|
||||
staticData.GetInputFactorOrder();
|
||||
@ -57,7 +111,7 @@ public:
|
||||
const Hypothesis* hypo = manager.GetBestHypothesis();
|
||||
|
||||
vector<xmlrpc_c::value> alignInfo;
|
||||
stringstream out;
|
||||
stringstream out, graphInfo, transCollOpts;
|
||||
outputHypo(out,hypo,addAlignInfo,alignInfo);
|
||||
|
||||
map<string, xmlrpc_c::value> retData;
|
||||
@ -69,6 +123,17 @@ public:
|
||||
}
|
||||
retData.insert(text);
|
||||
|
||||
if(addGraphInfo) {
|
||||
vector<SearchGraphNode> searchGraph;
|
||||
/*manager.GetSearchGraph(0, graphInfo);
|
||||
manager.getSntTranslationOptions(transCollOpts);
|
||||
pair<string, xmlrpc_c::value>
|
||||
graphData("graph", xmlrpc_c::value_string(graphInfo.str()));
|
||||
retData.insert(graphData);
|
||||
pair<string, xmlrpc_c::value>
|
||||
transOpts("transCollOpts", xmlrpc_c::value_string(transCollOpts.str()));
|
||||
retData.insert(transOpts);*/
|
||||
}
|
||||
*retvalP = xmlrpc_c::value_struct(retData);
|
||||
|
||||
}
|
||||
@ -150,8 +215,10 @@ int main(int argc, char** argv) {
|
||||
xmlrpc_c::registry myRegistry;
|
||||
|
||||
xmlrpc_c::methodPtr const translator(new Translator);
|
||||
xmlrpc_c::methodPtr const updater(new Updater);
|
||||
|
||||
myRegistry.addMethod("translate", translator);
|
||||
myRegistry.addMethod("updater", updater);
|
||||
|
||||
xmlrpc_c::serverAbyss myAbyssServer(
|
||||
myRegistry,
|
||||
|
Loading…
Reference in New Issue
Block a user