mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 07:42:21 +03:00
Add support for POS backoff in factored models. The resulting spaghetti code needs refactoring.
This commit is contained in:
parent
6df221966d
commit
dd43c287f6
@ -47,7 +47,7 @@ void BilingualLM::Load(){
|
||||
|
||||
//Cache for NeuralLMids
|
||||
int BilingualLM::getNeuralLMId(const Word& word) const{
|
||||
const Factor* factor = word.GetFactor(0); //Parameter here is m_factorType, hard coded to 0
|
||||
const Factor* factor = word.GetFactor(word_factortype);
|
||||
|
||||
std::map<const Factor *, int>::iterator it;
|
||||
|
||||
@ -55,7 +55,32 @@ int BilingualLM::getNeuralLMId(const Word& word) const{
|
||||
it = neuralLMids.find(factor);
|
||||
|
||||
if (it != neuralLMids.end()) {
|
||||
return it->second; //Lock is released here automatically
|
||||
if (!factored){
|
||||
return it->second; //Lock is released here automatically
|
||||
} else {
|
||||
//See if word is unknown
|
||||
if (it->second == unknown_word_id){
|
||||
const Factor* pos_factor = word.GetFactor(pos_factortype); //Get POS tag
|
||||
//Look up the POS tag in the cache
|
||||
it = neuralLMids.find(pos_factor);
|
||||
if (it != neuralLMids.end()){
|
||||
return it->second; //We have our pos tag in the cache.
|
||||
} else {
|
||||
//We have to look up the POS tag
|
||||
const std::string string = pos_factor->GetString().as_string();
|
||||
int neuralLM_wordID = m_neuralLM->lookup_word(string);
|
||||
|
||||
boost::upgrade_to_unique_lock< boost::shared_mutex > uniqueLock(read_lock);
|
||||
neuralLMids.insert(std::pair<const Factor *, int>(pos_factor, neuralLM_wordID));
|
||||
|
||||
return neuralLM_wordID; //We return the ID of the pos TAG
|
||||
}
|
||||
} else {
|
||||
return it->second; //We return the neuralLMid of the word
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
//We have to lookup the word
|
||||
const std::string string = factor->GetString().as_string();
|
||||
@ -64,7 +89,18 @@ int BilingualLM::getNeuralLMId(const Word& word) const{
|
||||
boost::upgrade_to_unique_lock< boost::shared_mutex > uniqueLock(read_lock);
|
||||
neuralLMids.insert(std::pair<const Factor *, int>(factor, neuralLM_wordID));
|
||||
|
||||
return neuralLM_wordID; //Lock is released here
|
||||
if (!factored) {
|
||||
return neuralLM_wordID; //Lock is released here
|
||||
} else {
|
||||
if (neuralLM_wordID == unknown_word_id){
|
||||
const Factor* pos_factor = word.GetFactor(pos_factortype);
|
||||
const std::string factorstring = pos_factor->GetString().as_string();
|
||||
neuralLM_wordID = m_neuralLM->lookup_word(factorstring);
|
||||
neuralLMids.insert(std::pair<const Factor *, int>(pos_factor, neuralLM_wordID));
|
||||
}
|
||||
return neuralLM_wordID; //If a POS tag is needed, neuralLM_wordID is going to be updated.
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -621,6 +657,19 @@ void BilingualLM::SetParameter(const std::string& key, const std::string& value)
|
||||
std::cerr << "UNRECOGNIZED OPTION FOR PARAMETER premultiply. Got " << value << " , expected true or false!" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} else if (key == "factored") {
|
||||
std::string truestr = "true";
|
||||
std::string falsestr = "false";
|
||||
if (value == truestr) {
|
||||
factored = true;
|
||||
} else if (value == falsestr) {
|
||||
factored = false;
|
||||
} else {
|
||||
std::cerr << "UNRECOGNIZED OPTION FOR PARAMETER factored. Got " << value << " , expected true or false!" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} else if (key == "pos_factor") {
|
||||
pos_factortype = (size_t)atoi(value.c_str());
|
||||
} else {
|
||||
StatefulFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
|
@ -64,14 +64,20 @@ protected:
|
||||
int target_ngrams;
|
||||
int source_ngrams;
|
||||
bool premultiply = true;
|
||||
bool factored = false;
|
||||
int neuralLM_cache = 1000000;
|
||||
int unknown_word_id;
|
||||
|
||||
//NeuralLM lookup
|
||||
FactorType word_factortype = 0;
|
||||
FactorType pos_factortype;
|
||||
const Factor* BOS_factor;
|
||||
const Factor* EOS_factor;
|
||||
mutable Word BOS_word_actual;
|
||||
mutable Word EOS_word_actual;
|
||||
const Word& BOS_word = BOS_word_actual;
|
||||
const Word& EOS_word = EOS_word_actual;
|
||||
|
||||
// thread-specific nplm for thread-safety
|
||||
mutable boost::thread_specific_ptr<nplm::neuralLM> m_neuralLM;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user