mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
Rework lookup and greatly speedup decoding (2x+)
This commit is contained in:
parent
cf3fe60cf6
commit
18a6d12cb0
@ -4,8 +4,6 @@
|
||||
#include "moses/FF/StatefulFeatureFunction.h"
|
||||
#include "moses/FF/FFState.h"
|
||||
#include <boost/thread/tss.hpp>
|
||||
#include <boost/thread/locks.hpp>
|
||||
#include <boost/thread/shared_mutex.hpp>
|
||||
#include "moses/Hypothesis.h"
|
||||
#include "moses/ChartHypothesis.h"
|
||||
#include "moses/InputPath.h"
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "BiLM_NPLM.h"
|
||||
#include "neuralLM.h"
|
||||
#include "vocabulary.h"
|
||||
|
||||
namespace Moses {
|
||||
|
||||
@ -23,70 +24,32 @@ float BilingualLM_NPLM::Score(std::vector<int>& source_words, std::vector<int>&
|
||||
return m_neuralLM->lookup_ngram(source_words);
|
||||
}
|
||||
|
||||
int BilingualLM_NPLM::LookUpNeuralLMWord(const std::string& str) const {
|
||||
return m_neuralLM->lookup_word(str);
|
||||
}
|
||||
|
||||
const Word& BilingualLM_NPLM::getNullWord() const {
|
||||
return NULL_word;
|
||||
}
|
||||
|
||||
//Cache for NeuralLMids
|
||||
int BilingualLM_NPLM::getNeuralLMId(
|
||||
const Word& word, bool is_source_word) const{
|
||||
int BilingualLM_NPLM::getNeuralLMId(const Word& word, bool is_source_word) const {
|
||||
initSharedPointer();
|
||||
|
||||
boost::unordered_map<const Factor*, int>::iterator it;
|
||||
const Factor* factor = word.GetFactor(word_factortype);
|
||||
|
||||
std::map<const Factor *, int>::iterator it;
|
||||
|
||||
boost::upgrade_lock< boost::shared_mutex > read_lock(neuralLMids_lock);
|
||||
it = neuralLMids.find(factor);
|
||||
|
||||
if (it != neuralLMids.end()) {
|
||||
if (!factored){
|
||||
return it->second; //Lock is released here automatically
|
||||
} else {
|
||||
//See if word is unknown
|
||||
if (it->second == unknown_word_id){
|
||||
const Factor* pos_factor = word.GetFactor(pos_factortype); //Get POS tag
|
||||
//Look up the POS tag in the cache
|
||||
it = neuralLMids.find(pos_factor);
|
||||
if (it != neuralLMids.end()){
|
||||
return it->second; //We have our pos tag in the cache.
|
||||
} else {
|
||||
//We have to lookup the pos_tag
|
||||
const std::string posstring = pos_factor->GetString().as_string();
|
||||
int neuralLM_wordID = LookUpNeuralLMWord(posstring);
|
||||
|
||||
boost::upgrade_to_unique_lock< boost::shared_mutex > uniqueLock(read_lock);
|
||||
neuralLMids.insert(std::pair<const Factor *, int>(pos_factor, neuralLM_wordID));
|
||||
|
||||
return neuralLM_wordID; //We return the ID of the pos TAG
|
||||
}
|
||||
} else {
|
||||
return it->second; //We return the neuralLMid of the word
|
||||
}
|
||||
}
|
||||
//If we know the word return immediately
|
||||
if (it != neuralLMids.end()){
|
||||
return it->second;
|
||||
}
|
||||
//If we don't know the word and we aren't factored, return the word.
|
||||
if (!factored) {
|
||||
return unknown_word_id;
|
||||
}
|
||||
//Else try to get a pos_factor
|
||||
const Factor* pos_factor = word.GetFactor(pos_factortype);
|
||||
it = neuralLMids.find(pos_factor);
|
||||
if (it != neuralLMids.end()){
|
||||
return it->second;
|
||||
} else {
|
||||
//We have to lookup the word
|
||||
const std::string string = factor->GetString().as_string();
|
||||
int neuralLM_wordID = LookUpNeuralLMWord(string);
|
||||
|
||||
boost::upgrade_to_unique_lock< boost::shared_mutex > uniqueLock(read_lock);
|
||||
neuralLMids.insert(std::pair<const Factor *, int>(factor, neuralLM_wordID));
|
||||
|
||||
if (!factored) {
|
||||
return neuralLM_wordID; //Lock is released here
|
||||
} else {
|
||||
if (neuralLM_wordID == unknown_word_id){
|
||||
const Factor* pos_factor = word.GetFactor(pos_factortype);
|
||||
const std::string factorstring = pos_factor->GetString().as_string();
|
||||
neuralLM_wordID = LookUpNeuralLMWord(string);
|
||||
neuralLMids.insert(std::pair<const Factor *, int>(pos_factor, neuralLM_wordID));
|
||||
}
|
||||
return neuralLM_wordID; //If a POS tag is needed, neuralLM_wordID is going to be updated.
|
||||
}
|
||||
return unknown_word_id;
|
||||
}
|
||||
}
|
||||
|
||||
@ -128,6 +91,23 @@ void BilingualLM_NPLM::loadModel() {
|
||||
|
||||
m_neuralLM_shared->set_cache(neuralLM_cache); //Default 1000000
|
||||
unknown_word_id = m_neuralLM_shared->lookup_word("<unk>");
|
||||
|
||||
//Setup factor -> NeuralLMId cache
|
||||
FactorCollection& factorFactory = FactorCollection::Instance(); //To do the conversion from string to vocabID
|
||||
|
||||
const nplm::vocabulary& vocab = m_neuralLM_shared->get_vocabulary();
|
||||
const boost::unordered_map<std::string, int>& neuraLMvocabmap = vocab.get_idmap();
|
||||
|
||||
boost::unordered_map<std::string, int>::const_iterator it;
|
||||
|
||||
for (it = neuraLMvocabmap.cbegin(); it != neuraLMvocabmap.cend(); it++) {
|
||||
std::string raw_word = it->first;
|
||||
int neuralLMid = it->second;
|
||||
const Factor * factor = factorFactory.AddFactor(raw_word);
|
||||
|
||||
neuralLMids.insert(std::make_pair(factor, neuralLMid));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} // namespace Moses
|
||||
|
@ -1,4 +1,6 @@
|
||||
#include "moses/LM/BilingualLM.h"
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <utility> //make_pair
|
||||
|
||||
namespace nplm {
|
||||
class neuralLM;
|
||||
@ -15,8 +17,6 @@ class BilingualLM_NPLM : public BilingualLM {
|
||||
|
||||
int getNeuralLMId(const Word& word, bool is_source_word) const;
|
||||
|
||||
int LookUpNeuralLMWord(const std::string& str) const;
|
||||
|
||||
void initSharedPointer() const;
|
||||
|
||||
void loadModel();
|
||||
@ -28,8 +28,7 @@ class BilingualLM_NPLM : public BilingualLM {
|
||||
nplm::neuralLM *m_neuralLM_shared;
|
||||
mutable boost::thread_specific_ptr<nplm::neuralLM> m_neuralLM;
|
||||
|
||||
mutable std::map<const Factor*, int> neuralLMids;
|
||||
mutable boost::shared_mutex neuralLMids_lock;
|
||||
mutable boost::unordered_map<const Factor*, int> neuralLMids;
|
||||
|
||||
//const Factor* NULL_factor_overwrite;
|
||||
std::string NULL_string;
|
||||
|
Loading…
Reference in New Issue
Block a user