mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-10 00:47:31 +03:00
Add vocab mapping back to DALM
This commit is contained in:
parent
b3ba081ec9
commit
1accc75d14
@ -2,10 +2,11 @@
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include "DALM.h"
|
||||
#include "logger.h"
|
||||
//#include "DALM/include/lm.h"
|
||||
#include "dalm.h"
|
||||
#include "vocabulary.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "moses/InputFileStream.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -78,6 +79,10 @@ void LanguageModelDALM::Load()
|
||||
string wordstxt; //Path to the vocabulary file in text format.
|
||||
read_ini(m_filePath.c_str(), model, words, wordstxt);
|
||||
|
||||
UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
|
||||
util::FileOpenException,
|
||||
"Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
|
||||
|
||||
////////////////
|
||||
// LOADING LM //
|
||||
////////////////
|
||||
@ -94,6 +99,10 @@ void LanguageModelDALM::Load()
|
||||
|
||||
wid_start = m_vocab->lookup(BOS_);
|
||||
wid_end = m_vocab->lookup(EOS_);
|
||||
|
||||
// vocab mapping
|
||||
CreateVocabMapping(wordstxt);
|
||||
|
||||
}
|
||||
|
||||
LMResult LanguageModelDALM::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
|
||||
@ -126,11 +135,32 @@ LMResult LanguageModelDALM::GetValue(const vector<const Word*> &contextFactor, S
|
||||
return ret;
|
||||
}
|
||||
|
||||
void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt)
|
||||
{
|
||||
InputFileStream vocabStrm(wordstxt);
|
||||
|
||||
string line;
|
||||
while(getline(vocabStrm, line)) {
|
||||
const Factor *factor = FactorCollection::Instance().AddFactor(line);
|
||||
DALM::VocabId wid = m_vocab->lookup(line.c_str());
|
||||
|
||||
VocabMap::value_type entry(factor, wid);
|
||||
m_vocabMap.insert(entry);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Translate a Moses factor into its DALM vocabulary id.
 *
 * The id is read from the factor side of m_vocabMap, which is populated by
 * CreateVocabMapping(). No string lookup in m_vocab is performed here: an
 * unconditional early return on that lookup would leave the mapping (and the
 * unknown-word fallback below) unreachable.
 *
 * @param factor  factor to translate
 * @return the mapped DALM::VocabId, or DALM_UNK_WORD when the factor has no
 *         entry in m_vocabMap
 */
DALM::VocabId LanguageModelDALM::GetVocabId(const Factor *factor) const
{
  VocabMap::left_map::const_iterator iter = m_vocabMap.left.find(factor);
  if (iter != m_vocabMap.left.end()) {
    return iter->second;
  }

  // not in mapping. Must be UNK
  return DALM_UNK_WORD;
}
|
||||
|
||||
}
|
||||
|
@ -28,6 +28,10 @@ protected:
|
||||
|
||||
// Vocabulary ids that Load() obtains by looking up BOS_ and EOS_ in m_vocab.
DALM::VocabId wid_start, wid_end;
|
||||
|
||||
// Bidirectional map pairing a Moses factor pointer with a DALM vocabulary id.
typedef boost::bimap<const Factor *, DALM::VocabId> VocabMap;
|
||||
// Factor <-> DALM id pairs, filled by CreateVocabMapping().
// NOTE(review): declared mutable, but no write from a const member is visible
// in this diff - confirm whether mutable is still needed.
mutable VocabMap m_vocabMap;
|
||||
|
||||
// Reads the vocabulary text file at wordstxt line by line and inserts one
// (factor, DALM id) pair per line into m_vocabMap.
void CreateVocabMapping(const std::string &wordstxt);
|
||||
// Returns the DALM vocabulary id for the given factor.
DALM::VocabId GetVocabId(const Factor *factor) const;
|
||||
|
||||
public:
|
||||
|
Loading…
Reference in New Issue
Block a user