Add vocab mapping back to DALM

This commit is contained in:
Hieu Hoang 2013-11-18 13:54:40 +00:00
parent b3ba081ec9
commit 1accc75d14
2 changed files with 38 additions and 4 deletions

View File

@ -2,10 +2,11 @@
#include <boost/functional/hash.hpp>
#include "DALM.h"
#include "logger.h"
//#include "DALM/include/lm.h"
#include "dalm.h"
#include "vocabulary.h"
#include "moses/FactorCollection.h"
#include "moses/InputFileStream.h"
#include "util/exception.hh"
using namespace std;
@ -78,6 +79,10 @@ void LanguageModelDALM::Load()
string wordstxt; //Path to the vocabulary file in text format.
read_ini(m_filePath.c_str(), model, words, wordstxt);
UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(),
util::FileOpenException,
"Failed to read DALM ini file " << m_filePath << ". Probably doesn't exist");
////////////////
// LOADING LM //
////////////////
@ -94,6 +99,10 @@ void LanguageModelDALM::Load()
wid_start = m_vocab->lookup(BOS_);
wid_end = m_vocab->lookup(EOS_);
// vocab mapping
CreateVocabMapping(wordstxt);
}
LMResult LanguageModelDALM::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
@ -126,11 +135,32 @@ LMResult LanguageModelDALM::GetValue(const vector<const Word*> &contextFactor, S
return ret;
}
void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt)
{
InputFileStream vocabStrm(wordstxt);
string line;
while(getline(vocabStrm, line)) {
const Factor *factor = FactorCollection::Instance().AddFactor(line);
DALM::VocabId wid = m_vocab->lookup(line.c_str());
VocabMap::value_type entry(factor, wid);
m_vocabMap.insert(entry);
}
}
/**
 * Translate a Moses factor into its DALM vocabulary id.
 *
 * The id is taken from the factor -> id side of m_vocabMap, which is filled
 * by CreateVocabMapping(). The previous implementation converted the factor
 * to a string and queried the DALM vocabulary directly, then returned before
 * the map lookup could run; that early return has been removed so the
 * mapping is actually used.
 *
 * @param factor factor to translate.
 * @return the mapped DALM id, or DALM_UNK_WORD when the factor has no entry
 *         in the mapping.
 */
DALM::VocabId LanguageModelDALM::GetVocabId(const Factor *factor) const
{
  const VocabMap::left_map::const_iterator iter = m_vocabMap.left.find(factor);
  if (iter == m_vocabMap.left.end()) {
    // not in mapping. Must be UNK
    return DALM_UNK_WORD;
  }
  return iter->second;
}
}

View File

@ -28,6 +28,10 @@ protected:
DALM::VocabId wid_start, wid_end;
// Bidirectional association between Moses factors and DALM vocabulary ids.
typedef boost::bimap<const Factor *, DALM::VocabId> VocabMap;
// Factor <-> DALM id table; entries are inserted by CreateVocabMapping().
mutable VocabMap m_vocabMap;
// Reads the text vocabulary file at 'wordstxt' line by line and inserts one
// (factor, DALM id) entry per line into m_vocabMap.
void CreateVocabMapping(const std::string &wordstxt);
// Returns the DALM vocabulary id for the given Moses factor.
DALM::VocabId GetVocabId(const Factor *factor) const;
public: