mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
Handling dictionary upperbound for IRST LMs through parameter -lmodel-dub
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1592 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
43ae535165
commit
a48be8f280
@ -45,7 +45,8 @@ namespace LanguageModelFactory
|
||||
, size_t nGramOrder
|
||||
, const std::string &languageModelFile
|
||||
, float weight
|
||||
, ScoreIndexManager &scoreIndexManager)
|
||||
, ScoreIndexManager &scoreIndexManager
|
||||
, int dub)
|
||||
{
|
||||
LanguageModel *lm = NULL;
|
||||
switch (lmImplementation)
|
||||
@ -59,7 +60,7 @@ namespace LanguageModelFactory
|
||||
break;
|
||||
case IRST:
|
||||
#ifdef LM_IRST
|
||||
lm = new LanguageModelIRST(true, scoreIndexManager);
|
||||
lm = new LanguageModelIRST(true, scoreIndexManager, dub);
|
||||
#endif
|
||||
break;
|
||||
case Skip:
|
||||
|
@ -21,7 +21,8 @@ namespace LanguageModelFactory {
|
||||
, size_t nGramOrder
|
||||
, const std::string &languageModelFile
|
||||
, float weight
|
||||
, ScoreIndexManager &scoreIndexManager);
|
||||
, ScoreIndexManager &scoreIndexManager
|
||||
, int dub);
|
||||
|
||||
};
|
||||
|
||||
|
@ -39,9 +39,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
using namespace std;
|
||||
|
||||
LanguageModelIRST::LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager)
|
||||
LanguageModelIRST::LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager, int dub)
|
||||
:LanguageModelSingleFactor(registerScore, scoreIndexManager)
|
||||
,m_lmtb(0)
|
||||
,m_lmtb(0),m_lmtb_dub(dub)
|
||||
{
|
||||
}
|
||||
|
||||
@ -118,6 +118,8 @@ bool LanguageModelIRST::Load(const std::string &filePath,
|
||||
m_lmtb->init_statecache();
|
||||
m_lmtb->init_lmtcaches(m_lmtb->maxlevel()>2?m_lmtb->maxlevel()-1:2);
|
||||
|
||||
m_lmtb->set_dictionary_upperbound(m_lmtb_dub);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -49,6 +49,7 @@ protected:
|
||||
int m_lmtb_sentenceStart; //lmtb symbols to initialize ngram with
|
||||
int m_lmtb_sentenceEnd; //lmt symbol to initialize ngram with
|
||||
int m_lmtb_size; //max ngram stored in the table
|
||||
int m_lmtb_dub; //dictionary upperbound
|
||||
|
||||
std::string m_mapFilePath;
|
||||
|
||||
@ -63,7 +64,7 @@ protected:
|
||||
};
|
||||
|
||||
public:
|
||||
LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager);
|
||||
LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager, int dub);
|
||||
~LanguageModelIRST();
|
||||
bool Load(const std::string &filePath
|
||||
, FactorType factorType
|
||||
@ -74,4 +75,8 @@ public:
|
||||
|
||||
void CleanUpAfterSentenceProcessing();
|
||||
void InitializeBeforeSentenceProcessing();
|
||||
|
||||
void set_dictionary_upperbound(int dub){ m_lmtb_size=dub ;
|
||||
//m_lmtb->set_dictionary_upperbound(dub);
|
||||
};
|
||||
};
|
||||
|
@ -45,6 +45,7 @@ Parameter::Parameter()
|
||||
AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
|
||||
AddParam("include-alignment-in-n-best", "include word alignment in the n-best list. default is false");
|
||||
AddParam("lmodel-file", "location and properties of the language models");
|
||||
AddParam("lmodel-dub", "dictionary upper bounds of language models");
|
||||
AddParam("lmstats", "L", "(1/0) compute LM backoff statistics for each translation hypothesis");
|
||||
AddParam("mapping", "description of decoding steps");
|
||||
AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
|
||||
@ -229,8 +230,22 @@ bool Parameter::Validate()
|
||||
noErrorFlag = false;
|
||||
}
|
||||
|
||||
if (m_setting["lmodel-dub"].size() > 0){
|
||||
if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size()){
|
||||
stringstream errorMsg("");
|
||||
errorMsg << "Config and parameters specify "
|
||||
<< static_cast<int>(m_setting["lmodel-file"].size())
|
||||
<< " language model files (lmodel-file), but "
|
||||
<< static_cast<int>(m_setting["lmodel-dub"].size())
|
||||
<< " LM upperbounds (lmodel-dub)"
|
||||
<< endl;
|
||||
UserMessage::Add(errorMsg.str());
|
||||
noErrorFlag = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_setting["lmodel-file"].size() != m_setting["weight-l"].size())
|
||||
{
|
||||
{
|
||||
stringstream errorMsg("");
|
||||
errorMsg << "Config and parameters specify "
|
||||
<< static_cast<int>(m_setting["lmodel-file"].size())
|
||||
|
@ -506,6 +506,13 @@ bool StaticData::LoadLanguageModels()
|
||||
{
|
||||
m_allWeights.push_back(weightAll[i]);
|
||||
}
|
||||
|
||||
// dictionary upper-bounds for all IRST LMs
|
||||
vector<int> LMdub = Scan<int>(m_parameter->GetParam("lmodel-dub"));
|
||||
if (m_parameter->GetParam("lmodel-dub").size() == 0){
|
||||
for(size_t i=0; i<m_parameter->GetParam("lmodel-file").size(); i++)
|
||||
LMdub.push_back(0);
|
||||
}
|
||||
|
||||
// initialize n-gram order for each factor. populated only by factored lm
|
||||
const vector<string> &lmVector = m_parameter->GetParam("lmodel-file");
|
||||
@ -528,14 +535,15 @@ bool StaticData::LoadLanguageModels()
|
||||
size_t nGramOrder = Scan<int>(token[2]);
|
||||
|
||||
string &languageModelFile = token[3];
|
||||
if (token.size() == 5)
|
||||
if (token.size() == 5){
|
||||
if (lmImplementation==IRST)
|
||||
languageModelFile += " " + token[4];
|
||||
else {
|
||||
UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
|
||||
return false;
|
||||
}
|
||||
IFVERBOSE(1)
|
||||
}
|
||||
IFVERBOSE(1)
|
||||
PrintUserTime(string("Start loading LanguageModel ") + languageModelFile);
|
||||
|
||||
LanguageModel *lm = LanguageModelFactory::CreateLanguageModel(
|
||||
@ -544,7 +552,8 @@ bool StaticData::LoadLanguageModels()
|
||||
, nGramOrder
|
||||
, languageModelFile
|
||||
, weightAll[i]
|
||||
, m_scoreIndexManager);
|
||||
, m_scoreIndexManager
|
||||
, LMdub[i]);
|
||||
if (lm == NULL)
|
||||
{
|
||||
UserMessage::Add("no LM created. We probably don't have it compiled");
|
||||
|
Loading…
Reference in New Issue
Block a user