Fixed bug concerning the handling of the oov penalty with IRSTLM
Now the penalty for out-of-vocabulary words is specified by the parameter -lmodel-dub: the dictionary upper bound of the language model. For instance, if you set lmodel-dub to 1000000 (1M) and your actual vocabulary is, say, 200000 (200K), then the LM probability of the OOV word-class is divided by 800000 (800K), i.e. 1M - 200K. You have to make sure that lmodel-dub is always larger than the LM dictionary.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1870 1f5c12ca-751b-0410-a591-d2e778427230
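To make the arithmetic above concrete, here is a minimal C++ sketch of that computation. This is not IRSTLM's actual API: the helper name LogOOVPenalty is hypothetical, and the use of base-10 logarithms is an assumption based on IRSTLM reporting log10 scores.

#include <cassert>
#include <cmath>
#include <cstdio>

// Hypothetical helper: the penalty (in log10 space) charged to the OOV
// word-class, whose mass is spread over the dub - |V| unseen words.
double LogOOVPenalty(int dub, int dictSize) {
	assert(dub > dictSize);  // lmodel-dub must exceed the LM dictionary size
	return std::log10(static_cast<double>(dub - dictSize));
}

int main() {
	// Figures from the commit message: dub = 1M, vocabulary = 200K,
	// so the OOV probability is divided by 800K (1M - 200K).
	std::printf("log10 OOV penalty = %f\n", LogOOVPenalty(1000000, 200000));
	return 0;
}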
This commit is contained in:
parent 7a2ebedc20
commit 9e61900fad
@@ -118,7 +118,7 @@ bool LanguageModelIRST::Load(const std::string &filePath,
 	m_lmtb->init_statecache();
 	m_lmtb->init_lmtcaches(m_lmtb->maxlevel()>2?m_lmtb->maxlevel()-1:2);
 
-	m_lmtb->set_dictionary_upperbound(m_lmtb_dub);
+	m_lmtb->setlogOOVpenalty(m_lmtb_dub);
 
 	return true;
 }
@@ -202,6 +202,14 @@ float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, Stat
 	}
 
 	float prob = m_lmtb->clprob(*m_lmtb_ng);
+
+	//apply OOV penalty if the n-gram starts with an OOV word
+	//in a following version this will be integrated into the
+	//irstlm library
+
+	if (*m_lmtb_ng->wordp(1) == m_lmtb->dict->oovcode())
+		prob-=m_lmtb->getlogOOVpenalty();
+
 	return TransformIRSTScore(prob);
 }
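To see how the added lines behave, here is a standalone C++ sketch of the scoring tail from the second hunk, with stand-in parameters replacing the m_lmtb members. The conversion factor in TransformIRSTScore (IRSTLM log10 scores mapped to Moses natural-log scores) is an assumption, not taken from this diff.

#include <cmath>
#include <cstdio>

// Assumed conversion: IRSTLM returns log10 probabilities, Moses works in
// natural logs, so multiply by ln(10).
float TransformIRSTScore(float irstScore) {
	return irstScore * 2.30258509299405f;
}

// Mirrors the hunk above: subtract the OOV penalty when the n-gram
// starts with an OOV word, then map into Moses's score domain.
float ScoreNGram(float log10Prob, bool startsWithOOV, float logOOVPenalty) {
	if (startsWithOOV)
		log10Prob -= logOOVPenalty;
	return TransformIRSTScore(log10Prob);
}

int main() {
	// e.g. a raw class probability of 10^-4 and a penalty of log10(800000)
	float penalty = std::log10(800000.0f);
	std::printf("score = %f\n", ScoreNGram(-4.0f, true, penalty));
	return 0;
}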