From 9e61900fad22c87341b4690c202c57f083eb1402 Mon Sep 17 00:00:00 2001 From: mfederico Date: Mon, 4 Aug 2008 13:06:52 +0000 Subject: [PATCH] Fixed bug concerning the handling of the oov penalty with IRSTLM Now, the penalty for out-of-vocabulary words is specified by the parameter -lmodel-dub: dictionary upper bounds of language models For instance, if you set lmodel-dub to 1000000 (1M) and your actual vocabulary is, let us say, 200000 (200K), then the LM probability of the OOV word-class is divided by 800000 (800K), i.e. 1M-200K. You have to make sure that lmodel-dub is always larger than the LM dictionary. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1870 1f5c12ca-751b-0410-a591-d2e778427230 --- moses/src/LanguageModelIRST.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/moses/src/LanguageModelIRST.cpp b/moses/src/LanguageModelIRST.cpp index 4631396da..d3e09705c 100644 --- a/moses/src/LanguageModelIRST.cpp +++ b/moses/src/LanguageModelIRST.cpp @@ -118,7 +118,7 @@ bool LanguageModelIRST::Load(const std::string &filePath, m_lmtb->init_statecache(); m_lmtb->init_lmtcaches(m_lmtb->maxlevel()>2?m_lmtb->maxlevel()-1:2); - m_lmtb->set_dictionary_upperbound(m_lmtb_dub); + m_lmtb->setlogOOVpenalty(m_lmtb_dub); return true; } @@ -202,6 +202,14 @@ float LanguageModelIRST::GetValue(const vector &contextFactor, Stat } float prob = m_lmtb->clprob(*m_lmtb_ng); + + //apply OOV penalty if the n-gram starts with an OOV word + //in a following version this will be integrated into the + //irstlm library + + if (*m_lmtb_ng->wordp(1) == m_lmtb->dict->oovcode()) + prob-=m_lmtb->getlogOOVpenalty(); + return TransformIRSTScore(prob);