Handling dictionary upper bound for IRST LMs through parameter -lmodel-dub

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1592 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
nicolabertoldi 2008-04-04 15:52:45 +00:00
parent 43ae535165
commit a48be8f280
6 changed files with 43 additions and 10 deletions

View File

@ -45,7 +45,8 @@ namespace LanguageModelFactory
, size_t nGramOrder
, const std::string &languageModelFile
, float weight
, ScoreIndexManager &scoreIndexManager)
, ScoreIndexManager &scoreIndexManager
, int dub)
{
LanguageModel *lm = NULL;
switch (lmImplementation)
@ -59,7 +60,7 @@ namespace LanguageModelFactory
break;
case IRST:
#ifdef LM_IRST
lm = new LanguageModelIRST(true, scoreIndexManager);
lm = new LanguageModelIRST(true, scoreIndexManager, dub);
#endif
break;
case Skip:

View File

@ -21,7 +21,8 @@ namespace LanguageModelFactory {
, size_t nGramOrder
, const std::string &languageModelFile
, float weight
, ScoreIndexManager &scoreIndexManager);
, ScoreIndexManager &scoreIndexManager
, int dub);
};

View File

@ -39,9 +39,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
LanguageModelIRST::LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager)
LanguageModelIRST::LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager, int dub)
:LanguageModelSingleFactor(registerScore, scoreIndexManager)
,m_lmtb(0)
,m_lmtb(0),m_lmtb_dub(dub)
{
}
@ -118,6 +118,8 @@ bool LanguageModelIRST::Load(const std::string &filePath,
m_lmtb->init_statecache();
m_lmtb->init_lmtcaches(m_lmtb->maxlevel()>2?m_lmtb->maxlevel()-1:2);
m_lmtb->set_dictionary_upperbound(m_lmtb_dub);
return true;
}

View File

@ -49,6 +49,7 @@ protected:
int m_lmtb_sentenceStart; //lmtb symbols to initialize ngram with
int m_lmtb_sentenceEnd; //lmt symbol to initialize ngram with
int m_lmtb_size; //max ngram stored in the table
int m_lmtb_dub; //dictionary upperbound
std::string m_mapFilePath;
@ -63,7 +64,7 @@ protected:
};
public:
LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager);
LanguageModelIRST(bool registerScore, ScoreIndexManager &scoreIndexManager, int dub);
~LanguageModelIRST();
bool Load(const std::string &filePath
, FactorType factorType
@ -74,4 +75,8 @@ public:
void CleanUpAfterSentenceProcessing();
void InitializeBeforeSentenceProcessing();
void set_dictionary_upperbound(int dub){ m_lmtb_size=dub ;
//m_lmtb->set_dictionary_upperbound(dub);
};
};

View File

@ -45,6 +45,7 @@ Parameter::Parameter()
AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
AddParam("include-alignment-in-n-best", "include word alignment in the n-best list. default is false");
AddParam("lmodel-file", "location and properties of the language models");
AddParam("lmodel-dub", "dictionary upper bounds of language models");
AddParam("lmstats", "L", "(1/0) compute LM backoff statistics for each translation hypothesis");
AddParam("mapping", "description of decoding steps");
AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
@ -229,8 +230,22 @@ bool Parameter::Validate()
noErrorFlag = false;
}
if (m_setting["lmodel-dub"].size() > 0){
if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size()){
stringstream errorMsg("");
errorMsg << "Config and parameters specify "
<< static_cast<int>(m_setting["lmodel-file"].size())
<< " language model files (lmodel-file), but "
<< static_cast<int>(m_setting["lmodel-dub"].size())
<< " LM upperbounds (lmodel-dub)"
<< endl;
UserMessage::Add(errorMsg.str());
noErrorFlag = false;
}
}
if (m_setting["lmodel-file"].size() != m_setting["weight-l"].size())
{
{
stringstream errorMsg("");
errorMsg << "Config and parameters specify "
<< static_cast<int>(m_setting["lmodel-file"].size())

View File

@ -506,6 +506,13 @@ bool StaticData::LoadLanguageModels()
{
m_allWeights.push_back(weightAll[i]);
}
// dictionary upper-bounds for all IRST LMs
vector<int> LMdub = Scan<int>(m_parameter->GetParam("lmodel-dub"));
if (m_parameter->GetParam("lmodel-dub").size() == 0){
for(size_t i=0; i<m_parameter->GetParam("lmodel-file").size(); i++)
LMdub.push_back(0);
}
// initialize n-gram order for each factor. populated only by factored lm
const vector<string> &lmVector = m_parameter->GetParam("lmodel-file");
@ -528,14 +535,15 @@ bool StaticData::LoadLanguageModels()
size_t nGramOrder = Scan<int>(token[2]);
string &languageModelFile = token[3];
if (token.size() == 5)
if (token.size() == 5){
if (lmImplementation==IRST)
languageModelFile += " " + token[4];
else {
UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
return false;
}
IFVERBOSE(1)
}
IFVERBOSE(1)
PrintUserTime(string("Start loading LanguageModel ") + languageModelFile);
LanguageModel *lm = LanguageModelFactory::CreateLanguageModel(
@ -544,7 +552,8 @@ bool StaticData::LoadLanguageModels()
, nGramOrder
, languageModelFile
, weightAll[i]
, m_scoreIndexManager);
, m_scoreIndexManager
, LMdub[i]);
if (lm == NULL)
{
UserMessage::Add("no LM created. We probably don't have it compiled");