Added a new parameter (-clean-lm-cache <value>) to control how often the language models clean up their caches (if available).

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3195 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
nicolabertoldi 2010-04-23 15:01:06 +00:00
parent ca353d3e68
commit 476528c05f
4 changed files with 42 additions and 17 deletions

View File

@ -93,7 +93,8 @@ int main(int argc, char* argv[])
return EXIT_FAILURE;
}
const StaticData &staticData = StaticData::Instance();
//const StaticData &staticData = StaticData::Instance();
StaticData& staticData = const_cast<StaticData&>(StaticData::Instance());
if (!StaticData::LoadDataStatic(&parameter))
return EXIT_FAILURE;
@ -122,15 +123,17 @@ int main(int argc, char* argv[])
size_t lineCount = 0;
while(ReadInput(*ioWrapper,staticData.GetInputType(),source))
{
// note: source is only valid within this while loop!
// note: source is only valid within this while loop!
IFVERBOSE(1)
ResetUserTime();
lineCount++;
VERBOSE(2,"\nTRANSLATING(" << ++lineCount << "): " << *source);
VERBOSE(2,"\nTRANSLATING(" << lineCount << "): " << *source);
Manager manager(*source, staticData.GetSearchAlgorithm());
manager.ProcessSentence();
if (staticData.GetOutputWordGraph())
manager.GetWordGraph(source->GetTranslationId(), ioWrapper->GetOutputWordGraphStream());
@ -228,6 +231,7 @@ int main(int argc, char* argv[])
manager.CalcDecoderStatistics();
staticData.SetNumberOfSentences(lineCount);
}
delete ioWrapper;

View File

@ -102,6 +102,7 @@ Parameter::Parameter()
AddParam("lmbr-r", "ngram precision decay value for lattice mbr");
AddParam("lmbr-map-weight", "weight given to map solution when doing lattice MBR (default 0)");
AddParam("lattice-hypo-set", "to use lattice as hypo set during lattice MBR");
AddParam("clean-lm-cache", "clean language model caches after N translations (default N=1)");
AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
AddParam("persistent-cache-size", "maximum size of cache for translation options (default 10,000 input phrases)");
AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");

View File

@ -378,6 +378,10 @@ bool StaticData::LoadData(Parameter *parameter)
Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;
m_timeout = (GetTimeoutThreshold() == -1) ? false : true;
m_lmcache_cleanup_threshold = (m_parameter->GetParam("clean-lm-cache").size() > 0) ?
Scan<size_t>(m_parameter->GetParam("clean-lm-cache")[0]) : 1;
// Read in constraint decoding file, if provided
if(m_parameter->GetParam("constraint").size()) {
if (m_parameter->GetParam("search-algorithm").size() > 0
@ -1034,8 +1038,15 @@ vector<DecodeGraph*> StaticData::GetDecodeStepVL(const InputType& source) const
return decodeGraphs;
}
/** Decide whether the language-model caches should be cleaned up after the
 *  current sentence.
 *  \return true when a cleanup threshold is configured (non-zero) and the
 *          number of sentences translated so far is an exact multiple of
 *          that threshold; false otherwise (threshold 0 means "never").
 */
bool StaticData::LMCacheCleanup() const {
	// Guard against a zero threshold before taking the modulo (avoids
	// division by zero) and return proper bool literals instead of 1/0.
	return m_lmcache_cleanup_threshold != 0
	       && (m_sentences_done % m_lmcache_cleanup_threshold == 0);
}
#include "PhraseDictionary.h"
void StaticData::CleanUpAfterSentenceProcessing() const
{
@ -1050,12 +1061,14 @@ void StaticData::CleanUpAfterSentenceProcessing() const
for(size_t i=0;i<m_generationDictionary.size();++i)
m_generationDictionary[i]->CleanUp();
//something LMs could do after each sentence
LMList::const_iterator iterLM;
for (iterLM = m_languageModel.begin() ; iterLM != m_languageModel.end() ; ++iterLM)
{
LanguageModel &languageModel = **iterLM;
languageModel.CleanUpAfterSentenceProcessing();
if (LMCacheCleanup()){
//something LMs could do after each sentence
LMList::const_iterator iterLM;
for (iterLM = m_languageModel.begin() ; iterLM != m_languageModel.end() ; ++iterLM)
{
LanguageModel &languageModel = **iterLM;
languageModel.CleanUpAfterSentenceProcessing();
}
}
}

View File

@ -91,11 +91,12 @@ protected:
m_weightWordPenalty,
m_wordDeletionWeight,
m_weightUnknownWord;
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
// do it differently from old pharaoh
// -ve = no limit on distortion
// 0 = no disortion (monotone in old pharaoh)
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
// do it differently from old pharaoh
// -ve = no limit on distortion
// 0 = no disortion (monotone in old pharaoh)
bool m_reorderingConstraint; // use additional reordering constraints
size_t
m_maxHypoStackSize //hypothesis-stack size that triggers pruning
@ -118,8 +119,9 @@ protected:
*/
bool m_dropUnknown;
bool m_wordDeletionEnabled;
bool m_disableDiscarding;
bool m_printAllDerivations;
bool m_disableDiscarding;
bool m_printAllDerivations;
bool m_sourceStartPosMattersForRecombination;
bool m_recoverPath;
@ -162,6 +164,8 @@ protected:
float m_lmbrPRatio; //! decaying factor for ngram thetas - see Tromble et al 08 for more details
float m_lmbrMapWeight; //! Weight given to the map solution. See Kumar et al 09 for details
size_t m_sentences_done; //! number of translations already done
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM cleanup is performed (0=never, N=after N translations; default is 1)
bool m_timeout; //! use timeout
size_t m_timeout_threshold; //! seconds after which time out is activated
@ -523,6 +527,9 @@ public:
bool UseTimeout() const { return m_timeout; }
size_t GetTimeoutThreshold() const { return m_timeout_threshold; }
void SetNumberOfSentences(size_t v) { m_sentences_done=v; }
bool LMCacheCleanup() const;
bool GetOutputSearchGraph() const { return m_outputSearchGraph; }
bool GetOutputSearchGraphExtended() const { return m_outputSearchGraphExtended; }