Added a new parameter (-clean-lm-cache <value>) to control how often the language models clean up their caches (if available).

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3195 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
nicolabertoldi 2010-04-23 15:01:06 +00:00
parent ca353d3e68
commit 476528c05f
4 changed files with 42 additions and 17 deletions

View File

@ -93,7 +93,8 @@ int main(int argc, char* argv[])
return EXIT_FAILURE;
}
const StaticData &staticData = StaticData::Instance();
//const StaticData &staticData = StaticData::Instance();
StaticData& staticData = const_cast<StaticData&>(StaticData::Instance());
if (!StaticData::LoadDataStatic(&parameter))
return EXIT_FAILURE;
@ -122,15 +123,17 @@ int main(int argc, char* argv[])
size_t lineCount = 0;
while(ReadInput(*ioWrapper,staticData.GetInputType(),source))
{
// note: source is only valid within this while loop!
// note: source is only valid within this while loop!
IFVERBOSE(1)
ResetUserTime();
lineCount++;
VERBOSE(2,"\nTRANSLATING(" << ++lineCount << "): " << *source);
VERBOSE(2,"\nTRANSLATING(" << lineCount << "): " << *source);
Manager manager(*source, staticData.GetSearchAlgorithm());
manager.ProcessSentence();
if (staticData.GetOutputWordGraph())
manager.GetWordGraph(source->GetTranslationId(), ioWrapper->GetOutputWordGraphStream());
@ -228,6 +231,7 @@ int main(int argc, char* argv[])
manager.CalcDecoderStatistics();
staticData.SetNumberOfSentences(lineCount);
}
delete ioWrapper;

View File

@ -102,6 +102,7 @@ Parameter::Parameter()
AddParam("lmbr-r", "ngram precision decay value for lattice mbr");
AddParam("lmbr-map-weight", "weight given to map solution when doing lattice MBR (default 0)");
AddParam("lattice-hypo-set", "to use lattice as hypo set during lattice MBR");
AddParam("clean-lm-cache", "clean language model caches after N translations (default N=1)");
AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
AddParam("persistent-cache-size", "maximum size of cache for translation options (default 10,000 input phrases)");
AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");

View File

@ -378,6 +378,10 @@ bool StaticData::LoadData(Parameter *parameter)
Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;
m_timeout = (GetTimeoutThreshold() == -1) ? false : true;
m_lmcache_cleanup_threshold = (m_parameter->GetParam("clean-lm-cache").size() > 0) ?
Scan<size_t>(m_parameter->GetParam("clean-lm-cache")[0]) : 1;
// Read in constraint decoding file, if provided
if(m_parameter->GetParam("constraint").size()) {
if (m_parameter->GetParam("search-algorithm").size() > 0
@ -1034,8 +1038,15 @@ vector<DecodeGraph*> StaticData::GetDecodeStepVL(const InputType& source) const
return decodeGraphs;
}
/** Decide whether the language-model caches should be cleaned up after the
 *  current sentence.
 *  \return true when a cleanup threshold is configured (non-zero) and the
 *          number of sentences translated so far is an exact multiple of
 *          that threshold; false otherwise (threshold 0 means "never").
 */
bool StaticData::LMCacheCleanup() const {
	// Guard against a zero threshold before taking the modulo (avoids
	// division by zero) and return proper bool literals instead of 1/0.
	return m_lmcache_cleanup_threshold != 0
	       && (m_sentences_done % m_lmcache_cleanup_threshold == 0);
}
#include "PhraseDictionary.h"
void StaticData::CleanUpAfterSentenceProcessing() const
{
@ -1050,12 +1061,14 @@ void StaticData::CleanUpAfterSentenceProcessing() const
for(size_t i=0;i<m_generationDictionary.size();++i)
m_generationDictionary[i]->CleanUp();
//something LMs could do after each sentence
LMList::const_iterator iterLM;
for (iterLM = m_languageModel.begin() ; iterLM != m_languageModel.end() ; ++iterLM)
{
LanguageModel &languageModel = **iterLM;
languageModel.CleanUpAfterSentenceProcessing();
if (LMCacheCleanup()){
//something LMs could do after each sentence
LMList::const_iterator iterLM;
for (iterLM = m_languageModel.begin() ; iterLM != m_languageModel.end() ; ++iterLM)
{
LanguageModel &languageModel = **iterLM;
languageModel.CleanUpAfterSentenceProcessing();
}
}
}

View File

@ -91,11 +91,12 @@ protected:
m_weightWordPenalty,
m_wordDeletionWeight,
m_weightUnknownWord;
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
// do it differently from old pharaoh
// -ve = no limit on distortion
// 0 = no disortion (monotone in old pharaoh)
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
// do it differently from old pharaoh
// -ve = no limit on distortion
// 0 = no disortion (monotone in old pharaoh)
bool m_reorderingConstraint; // use additional reordering constraints
size_t
m_maxHypoStackSize //hypothesis-stack size that triggers pruning
@ -118,8 +119,9 @@ protected:
*/
bool m_dropUnknown;
bool m_wordDeletionEnabled;
bool m_disableDiscarding;
bool m_printAllDerivations;
bool m_disableDiscarding;
bool m_printAllDerivations;
bool m_sourceStartPosMattersForRecombination;
bool m_recoverPath;
@ -162,6 +164,8 @@ protected:
float m_lmbrPRatio; //! decaying factor for ngram thetas - see Tromble et al 08 for more details
float m_lmbrMapWeight; //! Weight given to the map solution. See Kumar et al 09 for details
size_t m_sentences_done; //! number of translations already done
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM cleanup is performed (0=never, N=after N translations; default is 1)
bool m_timeout; //! use timeout
size_t m_timeout_threshold; //! seconds after which time out is activated
@ -523,6 +527,9 @@ public:
bool UseTimeout() const { return m_timeout; }
size_t GetTimeoutThreshold() const { return m_timeout_threshold; }
void SetNumberOfSentences(size_t v) { m_sentences_done=v; }
bool LMCacheCleanup() const;
bool GetOutputSearchGraph() const { return m_outputSearchGraph; }
bool GetOutputSearchGraphExtended() const { return m_outputSearchGraphExtended; }