mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
I added a new parameter (-clean-lm-cache <value>) in order to control how often the LMs clean up their caches (if available).
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3195 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
ca353d3e68
commit
476528c05f
@ -93,7 +93,8 @@ int main(int argc, char* argv[])
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
//const StaticData &staticData = StaticData::Instance();
|
||||
StaticData& staticData = const_cast<StaticData&>(StaticData::Instance());
|
||||
if (!StaticData::LoadDataStatic(¶meter))
|
||||
return EXIT_FAILURE;
|
||||
|
||||
@ -122,15 +123,17 @@ int main(int argc, char* argv[])
|
||||
size_t lineCount = 0;
|
||||
while(ReadInput(*ioWrapper,staticData.GetInputType(),source))
|
||||
{
|
||||
// note: source is only valid within this while loop!
|
||||
// note: source is only valid within this while loop!
|
||||
IFVERBOSE(1)
|
||||
ResetUserTime();
|
||||
lineCount++;
|
||||
|
||||
VERBOSE(2,"\nTRANSLATING(" << ++lineCount << "): " << *source);
|
||||
VERBOSE(2,"\nTRANSLATING(" << lineCount << "): " << *source);
|
||||
|
||||
Manager manager(*source, staticData.GetSearchAlgorithm());
|
||||
manager.ProcessSentence();
|
||||
|
||||
|
||||
if (staticData.GetOutputWordGraph())
|
||||
manager.GetWordGraph(source->GetTranslationId(), ioWrapper->GetOutputWordGraphStream());
|
||||
|
||||
@ -228,6 +231,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
manager.CalcDecoderStatistics();
|
||||
|
||||
staticData.SetNumberOfSentences(lineCount);
|
||||
}
|
||||
|
||||
delete ioWrapper;
|
||||
|
@ -102,6 +102,7 @@ Parameter::Parameter()
|
||||
AddParam("lmbr-r", "ngram precision decay value for lattice mbr");
|
||||
AddParam("lmbr-map-weight", "weight given to map solution when doing lattice MBR (default 0)");
|
||||
AddParam("lattice-hypo-set", "to use lattice as hypo set during lattice MBR");
|
||||
AddParam("clean-lm-cache", "clean language model caches after N translations (default N=1)");
|
||||
AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
|
||||
AddParam("persistent-cache-size", "maximum size of cache for translation options (default 10,000 input phrases)");
|
||||
AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");
|
||||
|
@ -378,6 +378,10 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;
|
||||
m_timeout = (GetTimeoutThreshold() == -1) ? false : true;
|
||||
|
||||
|
||||
m_lmcache_cleanup_threshold = (m_parameter->GetParam("clean-lm-cache").size() > 0) ?
|
||||
Scan<size_t>(m_parameter->GetParam("clean-lm-cache")[0]) : 1;
|
||||
|
||||
// Read in constraint decoding file, if provided
|
||||
if(m_parameter->GetParam("constraint").size()) {
|
||||
if (m_parameter->GetParam("search-algorithm").size() > 0
|
||||
@ -1034,8 +1038,15 @@ vector<DecodeGraph*> StaticData::GetDecodeStepVL(const InputType& source) const
|
||||
return decodeGraphs;
|
||||
}
|
||||
|
||||
|
||||
bool StaticData::LMCacheCleanup() const{
|
||||
if (m_lmcache_cleanup_threshold)
|
||||
if (m_sentences_done % m_lmcache_cleanup_threshold == 0)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "PhraseDictionary.h"
|
||||
|
||||
void StaticData::CleanUpAfterSentenceProcessing() const
|
||||
{
|
||||
|
||||
@ -1050,12 +1061,14 @@ void StaticData::CleanUpAfterSentenceProcessing() const
|
||||
for(size_t i=0;i<m_generationDictionary.size();++i)
|
||||
m_generationDictionary[i]->CleanUp();
|
||||
|
||||
//something LMs could do after each sentence
|
||||
LMList::const_iterator iterLM;
|
||||
for (iterLM = m_languageModel.begin() ; iterLM != m_languageModel.end() ; ++iterLM)
|
||||
{
|
||||
LanguageModel &languageModel = **iterLM;
|
||||
languageModel.CleanUpAfterSentenceProcessing();
|
||||
if (LMCacheCleanup()){
|
||||
//something LMs could do after each sentence
|
||||
LMList::const_iterator iterLM;
|
||||
for (iterLM = m_languageModel.begin() ; iterLM != m_languageModel.end() ; ++iterLM)
|
||||
{
|
||||
LanguageModel &languageModel = **iterLM;
|
||||
languageModel.CleanUpAfterSentenceProcessing();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,11 +91,12 @@ protected:
|
||||
m_weightWordPenalty,
|
||||
m_wordDeletionWeight,
|
||||
m_weightUnknownWord;
|
||||
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
|
||||
int m_maxDistortion;
|
||||
// do it differently from old pharaoh
|
||||
// -ve = no limit on distortion
|
||||
// 0 = no distortion (monotone in old pharaoh)
|
||||
|
||||
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
|
||||
int m_maxDistortion;
|
||||
// do it differently from old pharaoh
|
||||
// -ve = no limit on distortion
|
||||
// 0 = no distortion (monotone in old pharaoh)
|
||||
bool m_reorderingConstraint; // use additional reordering constraints
|
||||
size_t
|
||||
m_maxHypoStackSize //hypothesis-stack size that triggers pruning
|
||||
@ -118,8 +119,9 @@ protected:
|
||||
*/
|
||||
bool m_dropUnknown;
|
||||
bool m_wordDeletionEnabled;
|
||||
bool m_disableDiscarding;
|
||||
bool m_printAllDerivations;
|
||||
|
||||
bool m_disableDiscarding;
|
||||
bool m_printAllDerivations;
|
||||
|
||||
bool m_sourceStartPosMattersForRecombination;
|
||||
bool m_recoverPath;
|
||||
@ -162,6 +164,8 @@ protected:
|
||||
float m_lmbrPRatio; //! decaying factor for ngram thetas - see Tromble et al 08 for more details
|
||||
float m_lmbrMapWeight; //! Weight given to the map solution. See Kumar et al 09 for details
|
||||
|
||||
size_t m_sentences_done; //! number of translations already done
|
||||
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM cleanup is performed (0=never, N=after N translations; default is 1)
|
||||
|
||||
bool m_timeout; //! use timeout
|
||||
size_t m_timeout_threshold; //! seconds after which time out is activated
|
||||
@ -523,6 +527,9 @@ public:
|
||||
|
||||
bool UseTimeout() const { return m_timeout; }
|
||||
size_t GetTimeoutThreshold() const { return m_timeout_threshold; }
|
||||
|
||||
//! store the number of translations already done (m_sentences_done)
void SetNumberOfSentences(size_t v) { m_sentences_done=v; }
|
||||
//! true when the LM cache cleanup threshold is non-zero and the number of translations done is a multiple of it
bool LMCacheCleanup() const;
|
||||
|
||||
bool GetOutputSearchGraph() const { return m_outputSearchGraph; }
|
||||
bool GetOutputSearchGraphExtended() const { return m_outputSearchGraphExtended; }
|
||||
|
Loading…
Reference in New Issue
Block a user