mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-11 19:27:11 +03:00
Added a time-out policy: if translating a sentence takes too long, decoding is interrupted and the current partial translation is output. By default, no time-out is set.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1591 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
4ee468dc03
commit
43ae535165
@ -63,8 +63,15 @@ then
|
||||
[AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])],
|
||||
[AC_MSG_ERROR([Cannot find IRST-LM!])])
|
||||
|
||||
if test $MACHTYPE
|
||||
then
|
||||
MY_ARCH=$MACHTYPE
|
||||
echo $ECHO_N "THEN -> MY_ARCH=$MY_ARCH" >&6
|
||||
else
|
||||
MY_ARCH=`uname -m`
|
||||
echo $ECHO_N "ELSE -> MY_ARCH=$MY_ARCH\n" >&6
|
||||
fi
|
||||
|
||||
MY_ARCH=`uname -m`
|
||||
LIB_IRSTLM="-lirstlm"
|
||||
LDFLAGS="$LDFLAGS -L${with_irstlm}/lib/${MY_ARCH}"
|
||||
LIBS="$LIBS $LIB_IRSTLM"
|
||||
|
@ -146,9 +146,7 @@ int main(int argc, char* argv[])
|
||||
// pick best translation (maximum a posteriori decoding)
|
||||
if (! staticData.UseMBR()) {
|
||||
ioStream->OutputBestHypo(manager.GetBestHypothesis(), source->GetTranslationId(),
|
||||
staticData.GetReportSegmentation(),
|
||||
staticData.GetReportAllFactors()
|
||||
);
|
||||
staticData.GetReportSegmentation(), staticData.GetReportAllFactors());
|
||||
IFVERBOSE(2) { PrintUserTime("Best Hypothesis Generation Time:"); }
|
||||
|
||||
// n-best
|
||||
|
@ -51,6 +51,7 @@ Manager::Manager(InputType const& source)
|
||||
,m_transOptColl(source.CreateTranslationOptionCollection())
|
||||
,m_initialTargetPhrase(Output)
|
||||
,m_start(clock())
|
||||
,interrupted_flag(0)
|
||||
{
|
||||
VERBOSE(1, "Translating: " << m_source << endl);
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
@ -105,6 +106,14 @@ void Manager::ProcessSentence()
|
||||
std::vector < HypothesisStack >::iterator iterStack;
|
||||
for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack)
|
||||
{
|
||||
|
||||
// check whether the elapsed time has exceeded the time-out threshold
|
||||
double _elapsed_time = GetUserTime();
|
||||
if (_elapsed_time > staticData.GetTimeoutThreshold()){
|
||||
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
|
||||
interrupted_flag = 1;
|
||||
return;
|
||||
}
|
||||
HypothesisStack &sourceHypoColl = *iterStack;
|
||||
|
||||
// the stack is pruned before processing (lazy pruning):
|
||||
@ -122,6 +131,9 @@ void Manager::ProcessSentence()
|
||||
}
|
||||
// some logging
|
||||
IFVERBOSE(2) { OutputHypoStackSize(); }
|
||||
|
||||
//This stack is fully expanded;
|
||||
actual_hypoStack = &sourceHypoColl;
|
||||
}
|
||||
|
||||
// some more logging
|
||||
@ -336,10 +348,18 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp
|
||||
*/
|
||||
const Hypothesis *Manager::GetBestHypothesis() const
|
||||
{
|
||||
const HypothesisStack &hypoColl = m_hypoStackColl.back();
|
||||
return hypoColl.GetBestHypothesis();
|
||||
// const HypothesisStack &hypoColl = m_hypoStackColl.back();
|
||||
if (interrupted_flag == 0){
|
||||
const HypothesisStack &hypoColl = m_hypoStackColl.back();
|
||||
return hypoColl.GetBestHypothesis();
|
||||
}
|
||||
else{
|
||||
const HypothesisStack &hypoColl = *actual_hypoStack;
|
||||
return hypoColl.GetBestHypothesis();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Logging of hypothesis stack sizes
|
||||
*/
|
||||
|
@ -75,7 +75,10 @@ protected:
|
||||
// data
|
||||
InputType const& m_source; /**< source sentence to be translated */
|
||||
|
||||
std::vector < HypothesisStack > m_hypoStackColl; /**< stacks to store hypothesis (partial translations) */
|
||||
size_t interrupted_flag;
|
||||
|
||||
HypothesisStack* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/
|
||||
std::vector < HypothesisStack > m_hypoStackColl; /**< stacks to store hypotheses (partial translations) */
|
||||
// no of elements = no of words in source + 1
|
||||
TranslationOptionCollection *m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
|
||||
TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
|
||||
@ -95,6 +98,7 @@ public:
|
||||
|
||||
void ProcessSentence();
|
||||
const Hypothesis *GetBestHypothesis() const;
|
||||
const Hypothesis *GetActualBestHypothesis() const;
|
||||
void CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct=0) const;
|
||||
|
||||
void GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const;
|
||||
|
@ -81,6 +81,7 @@ Parameter::Parameter()
|
||||
AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
|
||||
AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");
|
||||
AddParam("output-word-graph", "owg", "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos");
|
||||
AddParam("time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
|
||||
AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
|
||||
}
|
||||
|
||||
|
@ -263,7 +263,11 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
Scan<size_t>(m_parameter->GetParam("mbr-size")[0]) : 200;
|
||||
m_mbrScale = (m_parameter->GetParam("mbr-scale").size() > 0) ?
|
||||
Scan<float>(m_parameter->GetParam("mbr-scale")[0]) : 1.0f;
|
||||
|
||||
|
||||
m_timeout_threshold = (m_parameter->GetParam("time-out").size() > 0) ?
|
||||
Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;
|
||||
m_timeout = (GetTimeoutThreshold() == -1) ? false : true;
|
||||
|
||||
//default case
|
||||
|
||||
if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough;
|
||||
|
@ -117,6 +117,9 @@ protected:
|
||||
size_t m_mbrSize; //! number of translation candidates considered
|
||||
float m_mbrScale; //! scaling factor for computing marginal probability of candidate translation
|
||||
|
||||
bool m_timeout; //! use timeout
|
||||
size_t m_timeout_threshold; //! seconds after which time out is activated
|
||||
|
||||
bool m_useTransOptCache;
|
||||
mutable std::map<Phrase, TranslationOptionList> m_transOptCache;
|
||||
|
||||
@ -363,9 +366,13 @@ public:
|
||||
const std::string& GetFactorDelimiter() const {return m_factorDelimiter;}
|
||||
size_t GetMaxNumFactors(FactorDirection direction) const { return m_maxFactorIdx[(size_t)direction]+1; }
|
||||
size_t GetMaxNumFactors() const { return m_maxNumFactors; }
|
||||
size_t UseMBR() const { return m_mbr; }
|
||||
bool UseMBR() const { return m_mbr; }
|
||||
size_t GetMBRSize() const { return m_mbrSize; }
|
||||
float GetMBRScale() const { return m_mbrScale; }
|
||||
|
||||
bool UseTimeout() const { return m_timeout; }
|
||||
size_t GetTimeoutThreshold() const { return m_timeout_threshold; }
|
||||
|
||||
size_t GetOutputSearchGraph() const { return m_outputSearchGraph; }
|
||||
|
||||
XmlInputType GetXmlInputType() const { return m_xmlInputType; }
|
||||
|
@ -28,6 +28,7 @@ class Timer
|
||||
// void restart(const char* msg = 0);
|
||||
// void stop(const char* msg = 0);
|
||||
void check(const char* msg = 0);
|
||||
double get_elapsed_time();
|
||||
|
||||
};
|
||||
|
||||
@ -44,6 +45,18 @@ inline double Timer::elapsed_time()
|
||||
return difftime(now, start_time);
|
||||
}
|
||||
|
||||
/***
|
||||
* Return the total time that the timer has been in the "running"
|
||||
* state since it was first "started" or last "restarted". For
|
||||
* "short" time periods (less than an hour), the actual cpu time
|
||||
* used is reported instead of the elapsed time.
|
||||
* This function is the public version of elapsed_time()
|
||||
*/
|
||||
inline double Timer::get_elapsed_time()
|
||||
{
|
||||
return elapsed_time();
|
||||
}
|
||||
|
||||
/***
|
||||
* Start a timer. If it is already running, let it continue running.
|
||||
* Print an optional message.
|
||||
|
@ -142,6 +142,11 @@ void PrintUserTime(const std::string &message)
|
||||
g_timer.check(message.c_str());
|
||||
}
|
||||
|
||||
double GetUserTime()
|
||||
{
|
||||
return g_timer.get_elapsed_time();
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> ProcessAndStripSGML(std::string &line)
|
||||
{
|
||||
std::map<std::string, std::string> meta;
|
||||
|
@ -279,6 +279,7 @@ const std::string ToLower(const std::string& str);
|
||||
// A couple of utilities to measure decoding time
|
||||
void ResetUserTime();
|
||||
void PrintUserTime(const std::string &message);
|
||||
double GetUserTime();
|
||||
|
||||
// dump SGML parser for <seg> tags
|
||||
#include <map>
|
||||
|
Loading…
Reference in New Issue
Block a user