Added a time-out policy: if translating a sentence takes too long, decoding is interrupted and the current partial translation is output. By default, no time-out is set.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1591 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
nicolabertoldi 2008-04-02 17:08:54 +00:00
parent 4ee468dc03
commit 43ae535165
10 changed files with 69 additions and 9 deletions

View File

@ -63,8 +63,15 @@ then
[AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])],
[AC_MSG_ERROR([Cannot find IRST-LM!])])
if test $MACHTYPE
then
MY_ARCH=$MACHTYPE
echo $ECHO_N "THEN -> MY_ARCH=$MY_ARCH" >&6
else
MY_ARCH=`uname -m`
echo $ECHO_N "ELSE -> MY_ARCH=$MY_ARCH\n" >&6
fi
MY_ARCH=`uname -m`
LIB_IRSTLM="-lirstlm"
LDFLAGS="$LDFLAGS -L${with_irstlm}/lib/${MY_ARCH}"
LIBS="$LIBS $LIB_IRSTLM"

View File

@ -146,9 +146,7 @@ int main(int argc, char* argv[])
// pick best translation (maximum a posteriori decoding)
if (! staticData.UseMBR()) {
ioStream->OutputBestHypo(manager.GetBestHypothesis(), source->GetTranslationId(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors()
);
staticData.GetReportSegmentation(), staticData.GetReportAllFactors());
IFVERBOSE(2) { PrintUserTime("Best Hypothesis Generation Time:"); }
// n-best

View File

@ -51,6 +51,7 @@ Manager::Manager(InputType const& source)
,m_transOptColl(source.CreateTranslationOptionCollection())
,m_initialTargetPhrase(Output)
,m_start(clock())
,interrupted_flag(0)
{
VERBOSE(1, "Translating: " << m_source << endl);
const StaticData &staticData = StaticData::Instance();
@ -105,6 +106,14 @@ void Manager::ProcessSentence()
std::vector < HypothesisStack >::iterator iterStack;
for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack)
{
// check whether the elapsed time has exceeded the time-out threshold
double _elapsed_time = GetUserTime();
if (_elapsed_time > staticData.GetTimeoutThreshold()){
VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
interrupted_flag = 1;
return;
}
HypothesisStack &sourceHypoColl = *iterStack;
// the stack is pruned before processing (lazy pruning):
@ -122,6 +131,9 @@ void Manager::ProcessSentence()
}
// some logging
IFVERBOSE(2) { OutputHypoStackSize(); }
//This stack is fully expanded;
actual_hypoStack = &sourceHypoColl;
}
// some more logging
@ -336,10 +348,18 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp
*/
const Hypothesis *Manager::GetBestHypothesis() const
{
  // Decoding was not interrupted: the answer is the best hypothesis of the
  // last stack in m_hypoStackColl.
  if (interrupted_flag == 0) {
    return m_hypoStackColl.back().GetBestHypothesis();
  }

  // Decoding was interrupted by the time-out (ProcessSentence() set
  // interrupted_flag): fall back to the stack that ProcessSentence() most
  // recently recorded in actual_hypoStack as fully expanded.
  // NOTE(review): actual_hypoStack is only assigned after a stack has been
  // expanded; if the time-out fires before the first stack is finished the
  // pointer may never have been set -- confirm it is initialised.
  return actual_hypoStack->GetBestHypothesis();
}
/**
* Logging of hypothesis stack sizes
*/

View File

@ -75,7 +75,10 @@ protected:
// data
InputType const& m_source; /**< source sentence to be translated */
std::vector < HypothesisStack > m_hypoStackColl; /**< stacks to store hypothesis (partial translations) */
size_t interrupted_flag;
HypothesisStack* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/
std::vector < HypothesisStack > m_hypoStackColl; /**< stacks to store hypotheses (partial translations) */
// no of elements = no of words in source + 1
TranslationOptionCollection *m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
@ -95,6 +98,7 @@ public:
void ProcessSentence();
const Hypothesis *GetBestHypothesis() const;
const Hypothesis *GetActualBestHypothesis() const;
void CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct=0) const;
void GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const;

View File

@ -81,6 +81,7 @@ Parameter::Parameter()
AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");
AddParam("output-word-graph", "owg", "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos");
AddParam("time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
}

View File

@ -263,7 +263,11 @@ bool StaticData::LoadData(Parameter *parameter)
Scan<size_t>(m_parameter->GetParam("mbr-size")[0]) : 200;
m_mbrScale = (m_parameter->GetParam("mbr-scale").size() > 0) ?
Scan<float>(m_parameter->GetParam("mbr-scale")[0]) : 1.0f;
m_timeout_threshold = (m_parameter->GetParam("time-out").size() > 0) ?
Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;
m_timeout = (GetTimeoutThreshold() == -1) ? false : true;
//default case
if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough;

View File

@ -117,6 +117,9 @@ protected:
size_t m_mbrSize; //! number of translation candidates considered
float m_mbrScale; //! scaling factor for computing marginal probability of candidate translation
bool m_timeout; //! use timeout
size_t m_timeout_threshold; //! seconds after which time out is activated
bool m_useTransOptCache;
mutable std::map<Phrase, TranslationOptionList> m_transOptCache;
@ -363,9 +366,13 @@ public:
//! returns m_factorDelimiter by const reference
const std::string& GetFactorDelimiter() const {return m_factorDelimiter;}
//! returns m_maxFactorIdx for the given direction, plus one
size_t GetMaxNumFactors(FactorDirection direction) const { return m_maxFactorIdx[(size_t)direction]+1; }
size_t GetMaxNumFactors() const { return m_maxNumFactors; }
//! returns m_mbr as a bool. Only one UseMBR() declaration is kept: member
//! functions may not be overloaded on return type alone, so a second
//! size_t-returning declaration alongside this one would not compile.
bool UseMBR() const { return m_mbr; }
//! number of translation candidates considered
size_t GetMBRSize() const { return m_mbrSize; }
//! scaling factor for computing marginal probability of candidate translation
float GetMBRScale() const { return m_mbrScale; }
//! whether the time-out is in use
bool UseTimeout() const { return m_timeout; }
//! seconds after which the time-out is activated.
//! NOTE(review): LoadData() uses -1 as the "no time-out" default; stored in a
//! size_t this presumably becomes the largest size_t value -- confirm callers
//! rely on UseTimeout() or on that wrapped value.
size_t GetTimeoutThreshold() const { return m_timeout_threshold; }
size_t GetOutputSearchGraph() const { return m_outputSearchGraph; }
XmlInputType GetXmlInputType() const { return m_xmlInputType; }

View File

@ -28,6 +28,7 @@ class Timer
// void restart(const char* msg = 0);
// void stop(const char* msg = 0);
void check(const char* msg = 0);
double get_elapsed_time();
};
@ -44,6 +45,18 @@ inline double Timer::elapsed_time()
return difftime(now, start_time);
}
/***
 * Public accessor for the time measured by this timer.
 * Forwards to elapsed_time() and hands its result back unchanged, so the
 * value (and whether it reflects wall-clock or cpu time) is exactly what
 * elapsed_time() reports.
 */
inline double Timer::get_elapsed_time()
{
  const double elapsed = elapsed_time();
  return elapsed;
}
/***
* Start a timer. If it is already running, let it continue running.
* Print an optional message.

View File

@ -142,6 +142,11 @@ void PrintUserTime(const std::string &message)
g_timer.check(message.c_str());
}
// Reports the time measured so far by the global timer g_timer, as returned
// by its get_elapsed_time() accessor.
double GetUserTime()
{
  const double elapsed = g_timer.get_elapsed_time();
  return elapsed;
}
std::map<std::string, std::string> ProcessAndStripSGML(std::string &line)
{
std::map<std::string, std::string> meta;

View File

@ -279,6 +279,7 @@ const std::string ToLower(const std::string& str);
// A couple of utilities to measure decoding time
void ResetUserTime();
void PrintUserTime(const std::string &message);
double GetUserTime();
// dump SGML parser for <seg> tags
#include <map>