Merge ../mosesdecoder into perf_moses2

This commit is contained in:
Hieu Hoang 2016-05-31 14:36:15 +01:00
commit 36812013bf
15 changed files with 66 additions and 20 deletions

View File

@ -3,10 +3,11 @@
namespace Moses
{
OSMLM* ConstructOSMLM(const char *file)
OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method)
{
lm::ngram::ModelType model_type;
lm::ngram::Config config;
config.load_method = load_method;
if (lm::ngram::RecognizeBinary(file, model_type)) {
switch(model_type) {
case lm::ngram::PROBING:

View File

@ -47,7 +47,7 @@ private:
// OSMLM is the name the operation sequence model code uses for KenOSMBase.
typedef KenOSMBase OSMLM;
// Returns a pointer to an OSMLM for the model stored in `file`.
// NOTE(review): ownership of the returned pointer is not visible from here —
// confirm against the callers.
// NOTE(review): verify this one-argument form is still defined/needed
// alongside the two-argument form below.
OSMLM* ConstructOSMLM(const char *file);
// Same as above, with `load_method` choosing how the model file is loaded;
// the definition stores it in the lm::ngram::Config used to open the model.
OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method);
} // namespace

View File

@ -17,6 +17,7 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
tFactor = 0;
numFeatures = 5;
ReadParameters();
load_method = util::READ;
}
OpSequenceModel::~OpSequenceModel()
@ -27,7 +28,7 @@ OpSequenceModel::~OpSequenceModel()
void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
string unkOp = "_TRANS_SLF_";
OSM = ConstructOSMLM(m_lmPath.c_str());
OSM = ConstructOSMLM(m_lmPath.c_str(), load_method);
State startState = OSM->NullContextState();
State endState;
@ -248,6 +249,20 @@ void OpSequenceModel::SetParameter(const std::string& key, const std::string& va
sFactor = Scan<int>(value);
} else if (key == "output-factor") {
tFactor = Scan<int>(value);
} else if (key == "load") {
if (value == "lazy") {
load_method = util::LAZY;
} else if (value == "populate_or_lazy") {
load_method = util::POPULATE_OR_LAZY;
} else if (value == "populate_or_read" || value == "populate") {
load_method = util::POPULATE_OR_READ;
} else if (value == "read") {
load_method = util::READ;
} else if (value == "parallel_read") {
load_method = util::PARALLEL_READ;
} else {
UTIL_THROW2("Unknown KenLM load method " << value);
}
} else {
StatefulFeatureFunction::SetParameter(key, value);
}

View File

@ -20,6 +20,7 @@ public:
int sFactor; // Source Factor ...
int tFactor; // Target Factor ...
int numFeatures; // Number of features used ...
util::LoadMethod load_method; // method to load model
OpSequenceModel(const std::string &line);
~OpSequenceModel();

View File

@ -59,6 +59,7 @@ Parameter::Parameter()
AddParam(main_opts,"version", "show version of Moses and libraries used");
AddParam(main_opts,"show-weights", "print feature weights and exit");
AddParam(main_opts,"time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
AddParam(main_opts,"segment-time-out", "seconds for single segment after which is interrupted (-1=no time-out, default is -1)");
///////////////////////////////////////////////////////////////////////////////////////
// factorization options

View File

@ -17,21 +17,34 @@ Search::Search(Manager& manager)
, interrupted_flag(0)
{
m_initialTransOpt.SetInputPath(m_inputPath);
m_timer.start();
}
/**
 * Report whether decoding has to stop because a time limit was exceeded.
 *
 * Two independent limits are read from m_options.search:
 *  - timeout:         compared against GetUserTime().
 *  - segment_timeout: compared against the time elapsed on m_timer, which
 *                     the constructor starts.
 *
 * A limit is enforced only when it is strictly positive, so both 0 and the
 * documented "-1 = no time-out" value disable it. Previously a negative
 * timeout fell through `if (!timelimit)` and was reported as expired on the
 * first call, and the early returns made the segment check unreachable.
 *
 * @return true if either limit was exceeded (interrupted_flag is then set
 *         to 1), false otherwise.
 */
bool
Search::
out_of_time()
{
  // Overall limit (switch -time-out).
  int const& timelimit = m_options.search.timeout;
  if (timelimit > 0) {
    double elapsed_time = GetUserTime();
    if (elapsed_time > timelimit) {
      VERBOSE(1,"Decoding is out of time (" << elapsed_time << ","
              << timelimit << ")" << std::endl);
      interrupted_flag = 1;
      return true;
    }
  }

  // Per-segment limit (switch -segment-time-out), measured on this
  // object's own timer rather than on GetUserTime().
  int const& segment_timelimit = m_options.search.segment_timeout;
  if (segment_timelimit > 0) {
    double elapsed_time = m_timer.get_elapsed_time();
    if (elapsed_time > segment_timelimit) {
      VERBOSE(1,"Decoding for segment is out of time (" << elapsed_time << ","
              << segment_timelimit << ")" << std::endl);
      interrupted_flag = 1;
      return true;
    }
  }

  return false;
}
}

View File

@ -7,6 +7,7 @@
#include "Phrase.h"
#include "InputPath.h"
#include "Bitmaps.h"
#include "Timer.h"
namespace Moses
{
@ -48,6 +49,7 @@ protected:
/** flag indicating that decoder ran out of time (see switch -time-out) */
size_t interrupted_flag;
Timer m_timer;
bool out_of_time();
};

View File

@ -97,7 +97,6 @@ void SearchCubePruning::Decode()
// go through each stack
size_t stackNo = 1;
int timelimit = m_options.search.timeout;
std::vector < HypothesisStack* >::iterator iterStack;
for (iterStack = m_hypoStackColl.begin() + 1 ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
// BOOST_FOREACH(HypothesisStack* hstack, m_hypoStackColl) {

View File

@ -155,7 +155,9 @@ aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
m_xmlOptions,
m_reorderingConstraint,
xmlWalls, placeholders);
UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
if (!OK) {
TRACE_ERR("Unable to parse XML in line: " << line);
}
}
}

View File

@ -38,6 +38,7 @@ namespace Moses
param.SetParameter(early_discarding_threshold, "early-discarding-threshold",
DEFAULT_EARLY_DISCARDING_THRESHOLD);
param.SetParameter(timeout, "time-out", 0);
param.SetParameter(segment_timeout, "segment-time-out", 0);
param.SetParameter(max_phrase_length, "max-phrase-length",
DEFAULT_MAX_PHRASE_LENGTH);
param.SetParameter(trans_opt_threshold, "translation-option-threshold",

View File

@ -25,6 +25,7 @@ namespace Moses
float beam_width;
int timeout;
int segment_timeout;
bool consensus; //! Use Consensus decoding (DeNero et al 2009)

View File

@ -240,7 +240,7 @@ sub train_transliteration_module{
`$MOSES_SRC_DIR/scripts/ems/support/substitute-filtered-tables.perl $OUT_DIR/tuning/filtered/moses.ini < $OUT_DIR/model/moses.ini > $OUT_DIR/tuning/moses.filtered.ini`;
`$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $DECODER $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/mert -threads=16 --no-filter-phrase-table`;
`$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $DECODER $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/bin -threads=16 --no-filter-phrase-table`;
`cp $OUT_DIR/tuning/tmp/moses.ini $OUT_DIR/tuning/moses.ini`;

View File

@ -827,7 +827,7 @@ create-config
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-table generation-table-pruned sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
out: config
ignore-if: use-hiero thot
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt no-glue-grammar dont-tune-glue-grammar use-syntax-input-weight-feature
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt no-glue-grammar dont-tune-glue-grammar use-syntax-input-weight-feature operation-sequence-model-load-method
default-name: model/moses.ini
error: Unknown option
error: requires an argument

View File

@ -2660,12 +2660,16 @@ sub define_training_create_config {
if ($osm) {
my $osm_settings = &get("TRAINING:operation-sequence-model-settings");
if ($osm_settings =~ /-factor *(\S+)/){
if ($osm_settings =~ /-factor *(\S+)/) {
$cmd .= "-osm-model $osm/ -osm-setting $1 ";
}
else {
$cmd .= "-osm-model $osm/operationLM.bin ";
}
my $osm_load_method = &get("TRAINING:operation-sequence-model-load-method");
if (defined($osm_load_method)) {
$cmd .= "-osm-load-method $osm_load_method ";
}
}
if (&get("TRAINING:phrase-orientation")) {

View File

@ -83,6 +83,7 @@ my($_EXTERNAL_BINDIR,
$_CONFIG,
$_OSM,
$_OSM_FACTORS,
$_OSM_LOAD_METHOD,
$_POST_DECODING_TRANSLIT,
$_TRANSLITERATION_PHRASE_TABLE,
$_HIERARCHICAL,
@ -238,6 +239,7 @@ $_HELP = 1
'config=s' => \$_CONFIG,
'osm-model=s' => \$_OSM,
'osm-setting=s' => \$_OSM_FACTORS,
'osm-load-method=s' => \$_OSM_LOAD_METHOD,
'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,
'mmsapt' => \$_MMSAPT,
@ -2249,6 +2251,8 @@ sub create_ini {
if($_OSM)
{
my $load_method = "";
$load_method = " load=$_OSM_LOAD_METHOD" if defined($_OSM_LOAD_METHOD);
if (defined($_OSM_FACTORS))
{
my $count = 0;
@ -2258,11 +2262,11 @@ sub create_ini {
my ($factor_f,$factor_e) = split(/\-/,$factor_val);
if($count == 0){
$feature_spec .= "OpSequenceModel name=OpSequenceModel$count num-features=5 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=yes \n";
$feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel$count num-features=5 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=yes \n";
$weight_spec .= "OpSequenceModel$count= 0.08 -0.02 0.02 -0.001 0.03\n";
}
else{
$feature_spec .= "OpSequenceModel name=OpSequenceModel$count num-features=1 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=no \n";
$feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel$count num-features=1 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=no \n";
$weight_spec .= "OpSequenceModel$count= 0.08 \n";
}
@ -2271,7 +2275,7 @@ sub create_ini {
}
else
{
$feature_spec .= "OpSequenceModel name=OpSequenceModel0 num-features=5 path=". $_OSM . " \n";
$feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel0 num-features=5 path=". $_OSM . " \n";
$weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n";
}
}
@ -2292,7 +2296,9 @@ sub create_ini {
}
$type = "KENLM" unless defined $type; # default to KENLM if no type given
if ($type =~ /^\d+$/) {
if ($type =~ /^8-(.+)/) {
$type = "KENLM load=$1";
} elsif ($type =~ /^\d+$/) {
# backwards compatibility if the type is given not as string but as a number
if ($type == 0) {
$type = "SRILM";