Multi-threading of mert, for random restarts.

Fix mert tests.


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4182 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bhaddow 2011-09-07 08:08:35 +00:00
parent 37ebe7a62e
commit ca5c0f19b7
10 changed files with 125 additions and 44 deletions

View File

@ -21,7 +21,7 @@ FeatureStats::FeatureStats()
FeatureStats::~FeatureStats()
{
delete array_;
delete[] array_;
};
FeatureStats::FeatureStats(const FeatureStats &stats)

View File

@ -1,6 +1,6 @@
lib_LTLIBRARIES = libmert.la
bin_PROGRAMS = mert extractor evaluator
AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE
AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS)
libmert_la_SOURCES = \
Util.cpp \
@ -27,10 +27,10 @@ TERsrc/tools.cpp \
TerScorer.cpp \
CderScorer.cpp
mert_SOURCES = mert.cpp
mert_SOURCES = mert.cpp $(top_builddir)/moses/src/ThreadPool.cpp
extractor_SOURCES = extractor.cpp
evaluator_SOURCES = evaluator.cpp
extractor_LDADD = libmert.la -lm -lz
mert_LDADD = libmert.la -lm -lz
mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
evaluator_LDADD = libmert.la -lm -lz

View File

@ -21,7 +21,7 @@ ScoreStats::ScoreStats()
ScoreStats::~ScoreStats()
{
delete array_;
delete[] array_;
};
ScoreStats::ScoreStats(const ScoreStats &stats)

View File

@ -23,6 +23,8 @@
#include "Timer.h"
#include "Util.h"
#include "../moses/src/ThreadPool.h"
float min_interval = 1e-3;
@ -42,6 +44,9 @@ void usage(void)
cerr<<"[--scfile|-S] comma separated list of scorer data files (default score.data)"<<endl;
cerr<<"[--ffile|-F] comma separated list of feature data files (default feature.data)"<<endl;
cerr<<"[--ifile|-i] the starting point data file (default init.opt)"<<endl;
#ifdef WITH_THREADS
cerr<<"[--threads|-T] use multiple threads for random restart (default 1)"<<endl;
#endif
cerr<<"[-v] verbose level"<<endl;
cerr<<"[--help|-h] print this message and exit"<<endl;
exit(1);
@ -60,12 +65,46 @@ static struct option long_options[] = {
{"scfile",1,0,'S'},
{"ffile",1,0,'F'},
{"ifile",1,0,'i'},
#ifdef WITH_THREADS
{"threads", required_argument,0,'T'},
#endif
{"verbose",1,0,'v'},
{"help",no_argument,0,'h'},
{0, 0, 0, 0}
};
int option_index;
/**
 * Runs an optimisation, or a random restart.
 *
 * Wraps a single call to Optimizer::Run() on one starting point so that it
 * can either be submitted to a Moses::ThreadPool or executed directly via
 * Run(). The score is stored in the task and read back by the caller once
 * the task has finished.
 *
 * The task keeps its own copy of the starting point. The optimizer pointer
 * is not owned and is never deleted by this class.
 **/
class OptimizationTask : public Moses::Task
{
public:
  /**
   * @param optimizer optimizer used to run the search (not owned).
   * @param point     starting point; copied into the task.
   */
  OptimizationTask(Optimizer* optimizer, const Point& point) :
    m_optimizer(optimizer), m_point(point), m_score(0) {}
  // m_score starts at 0 so getScore() is well-defined even if it is
  // called before Run() has executed.

  /**
   * Tells the thread pool not to delete this task after running it: the
   * caller keeps the pointer in order to collect the score and point later.
   */
  bool DeleteAfterExecution() {
    return false;
  }

  /** Runs the optimizer on the stored point and records the resulting score. */
  void Run() {
    // NOTE(review): presumably Optimizer::Run updates m_point in place to
    // the optimised point -- confirm against Optimizer::Run's signature.
    m_score = m_optimizer->Run(m_point);
  }

  /** @return the score recorded by Run(), or 0 if Run() has not been called. */
  statscore_t getScore() const {
    return m_score;
  }

  /** @return the point held by this task. */
  const Point& getPoint() const {
    return m_point;
  }

private:
  Optimizer* m_optimizer;  // not owned
  Point m_point;
  statscore_t m_score;
};
int main (int argc, char **argv)
{
@ -83,6 +122,9 @@ int main (int argc, char **argv)
int nrandom=0;
int seed=0;
bool hasSeed = false;
#ifdef WITH_THREADS
size_t threads=1;
#endif
string type("powell");
string scorertype("BLEU");
string scorerconfig("");
@ -140,6 +182,12 @@ int main (int argc, char **argv)
case 'v':
setverboselevel(strtol(optarg,NULL,10));
break;
#ifdef WITH_THREADS
case 'T':
threads = strtol(optarg, NULL, 10);
if (threads < 1) threads = 1;
break;
#endif
default:
usage();
}
@ -266,41 +314,58 @@ int main (int argc, char **argv)
O->SetScorer(TheScorer);
O->SetFData(D.getFeatureData());
#ifdef WITH_THREADS
cerr << "Creating a pool of " << threads << " threads" << endl;
Moses::ThreadPool pool(threads);
#endif
vector<OptimizationTask*> tasks;
// run with specified starting points
stringstream oss;
statscore_t best=0, mean=0, var=0;
Point bestP;
for(int i=0;i<start_list.size();i++) {
Point P(start_list[i], min, max);//Generate from the full feature set. Warning: must be done after Optimizer initialization
statscore_t score=O->Run(P);
oss.str("");
oss << "Specified starting point number " << (1+i) << ", score: " << score;
if (i==0 || score>best) {
best=score;
bestP=P;
oss << " (new best)";
}
mean+=score;
var+=(score*score);
PrintUserTime(oss.str());
for(size_t i=0;i<start_list.size();i++) {
//Generate from the full feature set. Warning: must be done after Optimizer initialization
Point P(start_list[i], min, max);
OptimizationTask* task = new OptimizationTask(O,P);
tasks.push_back(task);
#ifdef WITH_THREADS
pool.Submit(task);
#else
task->Run();
#endif
}
// run with random starting points
for(int i=0; i<ntry; i++) {
//run with random starting points
for (int i = 0; i < ntry; ++i) {
Point P(start_list[0], min, max);
P.Randomize(); // randomize within min and max as given to the constructor
statscore_t score=O->Run(P);
oss.str("");
oss << "Randomized starting point number " << (1+i) << ", score: " << score;
if(score>best) {
best=score;
bestP=P;
oss << " (new best)";
}
mean+=score;
var+=(score*score);
PrintUserTime(oss.str());
OptimizationTask* task = new OptimizationTask(O,P);
tasks.push_back(task);
#ifdef WITH_THREADS
pool.Submit(task);
#else
task->Run();
#endif
}
//wait for all threads to finish
#ifdef WITH_THREADS
pool.Stop(true);
#endif
//collect results
statscore_t best=0, mean=0, var=0;
Point bestP;
for (vector<OptimizationTask*>::const_iterator i = tasks.begin(); i != tasks.end(); ++i) {
statscore_t score = (*i)->getScore();
mean += score;
var += score*score;
if (score > best) {
bestP = (*i)->getPoint();
best = score;
}
}
mean/=(float)ntry;
var/=(float)ntry;
var=sqrt(abs(var-mean*mean));

View File

@ -4,6 +4,10 @@ bin=$1; shift
testdir=$1; shift
cd $testdir
$bin/mert --scfile data/SCORESTAT.txt --ffile data/FEATSTAT.txt --ifile data/INIT -d 14 -n 20 -r 1000 2>&1 | grep -i "^Best"
$bin/mert --scfile data/SCORESTAT.bin --ffile data/FEATSTAT.bin --ifile data/INIT -d 14 -n 20 -r 1000 2>&1 | grep -i "^Best"
cmd="$bin/mert --scfile data/SCORESTAT.txt --ffile data/FEATSTAT.txt --ifile data/INIT -d 14 -n 20 -r 1000"
#echo $cmd
$cmd 2>&1 | grep -i "^Best"
#echo $cmd
cmd="$bin/mert --scfile data/SCORESTAT.bin --ffile data/FEATSTAT.bin --ifile data/INIT -d 14 -n 20 -r 1000"
$cmd 2>&1 | grep -i "^Best"

View File

@ -1 +1,3 @@
0.4 0.15 0.15 0.15 0.15 0.15 0.15 0.5 -1 0.2 0.2 0.2 0.2 0.2
0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 1 1 1 1 1 1 1 1 1 1 1 1 1

View File

@ -56,13 +56,12 @@ void ThreadPool::Execute()
//Execute job
if (task) {
task->Run();
delete task;
if (task->DeleteAfterExecution()) {
delete task;
}
}
m_threadAvailable.notify_all();
} while (!m_stopped);
#ifdef BOOST_HAS_PTHREADS
TRACE_ERR("Thread " << pthread_self() << " exiting" << endl);
#endif
}
void ThreadPool::Submit( Task* task )

View File

@ -36,7 +36,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#endif
#include "Util.h"
//#include "Util.h"
/**
@ -54,6 +54,7 @@ class Task
{
public:
virtual void Run() = 0;
virtual bool DeleteAfterExecution() {return true;}
virtual ~Task() {}
};

View File

@ -8,8 +8,8 @@ DS?=$(shell date '+%Y%m%d')
# Set TARGETDIR to directory where you want the compiled scripts to be copied
# to.
# Set BINDIR to the directory where GIZA++ and other tools are installed.
TARGETDIR=/opt/AO/sw/edinburgh-code/
BINDIR=/opt/AO/sw/edinburgh-code/
TARGETDIR=/home/bhaddow/work/moses.svn
BINDIR=/opt/statmt/moses/bin/
MAIN_SCRIPTS_TARGET_DIR=$(TARGETDIR)
# MAIN_SCRIPTS_TARGET_DIR=$(shell echo `pwd`/temp)

View File

@ -10,6 +10,7 @@
# Excerpts from revision history
# Sept 2011 multi-threaded mert (Barry Haddow)
# Jul 2011 simplifications (Ondrej Bojar)
# -- rely on moses' -show-weights instead of parsing moses.ini
# ... so moses is also run once *before* mert starts, checking
@ -99,6 +100,7 @@ my $___RANDOM_DIRECTIONS = 0; # search in random directions only
my $___NUM_RANDOM_DIRECTIONS = 0; # number of random directions, also works with default optimizer [Cer&al.,2008]
my $___PAIRWISE_RANKED_OPTIMIZER = 0; # use Hopkins&May[2011]
my $___RANDOM_RESTARTS = 20;
my $__THREADS = 0;
# Parameter for effective reference length when computing BLEU score
# Default is to use shortest reference
@ -180,7 +182,8 @@ GetOptions(
"range=s@" => \$___RANGES,
"prev-aggregate-nbestlist=i" => \$prev_aggregate_nbl_size, #number of previous step to consider when loading data (default =-1, i.e. all previous)
"maximum-iterations=i" => \$maximum_iterations,
"pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER
"pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER,
"threads=i" => \$__THREADS
) or exit(1);
# the 4 required parameters can be supplied on the command line directly
@ -258,6 +261,9 @@ Options:
--random-directions ... search only in random directions
--number-of-random-directions=int ... number of random directions
(also works with regular optimizer, default: 0)
--pairwise-ranked ... Use PRO for optimisation (Hopkins and May, emnlp 2011)
--threads=NUMBER ... Use multi-threaded mert (must be compiled in).
";
exit 1;
}
@ -716,6 +722,10 @@ while(1) {
$cmd = $cmd." --ifile run$run.$weights_in_file";
}
if ($__THREADS) {
$cmd = $cmd." --threads $__THREADS";
}
if ($___PAIRWISE_RANKED_OPTIMIZER) {
$cmd .= " --pro pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer -fvals -maxi 30 -nobias binary pro.data";
}