2008-05-15 18:04:42 +04:00
|
|
|
/**
|
2008-05-15 18:17:34 +04:00
|
|
|
\description This is the main program for the new version of the MERT algorithm developed during the 2nd MT Marathon
|
2008-05-15 18:04:42 +04:00
|
|
|
*/
|
2008-05-15 15:06:32 +04:00
|
|
|
|
|
|
|
#include <limits>
|
|
|
|
#include "Data.h"
|
|
|
|
#include "Point.h"
|
|
|
|
#include "Scorer.h"
|
|
|
|
#include "ScoreData.h"
|
|
|
|
#include "FeatureData.h"
|
|
|
|
#include "Optimizer.h"
|
2008-05-15 18:04:42 +04:00
|
|
|
#include "getopt.h"
|
2008-05-15 23:09:01 +04:00
|
|
|
#include "Types.h"
|
2008-05-15 18:04:42 +04:00
|
|
|
#include <unistd.h>
|
|
|
|
#include <cstdlib>
|
|
|
|
#include <iostream>
|
|
|
|
#include <fstream>
|
|
|
|
#include <cmath>
|
2008-05-16 11:09:15 +04:00
|
|
|
#include "Util.h"
|
2008-05-15 15:06:32 +04:00
|
|
|
|
|
|
|
|
|
|
|
// Global threshold initialised to 1e-3. It is not referenced in the visible
// part of this file; presumably it is read from another translation unit
// (e.g. the optimizer code) -- TODO confirm before changing or removing it.
float min_interval = 1e-3;
|
|
|
|
|
2008-05-15 18:04:42 +04:00
|
|
|
using namespace std;
|
|
|
|
|
2008-05-15 15:06:32 +04:00
|
|
|
/**
 * Prints the command-line synopsis on stderr and terminates the process
 * with exit status 1. This function never returns.
 *
 * The defaults quoted for --scfile and --ffile match the values main()
 * initialises ("statscore.data" and "features.data").
 *
 * NOTE(review): -o is listed here, but the option parser in main() has no
 * handler for it, so passing it currently just prints this message.
 */
void usage(void) {
  std::cerr<<"usage: mert -d <dimensions> (mandatory )"<<std::endl;
  std::cerr<<"[-n retry ntimes (default 1)]"<<std::endl;
  std::cerr<<"[-o\tthe indexes to optimize(default all)]"<<std::endl;
  std::cerr<<"[-t\tthe optimizer(default Powell)]"<<std::endl;
  std::cerr<<"[--sctype] the scorer type (default BLEU)"<<std::endl;
  std::cerr<<"[--scfile] the scorer data file (default statscore.data)"<<std::endl;
  std::cerr<<"[--ffile] the feature data file (default features.data)"<<std::endl;
  std::cerr<<"[-v] verbose level"<<std::endl;
  std::exit(1);
}
|
|
|
|
|
2008-05-15 18:04:42 +04:00
|
|
|
// Long-option table handed to getopt_long(). Every option takes a mandatory
// argument and is reported through its short-option character (flag == NULL),
// so main() handles long and short spellings in the same switch.
// NOTE(review): "optimize" maps to 'o', which main() does not handle yet.
static struct option long_options[] =
{
  {"pdim",     required_argument, NULL, 'd'},
  {"ntry",     required_argument, NULL, 'n'},
  {"optimize", required_argument, NULL, 'o'},
  {"type",     required_argument, NULL, 't'},
  {"sctype",   required_argument, NULL, 's'},
  {"scfile",   required_argument, NULL, 'S'},
  {"ffile",    required_argument, NULL, 'F'},
  {"verbose",  required_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}  // terminator required by getopt_long
};

// Receives the index into long_options of the long option getopt_long matched.
int option_index = 0;
|
2008-05-15 15:06:32 +04:00
|
|
|
|
2008-05-15 18:04:42 +04:00
|
|
|
int main (int argc, char **argv) {
|
2008-05-16 14:57:24 +04:00
|
|
|
int c,pdim,i;
|
|
|
|
pdim=-1;
|
2008-05-15 18:04:42 +04:00
|
|
|
int ntry=1;
|
|
|
|
string type("powell");
|
|
|
|
string scorertype("BLEU");
|
2008-05-16 00:49:49 +04:00
|
|
|
string scorerfile("statscore.data");
|
|
|
|
string featurefile("features.data");
|
2008-05-15 18:04:42 +04:00
|
|
|
vector<unsigned> tooptimize;
|
2008-05-15 23:09:01 +04:00
|
|
|
vector<parameter_t> start;
|
2008-05-16 11:09:15 +04:00
|
|
|
while ((c=getopt_long (argc, argv, "d:n:t:s:S:F:v:", long_options, &option_index)) != -1) {
|
2008-05-15 15:06:32 +04:00
|
|
|
switch (c) {
|
|
|
|
case 'd':
|
2008-05-16 14:57:24 +04:00
|
|
|
pdim = strtol(optarg, NULL, 10);
|
2008-05-15 15:06:32 +04:00
|
|
|
break;
|
|
|
|
case 'n':
|
2008-05-15 18:04:42 +04:00
|
|
|
ntry=strtol(optarg, NULL, 10);
|
2008-05-15 15:06:32 +04:00
|
|
|
break;
|
2008-05-15 18:04:42 +04:00
|
|
|
case 't':
|
|
|
|
type=string(optarg);
|
2008-05-16 00:49:49 +04:00
|
|
|
break;
|
2008-05-15 18:04:42 +04:00
|
|
|
case's':
|
|
|
|
scorertype=string(optarg);
|
2008-05-16 00:49:49 +04:00
|
|
|
break;
|
|
|
|
case 'S':
|
|
|
|
scorerfile=string(optarg);
|
|
|
|
case 'F':
|
|
|
|
featurefile=string(optarg);
|
|
|
|
break;
|
2008-05-16 11:09:15 +04:00
|
|
|
case 'v':
|
|
|
|
setverboselevel(strtol(optarg,NULL,10));
|
|
|
|
break;
|
2008-05-15 15:06:32 +04:00
|
|
|
default:
|
|
|
|
usage();
|
|
|
|
}
|
|
|
|
}
|
2008-05-16 14:57:24 +04:00
|
|
|
if (pdim < 0)
|
2008-05-16 11:09:15 +04:00
|
|
|
usage();
|
2008-05-16 14:57:24 +04:00
|
|
|
if(tooptimize.empty()){
|
|
|
|
tooptimize.resize(pdim);//We'll optimize on everything
|
|
|
|
for(i=0;i<pdim;i++)
|
2008-05-15 18:04:42 +04:00
|
|
|
tooptimize[i]=i;
|
|
|
|
}
|
2008-05-16 14:57:24 +04:00
|
|
|
ifstream opt("init.opt");
|
|
|
|
if(opt.fail()){
|
|
|
|
cerr<<"could not open init.opt"<<endl;
|
|
|
|
exit(3);
|
|
|
|
}
|
|
|
|
start.resize(pdim);//to do:read from file
|
|
|
|
int j;
|
|
|
|
for( j=0;j<pdim&&!opt.fail();j++)
|
|
|
|
opt>>start[j];
|
|
|
|
if(j<pdim){
|
|
|
|
cerr<<"error could not initialize start point with init.opt"<<endl;
|
|
|
|
exit(3);
|
|
|
|
}
|
|
|
|
|
|
|
|
opt.close();
|
|
|
|
//it make sense to know what parameter set were used to generate the nbest
|
2008-05-16 00:49:49 +04:00
|
|
|
ScorerFactory SF;
|
2008-05-16 14:57:24 +04:00
|
|
|
Scorer *TheScorer=SF.getScorer(scorertype);
|
2008-05-16 00:49:49 +04:00
|
|
|
ScoreData *SD=new ScoreData(*TheScorer);
|
|
|
|
SD->load(scorerfile);
|
2008-05-16 14:57:24 +04:00
|
|
|
FeatureData *FD=new FeatureData();
|
|
|
|
FD->load(featurefile);
|
|
|
|
Optimizer *O=OptimizerFactory::BuildOptimizer(pdim,tooptimize,start,type);
|
2008-05-15 18:04:42 +04:00
|
|
|
O->SetScorer(TheScorer);
|
|
|
|
O->SetFData(FD);
|
2008-05-16 14:57:24 +04:00
|
|
|
Point P(start);//Generate from the full feature set. Warning: must ne done after Optimiezr initialiazation
|
|
|
|
statscore_t best=O->Run(P);
|
2008-05-16 20:51:39 +04:00
|
|
|
Point bestP=P;
|
2008-05-16 14:57:24 +04:00
|
|
|
statscore_t mean=best;
|
|
|
|
statscore_t var=best*best;
|
|
|
|
|
|
|
|
vector<parameter_t> min(Point::getdim());
|
|
|
|
vector<parameter_t> max(Point::getdim());
|
|
|
|
|
|
|
|
for(int d=0;d<Point::getdim();d++){
|
2008-05-16 11:09:15 +04:00
|
|
|
min[d]=0.0;
|
|
|
|
max[d]=1.0;
|
|
|
|
}
|
2008-05-16 14:57:24 +04:00
|
|
|
//note: those mins and max are the bound for the starting points of the algorithm, not strict bound on the result!
|
|
|
|
|
|
|
|
for(int i=1;i<ntry;i++){
|
|
|
|
P.Randomize(min,max);
|
|
|
|
statscore_t score=O->Run(P);
|
|
|
|
if(score>best){
|
|
|
|
best=score;
|
|
|
|
bestP=P;
|
2008-05-15 18:04:42 +04:00
|
|
|
}
|
2008-05-16 14:57:24 +04:00
|
|
|
mean+=score;
|
|
|
|
var+=(score*score);
|
|
|
|
}
|
|
|
|
mean/=(float)ntry;
|
|
|
|
var/=(float)ntry;
|
|
|
|
var=sqrt(abs(var-mean*mean));
|
2008-05-16 18:21:24 +04:00
|
|
|
if(ntry>1)
|
|
|
|
cerr<<"variance of the score (for "<<ntry<<" try):"<<var<<endl;
|
2008-05-16 14:57:24 +04:00
|
|
|
cerr<<"best score"<<best<<endl;
|
|
|
|
ofstream res("weights.txt");
|
|
|
|
res<<bestP<<endl;
|
2008-05-15 15:06:32 +04:00
|
|
|
}
|