Fix usage of whitespaces for Optimizers.

This commit is contained in:
Tetsuo Kiso 2011-11-12 18:47:31 +09:00
parent 6c845323ac
commit eecfb171da
2 changed files with 193 additions and 186 deletions

View File

@ -9,17 +9,17 @@
using namespace std; using namespace std;
static const float MIN_FLOAT=-1.0*numeric_limits<float>::max(); static const float MIN_FLOAT = -1.0 * numeric_limits<float>::max();
static const float MAX_FLOAT=numeric_limits<float>::max(); static const float MAX_FLOAT = numeric_limits<float>::max();
namespace { namespace {
/** /**
* Compute the intersection of 2 lines. * Compute the intersection of 2 lines.
*/ */
float intersect(float m1, float b1,float m2,float b2) inline float intersect(float m1, float b1, float m2, float b2)
{ {
float isect = ((b2-b1)/(m1-m2)); float isect = (b2 - b1) / (m1 - m2);
if (!isfinite(isect)) { if (!isfinite(isect)) {
isect = MAX_FLOAT; isect = MAX_FLOAT;
} }
@ -39,98 +39,100 @@ void Optimizer::SetFData(FeatureData *_FData)
FData = _FData; FData = _FData;
} }
Optimizer::Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start, unsigned int nrandom):scorer(NULL),FData(NULL),number_of_random_directions(nrandom) Optimizer::Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
: scorer(NULL), FData(NULL), number_of_random_directions(nrandom)
{ {
// Warning: the init vector is a full set of parameters, of dimension pdim! // Warning: the init vector is a full set of parameters, of dimension pdim!
Point::pdim=Pd; Point::pdim = Pd;
assert(start.size()==Pd); assert(start.size() == Pd);
Point::dim=i2O.size(); Point::dim = i2O.size();
Point::optindices=i2O; Point::optindices = i2O;
if (Point::pdim>Point::dim) { if (Point::pdim > Point::dim) {
for (unsigned int i=0; i<Point::pdim; i++) { for (unsigned int i = 0; i < Point::pdim; i++) {
unsigned int j = 0; unsigned int j = 0;
while (j<Point::dim && i!=i2O[j]) while (j < Point::dim && i != i2O[j])
j++; j++;
// The index i wasn't found in optindices, so it is a fixed index // The index i wasn't found in optindices, so it is a fixed index
// and we use the value of the start vector. // and we use the value of the start vector.
if (j==Point::dim) if (j == Point::dim)
Point::fixedweights[i]=start[i]; Point::fixedweights[i] = start[i];
} }
} }
} }
Optimizer::~Optimizer() {} Optimizer::~Optimizer() {}
statscore_t Optimizer::GetStatScore(const Point& param)const statscore_t Optimizer::GetStatScore(const Point& param) const
{ {
vector<unsigned> bests; vector<unsigned> bests;
Get1bests(param,bests); Get1bests(param, bests);
//copy(bests.begin(),bests.end(),ostream_iterator<unsigned>(cerr," ")); //copy(bests.begin(),bests.end(),ostream_iterator<unsigned>(cerr," "));
statscore_t score = GetStatScore(bests); statscore_t score = GetStatScore(bests);
return score; return score;
} }
map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap,float newt,pair<unsigned,unsigned> newdiff) map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap, float newt, pair<unsigned,unsigned> newdiff)
{ {
map<float,diff_t>::iterator it=thresholdmap.find(newt); map<float,diff_t>::iterator it = thresholdmap.find(newt);
if(it!=thresholdmap.end()) { if (it != thresholdmap.end()) {
//the threshold already exists!! this is very unlikely // the threshold already exists!! this is very unlikely
if(it->second.back().first==newdiff.first) if (it->second.back().first == newdiff.first)
it->second.back().second=newdiff.second;//there was already a diff for this sentence, we change the 1 best; // there was already a diff for this sentence, we change the 1 best;
it->second.back().second = newdiff.second;
else else
it->second.push_back(newdiff); it->second.push_back(newdiff);
} else { } else {
//normal case // normal case
pair< map<float,diff_t >::iterator,bool > ins=thresholdmap.insert(threshold(newt,diff_t(1,newdiff))); pair<map<float,diff_t>::iterator, bool> ins = thresholdmap.insert(threshold(newt, diff_t(1, newdiff)));
assert(ins.second);//we really inserted something assert(ins.second); // we really inserted something
it=ins.first; it = ins.first;
} }
return it; return it;
} }
statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, Point& bestpoint) const
statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,Point& bestpoint)const
{ {
// We are looking for the best Point on the line y=Origin+x*direction // We are looking for the best Point on the line y=Origin+x*direction
float min_int=0.0001; float min_int = 0.0001;
//typedef pair<unsigned,unsigned> diff;//first the sentence that changes, second is the new 1best for this sentence //typedef pair<unsigned,unsigned> diff;//first the sentence that changes, second is the new 1best for this sentence
//list<threshold> thresholdlist; //list<threshold> thresholdlist;
map<float,diff_t> thresholdmap; map<float,diff_t> thresholdmap;
thresholdmap[MIN_FLOAT]=diff_t(); thresholdmap[MIN_FLOAT] = diff_t();
vector<unsigned> first1best; // the vector of nbests for x=-inf vector<unsigned> first1best; // the vector of nbests for x=-inf
for(unsigned int S=0; S<size(); S++) { for (unsigned int S = 0; S < size(); S++) {
map<float,diff_t >::iterator previnserted=thresholdmap.begin(); map<float,diff_t >::iterator previnserted = thresholdmap.begin();
// First, we determine the translation with the best feature score // First, we determine the translation with the best feature score
// for each sentence and each value of x. // for each sentence and each value of x.
//cerr << "Sentence " << S << endl; //cerr << "Sentence " << S << endl;
multimap<float,unsigned> gradient; multimap<float, unsigned> gradient;
vector<float> f0; vector<float> f0;
f0.resize(FData->get(S).size()); f0.resize(FData->get(S).size());
for(unsigned j=0; j<FData->get(S).size(); j++) { for (unsigned j = 0; j < FData->get(S).size(); j++) {
gradient.insert(pair<float,unsigned>(direction*(FData->get(S,j)),j));//gradient of the feature function for this particular target sentence // gradient of the feature function for this particular target sentence
f0[j]=origin*FData->get(S,j);//compute the feature function at the origin point gradient.insert(pair<float, unsigned>(direction * (FData->get(S,j)), j));
// compute the feature function at the origin point
f0[j] = origin * FData->get(S, j);
} }
// Now let's compute the 1best for each value of x. // Now let's compute the 1best for each value of x.
// vector<pair<float,unsigned> > onebest; // vector<pair<float,unsigned> > onebest;
multimap<float,unsigned>::iterator gradientit=gradient.begin(); multimap<float,unsigned>::iterator gradientit = gradient.begin();
multimap<float,unsigned>::iterator highest_f0=gradient.begin(); multimap<float,unsigned>::iterator highest_f0 = gradient.begin();
float smallest=gradientit->first;//smallest gradient float smallest = gradientit->first;//smallest gradient
// Several candidates can have the lowest slope (e.g., for word penalty where the gradient is an integer). // Several candidates can have the lowest slope (e.g., for word penalty where the gradient is an integer).
gradientit++; gradientit++;
while(gradientit!=gradient.end()&&gradientit->first==smallest) { while (gradientit != gradient.end() && gradientit->first == smallest) {
// cerr<<"ni"<<gradientit->second<<endl;; // cerr<<"ni"<<gradientit->second<<endl;;
//cerr<<"fos"<<f0[gradientit->second]<<" "<<f0[index]<<" "<<index<<endl; //cerr<<"fos"<<f0[gradientit->second]<<" "<<f0[index]<<" "<<index<<endl;
if(f0[gradientit->second]>f0[highest_f0->second]) if (f0[gradientit->second] > f0[highest_f0->second])
highest_f0=gradientit;//the highest line is the one with he highest f0 highest_f0 = gradientit;//the highest line is the one with he highest f0
gradientit++; gradientit++;
} }
@ -139,27 +141,27 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
// Now we look for the intersections points indicating a change of 1 best. // Now we look for the intersections points indicating a change of 1 best.
// We use the fact that the function is convex, which means that the gradient can only go up. // We use the fact that the function is convex, which means that the gradient can only go up.
while(gradientit!=gradient.end()) { while (gradientit != gradient.end()) {
map<float,unsigned>::iterator leftmost=gradientit; map<float,unsigned>::iterator leftmost = gradientit;
float m=gradientit->first; float m = gradientit->first;
float b=f0[gradientit->second]; float b = f0[gradientit->second];
multimap<float,unsigned>::iterator gradientit2=gradientit; multimap<float,unsigned>::iterator gradientit2 = gradientit;
gradientit2++; gradientit2++;
float leftmostx=MAX_FLOAT; float leftmostx = MAX_FLOAT;
for(; gradientit2!=gradient.end(); gradientit2++) { for (; gradientit2 != gradient.end(); gradientit2++) {
//cerr<<"--"<<d++<<' '<<gradientit2->first<<' '<<gradientit2->second<<endl; //cerr<<"--"<<d++<<' '<<gradientit2->first<<' '<<gradientit2->second<<endl;
// Look for all candidate with a gradient bigger than the current one, and // Look for all candidate with a gradient bigger than the current one, and
// find the one with the leftmost intersection. // find the one with the leftmost intersection.
float curintersect; float curintersect;
if(m!=gradientit2->first) { if (m != gradientit2->first) {
curintersect=intersect(m,b,gradientit2->first,f0[gradientit2->second]); curintersect = intersect(m, b, gradientit2->first, f0[gradientit2->second]);
//cerr << "curintersect: " << curintersect << " leftmostx: " << leftmostx << endl; //cerr << "curintersect: " << curintersect << " leftmostx: " << leftmostx << endl;
if(curintersect<=leftmostx) { if (curintersect<=leftmostx) {
// We have found an intersection to the left of the leftmost we had so far. // We have found an intersection to the left of the leftmost we had so far.
// We might have curintersect==leftmostx, for example if 2 candidates are the same; // We might have curintersect==leftmostx, for example if 2 candidates are the same;
// in that case it's better to update leftmost to gradientit2 to avoid some recomputing later. // in that case it's better to update leftmost to gradientit2 to avoid some recomputing later.
leftmostx=curintersect; leftmostx = curintersect;
leftmost=gradientit2; // this is the new reference leftmost = gradientit2; // this is the new reference
} }
} }
} }
@ -168,15 +170,15 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
// The rightmost bestindex is the one with the highest slope. // The rightmost bestindex is the one with the highest slope.
// They should be equal but there might be. // They should be equal but there might be.
assert(abs(leftmost->first-gradient.rbegin()->first)<0.0001); assert(abs(leftmost->first-gradient.rbegin()->first) < 0.0001);
// A small difference due to rounding error // A small difference due to rounding error
break; break;
} }
// We have found the next intersection! // We have found the next intersection!
pair<unsigned,unsigned> newd(S,leftmost->second);//new onebest for Sentence S is leftmost->second pair<unsigned,unsigned> newd(S, leftmost->second);//new onebest for Sentence S is leftmost->second
if(leftmostx-previnserted->first<min_int) { if (leftmostx-previnserted->first < min_int) {
// Require that the intersection Point be at least min_int to the right of the previous // Require that the intersection Point be at least min_int to the right of the previous
// one (for this sentence). If not, we replace the previous intersection Point with // one (for this sentence). If not, we replace the previous intersection Point with
// this one. // this one.
@ -186,45 +188,45 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
// be wrong we are going to replace previnsert by the new one because we do not want to keep // be wrong we are going to replace previnsert by the new one because we do not want to keep
// 2 very close threshold: if the minima is there it could be an artifact. // 2 very close threshold: if the minima is there it could be an artifact.
map<float,diff_t>::iterator tit=thresholdmap.find(leftmostx); map<float,diff_t>::iterator tit = thresholdmap.find(leftmostx);
if(tit==previnserted) { if (tit == previnserted) {
// The threshold is the same as before; this can happen if 2 candidates are the same, for example. // The threshold is the same as before; this can happen if 2 candidates are the same, for example.
assert(previnserted->second.back().first==newd.first); assert(previnserted->second.back().first == newd.first);
previnserted->second.back()=newd; // just replace the 1 best for sentence S previnserted->second.back()=newd; // just replace the 1 best for sentence S
// previnsert doesn't change // previnsert doesn't change
} else { } else {
if(tit==thresholdmap.end()) { if (tit == thresholdmap.end()) {
thresholdmap[leftmostx]=previnserted->second; // We keep the diffs at previnsert thresholdmap[leftmostx]=previnserted->second; // We keep the diffs at previnsert
thresholdmap.erase(previnserted); // erase old previnsert thresholdmap.erase(previnserted); // erase old previnsert
previnserted=thresholdmap.find(leftmostx); // point previnsert to the new threshold previnserted = thresholdmap.find(leftmostx); // point previnsert to the new threshold
previnserted->second.back()=newd; // We update the diff for sentence S previnserted->second.back()=newd; // We update the diff for sentence S
// Threshold already exists but is not the previous one. // Threshold already exists but is not the previous one.
} else { } else {
// We append the diffs in previnsert to tit before destroying previnsert. // We append the diffs in previnsert to tit before destroying previnsert.
tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end()); tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end());
assert(tit->second.back().first==newd.first); assert(tit->second.back().first == newd.first);
tit->second.back()=newd; // change diff for sentence S tit->second.back()=newd; // change diff for sentence S
thresholdmap.erase(previnserted); // erase old previnsert thresholdmap.erase(previnserted); // erase old previnsert
previnserted=tit; // point previnsert to the new threshold previnserted = tit; // point previnsert to the new threshold
} }
} }
assert(previnserted != thresholdmap.end()); assert(previnserted != thresholdmap.end());
} else { //normal insertion process } else { //normal insertion process
previnserted=AddThreshold(thresholdmap,leftmostx,newd); previnserted = AddThreshold(thresholdmap, leftmostx, newd);
} }
gradientit=leftmost; gradientit = leftmost;
} // while(gradientit!=gradient.end()){ } // while (gradientit!=gradient.end()){
} // loop on S } // loop on S
// Now the thresholdlist is up to date: it contains a list of all the parameter_ts where // Now the thresholdlist is up to date: it contains a list of all the parameter_ts where
// the function changed its value, along with the nbest list for the interval after each threshold. // the function changed its value, along with the nbest list for the interval after each threshold.
map<float,diff_t >::iterator thrit; map<float,diff_t >::iterator thrit;
if(verboselevel()>6) { if (verboselevel() > 6) {
cerr << "Thresholds:(" <<thresholdmap.size()<<")"<< endl; cerr << "Thresholds:(" << thresholdmap.size() << ")" << endl;
for (thrit = thresholdmap.begin(); thrit!=thresholdmap.end(); thrit++) { for (thrit = thresholdmap.begin(); thrit != thresholdmap.end(); thrit++) {
cerr << "x: " << thrit->first << " diffs"; cerr << "x: " << thrit->first << " diffs";
for (size_t j = 0; j < thrit->second.size(); ++j) { for (size_t j = 0; j < thrit->second.size(); ++j) {
cerr << " " <<thrit->second[j].first << "," << thrit->second[j].second; cerr << " " <<thrit->second[j].first << "," << thrit->second[j].second;
@ -234,20 +236,20 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
} }
// Last thing to do is compute the Stat score (i.e., BLEU) and find the minimum. // Last thing to do is compute the Stat score (i.e., BLEU) and find the minimum.
thrit=thresholdmap.begin(); thrit = thresholdmap.begin();
++thrit; // first diff corrrespond to MIN_FLOAT and first1best ++thrit; // first diff corrrespond to MIN_FLOAT and first1best
diffs_t diffs; diffs_t diffs;
for(; thrit!=thresholdmap.end(); thrit++) for (; thrit != thresholdmap.end(); thrit++)
diffs.push_back(thrit->second); diffs.push_back(thrit->second);
vector<statscore_t> scores=GetIncStatScore(first1best,diffs); vector<statscore_t> scores = GetIncStatScore(first1best, diffs);
thrit=thresholdmap.begin(); thrit = thresholdmap.begin();
statscore_t bestscore=MIN_FLOAT; statscore_t bestscore = MIN_FLOAT;
float bestx=MIN_FLOAT; float bestx = MIN_FLOAT;
// We skipped the first element of thresholdmap, but GetIncStatScore returns one extra score for first1best. // We skipped the first element of thresholdmap, but GetIncStatScore returns one extra score for first1best.
assert(scores.size()==thresholdmap.size()); assert(scores.size() == thresholdmap.size());
for(unsigned int sc=0; sc!=scores.size(); sc++) { for (unsigned int sc = 0; sc != scores.size(); sc++) {
//cerr << "x=" << thrit->first << " => " << scores[sc] << endl; //cerr << "x=" << thrit->first << " => " << scores[sc] << endl;
if (scores[sc] > bestscore) { if (scores[sc] > bestscore) {
// This is the score for the interval [lit2->first, (lit2+1)->first] // This is the score for the interval [lit2->first, (lit2+1)->first]
@ -274,7 +276,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
if (leftx == MIN_FLOAT) { if (leftx == MIN_FLOAT) {
bestx = rightx-1000; bestx = rightx-1000;
} else if (rightx == MAX_FLOAT) { } else if (rightx == MAX_FLOAT) {
bestx = leftx+0.1; bestx = leftx + 0.1;
} else { } else {
bestx = 0.5 * (rightx + leftx); bestx = 0.5 * (rightx + leftx);
} }
@ -283,39 +285,39 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
++thrit; ++thrit;
} }
if(abs(bestx)<0.00015) { if (abs(bestx) < 0.00015) {
// The origin of the line is the best point! We put it back at 0 // The origin of the line is the best point! We put it back at 0
// so we do not propagate rounding errors. // so we do not propagate rounding errors.
bestx=0.0; bestx = 0.0;
// Finally, we manage to extract the best score; // Finally, we manage to extract the best score;
// now we convert bestx (position on the line) to a point. // now we convert bestx (position on the line) to a point.
if(verboselevel()>4) if (verboselevel() > 4)
cerr<<"best point on line at origin"<<endl; cerr << "best point on line at origin" << endl;
} }
if(verboselevel()>3) { if (verboselevel() > 3) {
// cerr<<"end Lineopt, bestx="<<bestx<<endl; // cerr<<"end Lineopt, bestx="<<bestx<<endl;
} }
bestpoint=direction*bestx+origin; bestpoint = direction * bestx + origin;
bestpoint.SetScore(bestscore); bestpoint.SetScore(bestscore);
return bestscore; return bestscore;
} }
void Optimizer::Get1bests(const Point& P,vector<unsigned>& bests)const void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
{ {
assert(FData); assert(FData);
bests.clear(); bests.clear();
bests.resize(size()); bests.resize(size());
for(unsigned i=0; i<size(); i++) { for (unsigned i = 0; i < size(); i++) {
float bestfs=MIN_FLOAT; float bestfs = MIN_FLOAT;
unsigned idx=0; unsigned idx = 0;
unsigned j; unsigned j;
for(j=0; j<FData->get(i).size(); j++) { for (j = 0; j < FData->get(i).size(); j++) {
float curfs=P*FData->get(i,j); float curfs = P * FData->get(i, j);
if(curfs>bestfs) { if (curfs > bestfs) {
bestfs=curfs; bestfs = curfs;
idx=j; idx = j;
} }
} }
bests[i]=idx; bests[i]=idx;
@ -323,18 +325,18 @@ void Optimizer::Get1bests(const Point& P,vector<unsigned>& bests)const
} }
statscore_t Optimizer::Run(Point& P)const statscore_t Optimizer::Run(Point& P) const
{ {
if(!FData) { if (!FData) {
cerr<<"error trying to optimize without Features loaded"<<endl; cerr << "error trying to optimize without Features loaded" << endl;
exit(2); exit(2);
} }
if(!scorer) { if (!scorer) {
cerr<<"error trying to optimize without a Scorer loaded"<<endl; cerr << "error trying to optimize without a Scorer loaded" << endl;
exit(2); exit(2);
} }
if (scorer->getReferenceSize()!=FData->size()) { if (scorer->getReferenceSize() != FData->size()) {
cerr<<"error length mismatch between feature file and score file"<<endl; cerr << "error length mismatch between feature file and score file" << endl;
exit(2); exit(2);
} }
@ -354,108 +356,108 @@ statscore_t Optimizer::Run(Point& P)const
} }
vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst,vector<vector <pair<unsigned,unsigned> > > thediffs)const vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst, vector<vector <pair<unsigned,unsigned> > > thediffs) const
{ {
assert(scorer); assert(scorer);
vector<statscore_t> theres; vector<statscore_t> theres;
scorer->score(thefirst,thediffs,theres); scorer->score(thefirst, thediffs, theres);
return theres; return theres;
} }
statscore_t SimpleOptimizer::TrueRun(Point& P)const statscore_t SimpleOptimizer::TrueRun(Point& P) const
{ {
statscore_t prevscore=0; statscore_t prevscore = 0;
statscore_t bestscore=MIN_FLOAT; statscore_t bestscore = MIN_FLOAT;
Point best; Point best;
// If P is already defined and provides a score, // If P is already defined and provides a score,
// We must improve over this score. // We must improve over this score.
if(P.GetScore() > bestscore) { if (P.GetScore() > bestscore) {
bestscore = P.GetScore(); bestscore = P.GetScore();
best = P; best = P;
} }
int nrun=0; int nrun = 0;
do { do {
++nrun; ++nrun;
if(verboselevel()>2&&nrun>1) if (verboselevel() > 2 && nrun > 1)
cerr<<"last diff="<<bestscore-prevscore<<" nrun "<<nrun<<endl; cerr << "last diff=" << bestscore-prevscore << " nrun " << nrun << endl;
prevscore=bestscore; prevscore = bestscore;
Point linebest; Point linebest;
for(unsigned int d=0; d<Point::getdim()+number_of_random_directions; d++) { for (unsigned int d = 0; d < Point::getdim()+number_of_random_directions; d++) {
if(verboselevel()>4) { if (verboselevel() > 4) {
// cerr<<"minimizing along direction "<<d<<endl; // cerr<<"minimizing along direction "<<d<<endl;
cerr<<"starting point: " << P << " => " << prevscore << endl; cerr << "starting point: " << P << " => " << prevscore << endl;
} }
Point direction; Point direction;
if (d<Point::getdim()) { // regular updates along one dimension if (d < Point::getdim()) { // regular updates along one dimension
for(unsigned int i=0; i<Point::getdim(); i++) for (unsigned int i = 0; i < Point::getdim(); i++)
direction[i]=0.0; direction[i]=0.0;
direction[d]=1.0; direction[d]=1.0;
} }
else { // random direction update else { // random direction update
direction.Randomize(); direction.Randomize();
} }
statscore_t curscore=LineOptimize(P,direction,linebest);//find the minimum on the line statscore_t curscore = LineOptimize(P, direction, linebest);//find the minimum on the line
if(verboselevel()>5) { if (verboselevel() > 5) {
cerr<<"direction: "<< d << " => " << curscore << endl; cerr << "direction: " << d << " => " << curscore << endl;
cerr<<"\tending point: "<< linebest << " => " << curscore << endl; cerr << "\tending point: "<< linebest << " => " << curscore << endl;
} }
if(curscore>bestscore) { if (curscore > bestscore) {
bestscore=curscore; bestscore = curscore;
best=linebest; best = linebest;
if(verboselevel()>3) { if (verboselevel() > 3) {
cerr<<"new best dir:"<<d<<" ("<<nrun<<")"<<endl; cerr << "new best dir:" << d << " (" << nrun << ")" << endl;
cerr<<"new best Point "<<best<< " => " <<curscore<<endl; cerr << "new best Point " << best << " => " << curscore << endl;
} }
} }
} }
P=best; //update the current vector with the best point on all line tested P = best; //update the current vector with the best point on all line tested
if(verboselevel()>3) if (verboselevel() > 3)
cerr<<nrun<<"\t"<<P<<endl; cerr << nrun << "\t" << P << endl;
} while(bestscore-prevscore>kEPS); } while (bestscore-prevscore > kEPS);
if(verboselevel()>2) { if (verboselevel() > 2) {
cerr<<"end Powell Algo, nrun="<<nrun<<endl; cerr << "end Powell Algo, nrun=" << nrun << endl;
cerr<<"last diff="<<bestscore-prevscore<<endl; cerr << "last diff=" << bestscore-prevscore << endl;
cerr<<"\t"<<P<<endl; cerr << "\t" << P << endl;
} }
return bestscore; return bestscore;
} }
statscore_t RandomDirectionOptimizer::TrueRun(Point& P)const statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
{ {
statscore_t prevscore = P.GetScore(); statscore_t prevscore = P.GetScore();
// do specified number of random direction optimizations // do specified number of random direction optimizations
unsigned int nrun = 0; unsigned int nrun = 0;
unsigned int nrun_no_change = 0; unsigned int nrun_no_change = 0;
for(; nrun_no_change<number_of_random_directions; nrun++, nrun_no_change++) for (; nrun_no_change < number_of_random_directions; nrun++, nrun_no_change++)
{ {
// choose a random direction in which to optimize // choose a random direction in which to optimize
Point direction; Point direction;
direction.Randomize(); direction.Randomize();
//find the minimum on the line //find the minimum on the line
statscore_t score=LineOptimize(P,direction,P); statscore_t score = LineOptimize(P, direction, P);
if(verboselevel()>4) { if (verboselevel() > 4) {
cerr<<"direction: "<< direction << " => " << score; cerr << "direction: " << direction << " => " << score;
cerr<<" ("<< (score-prevscore) << ")" << endl; cerr << " (" << (score-prevscore) << ")" << endl;
cerr<<"\tending point: "<< P << " => " << score << endl; cerr << "\tending point: " << P << " => " << score << endl;
} }
if (score-prevscore > kEPS) if (score-prevscore > kEPS)
nrun_no_change=0; nrun_no_change = 0;
prevscore = score; prevscore = score;
} }
if(verboselevel()>2) { if (verboselevel() > 2) {
cerr<<"end Powell Algo, nrun="<<nrun<<endl; cerr << "end Powell Algo, nrun=" << nrun << endl;
} }
return prevscore; return prevscore;
} }
@ -475,7 +477,7 @@ vector<string> OptimizerFactory::typenames;
void OptimizerFactory::SetTypeNames() void OptimizerFactory::SetTypeNames()
{ {
if(typenames.empty()) { if (typenames.empty()) {
typenames.resize(NOPTIMIZER); typenames.resize(NOPTIMIZER);
typenames[POWELL]="powell"; typenames[POWELL]="powell";
typenames[RANDOM_DIRECTION]="random-direction"; typenames[RANDOM_DIRECTION]="random-direction";
@ -485,7 +487,7 @@ void OptimizerFactory::SetTypeNames()
} }
vector<string> OptimizerFactory::GetTypeNames() vector<string> OptimizerFactory::GetTypeNames()
{ {
if(typenames.empty()) if (typenames.empty())
SetTypeNames(); SetTypeNames();
return typenames; return typenames;
} }
@ -493,38 +495,38 @@ vector<string> OptimizerFactory::GetTypeNames()
OptimizerFactory::OptType OptimizerFactory::GetOType(string type) OptimizerFactory::OptType OptimizerFactory::GetOType(string type)
{ {
unsigned int thetype; unsigned int thetype;
if(typenames.empty()) if (typenames.empty())
SetTypeNames(); SetTypeNames();
for(thetype=0; thetype<typenames.size(); thetype++) for (thetype = 0; thetype < typenames.size(); thetype++)
if(typenames[thetype]==type) if (typenames[thetype] == type)
break; break;
return((OptType)thetype); return((OptType)thetype);
} }
Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,vector<unsigned> i2o,vector<parameter_t> start,string type, unsigned int nrandom) Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim, vector<unsigned> i2o, vector<parameter_t> start, string type, unsigned int nrandom)
{ {
OptType T=GetOType(type); OptType T = GetOType(type);
if(T==NOPTIMIZER) { if (T == NOPTIMIZER) {
cerr<<"Error: unknown Optimizer type "<<type<<endl; cerr << "Error: unknown Optimizer type " << type << endl;
cerr<<"Known Algorithm are:"<<endl; cerr << "Known Algorithm are:" << endl;
unsigned int thetype; unsigned int thetype;
for(thetype=0; thetype<typenames.size(); thetype++) for (thetype = 0; thetype < typenames.size(); thetype++)
cerr<<typenames[thetype]<<endl; cerr << typenames[thetype] << endl;
throw ("unknown Optimizer Type"); throw ("unknown Optimizer Type");
} }
switch((OptType)T) { switch ((OptType)T) {
case POWELL: case POWELL:
return new SimpleOptimizer(dim,i2o,start,nrandom); return new SimpleOptimizer(dim, i2o, start, nrandom);
break; break;
case RANDOM_DIRECTION: case RANDOM_DIRECTION:
return new RandomDirectionOptimizer(dim,i2o,start,nrandom); return new RandomDirectionOptimizer(dim, i2o, start, nrandom);
break; break;
case RANDOM: case RANDOM:
return new RandomOptimizer(dim,i2o,start,nrandom); return new RandomOptimizer(dim, i2o, start, nrandom);
break; break;
default: default:
cerr<<"Error: unknown optimizer"<<type<<endl; cerr << "Error: unknown optimizer" << type << endl;
return NULL; return NULL;
} }
} }

View File

@ -9,7 +9,6 @@
#include "Point.h" #include "Point.h"
#include "Types.h" #include "Types.h"
typedef float featurescore; typedef float featurescore;
using namespace std; using namespace std;
@ -25,45 +24,45 @@ protected:
unsigned int number_of_random_directions; unsigned int number_of_random_directions;
public: public:
Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom); Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom);
void SetScorer(Scorer *_scorer); void SetScorer(Scorer *_scorer);
void SetFData(FeatureData *_FData); void SetFData(FeatureData *_FData);
virtual ~Optimizer(); virtual ~Optimizer();
unsigned size()const { unsigned size() const {
return (FData?FData->size():0); return FData ? FData->size() : 0;
} }
/** /**
* Generic wrapper around TrueRun to check a few things. Non virtual. * Generic wrapper around TrueRun to check a few things. Non virtual.
*/ */
statscore_t Run(Point&)const; statscore_t Run(Point&) const;
/** /**
* Main function that performs an optimization. * Main function that performs an optimization.
*/ */
virtual statscore_t TrueRun(Point&)const=0; virtual statscore_t TrueRun(Point&) const = 0;
/** /**
* Given a set of lambdas, get the nbest for each sentence. * Given a set of lambdas, get the nbest for each sentence.
*/ */
void Get1bests(const Point& param,vector<unsigned>& bests)const; void Get1bests(const Point& param,vector<unsigned>& bests) const;
/** /**
* Given a set of nbests, get the Statistical score. * Given a set of nbests, get the Statistical score.
*/ */
statscore_t GetStatScore(const vector<unsigned>& nbests)const { statscore_t GetStatScore(const vector<unsigned>& nbests) const {
return scorer->score(nbests); return scorer->score(nbests);
} }
statscore_t GetStatScore(const Point& param)const; statscore_t GetStatScore(const Point& param) const;
vector<statscore_t > GetIncStatScore(vector<unsigned> ref,vector<vector <pair<unsigned,unsigned> > >)const; vector<statscore_t> GetIncStatScore(vector<unsigned> ref, vector<vector<pair<unsigned,unsigned> > >) const;
/** /**
* Get the optimal Lambda and the best score in a particular direction from a given Point. * Get the optimal Lambda and the best score in a particular direction from a given Point.
*/ */
statscore_t LineOptimize(const Point& start,const Point& direction,Point& best)const; statscore_t LineOptimize(const Point& start, const Point& direction, Point& best) const;
}; };
@ -71,51 +70,57 @@ public:
* Default basic optimizer. * Default basic optimizer.
* This class implements Powell's method. * This class implements Powell's method.
*/ */
class SimpleOptimizer: public Optimizer class SimpleOptimizer : public Optimizer
{ {
private: private:
const float kEPS; const float kEPS;
public: public:
SimpleOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom): Optimizer(dim, i2O, start,nrandom), kEPS(0.0001) {} SimpleOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
virtual statscore_t TrueRun(Point&)const; : Optimizer(dim, i2O, start,nrandom), kEPS(0.0001) {}
virtual statscore_t TrueRun(Point&) const;
}; };
/** /**
* An optimizer with random directions. * An optimizer with random directions.
*/ */
class RandomDirectionOptimizer: public Optimizer class RandomDirectionOptimizer : public Optimizer
{ {
private: private:
const float kEPS; const float kEPS;
public: public:
RandomDirectionOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom): Optimizer(dim,i2O,start,nrandom), kEPS(0.0001) {} RandomDirectionOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
virtual statscore_t TrueRun(Point&)const; : Optimizer(dim, i2O, start, nrandom), kEPS(0.0001) {}
virtual statscore_t TrueRun(Point&) const;
}; };
/** /**
* Dumb baseline optimizer: just picks a random point and quits. * Dumb baseline optimizer: just picks a random point and quits.
*/ */
class RandomOptimizer: public Optimizer class RandomOptimizer : public Optimizer
{ {
public: public:
RandomOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start, unsigned int nrandom): Optimizer(dim,i2O,start,nrandom) {} RandomOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
virtual statscore_t TrueRun(Point&)const; : Optimizer(dim, i2O, start, nrandom) {}
virtual statscore_t TrueRun(Point&) const;
}; };
class OptimizerFactory class OptimizerFactory
{ {
public: public:
// unsigned dim;
//Point Start;
static vector<string> GetTypeNames(); static vector<string> GetTypeNames();
static Optimizer* BuildOptimizer(unsigned dim,vector<unsigned>tooptimize,vector<parameter_t> start,string type,unsigned int nrandom); static Optimizer* BuildOptimizer(unsigned dim, vector<unsigned> tooptimize, vector<parameter_t> start, string type, unsigned int nrandom);
private: private:
OptimizerFactory() {} OptimizerFactory() {}
~OptimizerFactory() {} ~OptimizerFactory() {}
// Add new optimizer here BEFORE NOPTIMIZER // Add new optimizer here BEFORE NOPTIMIZER
enum OptType {POWELL=0,RANDOM_DIRECTION=1,RANDOM,NOPTIMIZER}; enum OptType {
POWELL = 0,
RANDOM_DIRECTION = 1,
RANDOM,NOPTIMIZER
};
static OptType GetOType(string); static OptType GetOType(string);
static vector<string> typenames; static vector<string> typenames;
static void SetTypeNames(); static void SetTypeNames();