diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index fb762cee1..13067ea1d 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -9,17 +9,17 @@
 using namespace std;
 
-static const float MIN_FLOAT=-1.0*numeric_limits<float>::max();
-static const float MAX_FLOAT=numeric_limits<float>::max();
+static const float MIN_FLOAT = -1.0 * numeric_limits<float>::max();
+static const float MAX_FLOAT = numeric_limits<float>::max();
 
 namespace {
 
 /**
  * Compute the intersection of 2 lines.
  */
-float intersect(float m1, float b1,float m2,float b2)
+inline float intersect(float m1, float b1, float m2, float b2)
 {
-  float isect = ((b2-b1)/(m1-m2));
+  float isect = (b2 - b1) / (m1 - m2);
   if (!isfinite(isect)) {
     isect = MAX_FLOAT;
   }
@@ -39,98 +39,100 @@ void Optimizer::SetFData(FeatureData *_FData)
   FData = _FData;
 }
 
-Optimizer::Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start, unsigned int nrandom):scorer(NULL),FData(NULL),number_of_random_directions(nrandom)
+Optimizer::Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+  : scorer(NULL), FData(NULL), number_of_random_directions(nrandom)
 {
   // Warning: the init vector is a full set of parameters, of dimension pdim!
-  Point::pdim=Pd;
+  Point::pdim = Pd;
 
-  assert(start.size()==Pd);
-  Point::dim=i2O.size();
-  Point::optindices=i2O;
-  if (Point::pdim>Point::dim) {
-    for (unsigned int i=0; i<Point::pdim; i++) {
+  assert(start.size() == Pd);
+  Point::dim = i2O.size();
+  Point::optindices = i2O;
+  if (Point::pdim > Point::dim) {
+    for (unsigned int i = 0; i < Point::pdim; i++) {
       unsigned int j = 0;
-      while (j<Point::dim && i!=i2O[j])
+      while (j < Point::dim && i != i2O[j])
         j++;
 
       // The index i wasn't found in optindices: it is a fixed index,
       // so we use the value of the start vector.
-      if (j==Point::dim)
-        Point::fixedweights[i]=start[i];
+      if (j == Point::dim)
+        Point::fixedweights[i] = start[i];
     }
   }
 }
 
-statscore_t Optimizer::GetStatScore(const Point& param)const
+statscore_t Optimizer::GetStatScore(const Point& param) const
 {
   vector<unsigned> bests;
-  Get1bests(param,bests);
+  Get1bests(param, bests);
   //copy(bests.begin(),bests.end(),ostream_iterator<unsigned>(cerr," "));
   statscore_t score = GetStatScore(bests);
   return score;
 }
 
-map<float,diff_t>::iterator AddThreshold(map<float,diff_t>& thresholdmap,float newt,pair<unsigned,unsigned> newdiff)
+map<float,diff_t>::iterator AddThreshold(map<float,diff_t>& thresholdmap, float newt, pair<unsigned,unsigned> newdiff)
 {
-  map<float,diff_t>::iterator it=thresholdmap.find(newt);
-  if(it!=thresholdmap.end()) {
-    //the threshold already exists!! this is very unlikely
-    if(it->second.back().first==newdiff.first)
-      it->second.back().second=newdiff.second;//there was already a diff for this sentence, we change the 1 best;
+  map<float,diff_t>::iterator it = thresholdmap.find(newt);
+  if (it != thresholdmap.end()) {
+    // the threshold already exists!! this is very unlikely
+    if (it->second.back().first == newdiff.first)
+      // there was already a diff for this sentence, we change the 1 best;
+      it->second.back().second = newdiff.second;
     else
       it->second.push_back(newdiff);
   } else {
-    //normal case
-    pair< map<float,diff_t>::iterator,bool > ins=thresholdmap.insert(threshold(newt,diff_t(1,newdiff)));
-    assert(ins.second);//we really inserted something
-    it=ins.first;
+    // normal case
+    pair<map<float,diff_t>::iterator, bool> ins = thresholdmap.insert(threshold(newt, diff_t(1, newdiff)));
+    assert(ins.second); // we really inserted something
+    it = ins.first;
   }
   return it;
 }
-
-statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,Point& bestpoint)const
+statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, Point& bestpoint) const
 {
-  // We are looking for the best Point on the line y=Origin+x*direction
-  float min_int=0.0001;
+  float min_int = 0.0001;
   //typedef pair<unsigned,unsigned> diff;//first the sentence that changes, second is the new 1best for this sentence
   //list<threshold> thresholdlist;
 
   map<float,diff_t> thresholdmap;
   thresholdmap[MIN_FLOAT]=diff_t();
   vector<unsigned> first1best; // the vector of nbests for x=-inf
 
-  for(unsigned int S=0; S<size(); S++) {
-    map<float,diff_t>::iterator previnserted=thresholdmap.begin();
+  for (unsigned int S = 0; S < size(); S++) {
+    map<float,diff_t>::iterator previnserted = thresholdmap.begin();
 
     // First, we determine the translation with the best feature score
     // for each sentence and each value of x.
     //cerr << "Sentence " << S << endl;
-    multimap<float,unsigned> gradient;
+    multimap<float, unsigned> gradient;
     vector<float> f0;
     f0.resize(FData->get(S).size());
-    for(unsigned j=0; j<FData->get(S).size(); j++) {
-      gradient.insert(pair<float,unsigned>(direction*(FData->get(S,j)),j));//gradient of the feature function for this particular target sentence
-      f0[j]=origin*FData->get(S,j);//compute the feature function at the origin point
+    for (unsigned j = 0; j < FData->get(S).size(); j++) {
+      // gradient of the feature function for this particular target sentence
+      gradient.insert(pair<float, unsigned>(direction * (FData->get(S,j)), j));
+      // compute the feature function at the origin point
+      f0[j] = origin * FData->get(S, j);
     }
 
     // Now let's compute the 1best for each value of x.
     //    vector<pair<float,unsigned> > onebest;
-    multimap<float,unsigned>::iterator gradientit=gradient.begin();
-    multimap<float,unsigned>::iterator highest_f0=gradient.begin();
+    multimap<float, unsigned>::iterator gradientit = gradient.begin();
+    multimap<float, unsigned>::iterator highest_f0 = gradient.begin();
 
-    float smallest=gradientit->first;//smallest gradient
+    float smallest = gradientit->first;//smallest gradient
 
     // Several candidates can have the lowest slope (e.g., for word penalty where the gradient is an integer).
    gradientit++;
-    while(gradientit!=gradient.end()&&gradientit->first==smallest) {
+    while (gradientit != gradient.end() && gradientit->first == smallest) {
      //   cerr<<"ni"<<gradientit->second<<endl;
      //cerr<<"fos"<<f0[gradientit->second]<<" "<<f0[highest_f0->second]<<" "<<highest_f0->second<<endl;
-      if(f0[gradientit->second]>f0[highest_f0->second])
-        highest_f0=gradientit;//the highest line is the one with he highest f0
+      if (f0[gradientit->second] > f0[highest_f0->second])
+        highest_f0 = gradientit;//the highest line is the one with the highest f0
      gradientit++;
    }
@@ -139,27 +141,27 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
     // Now we look for the intersection points indicating a change of 1 best.
     // We use the fact that the function is convex, which means that the gradient can only go up.
-    while(gradientit!=gradient.end()) {
-      map<float,unsigned>::iterator leftmost=gradientit;
-      float m=gradientit->first;
-      float b=f0[gradientit->second];
-      multimap<float,unsigned>::iterator gradientit2=gradientit;
+    while (gradientit != gradient.end()) {
+      map<float,unsigned>::iterator leftmost = gradientit;
+      float m = gradientit->first;
+      float b = f0[gradientit->second];
+      multimap<float, unsigned>::iterator gradientit2 = gradientit;
      gradientit2++;
-      float leftmostx=MAX_FLOAT;
+      float leftmostx = MAX_FLOAT;
 
-      for(; gradientit2!=gradient.end(); gradientit2++) {
+      for (; gradientit2 != gradient.end(); gradientit2++) {
        //cerr<<"--"<<gradientit2->first<<' '<<gradientit2->second<<endl;
        float curintersect;
-        if(m!=gradientit2->first) {
-          curintersect=intersect(m,b,gradientit2->first,f0[gradientit2->second]);
+        if (m != gradientit2->first) {
+          curintersect = intersect(m, b, gradientit2->first, f0[gradientit2->second]);
          //cerr << "curintersect: " << curintersect << " leftmostx: " << leftmostx << endl;
-          if(curintersect<=leftmostx) {
+          if (curintersect<=leftmostx) {
            // We have found an intersection to the left of the leftmost we had so far.
            // We might have curintersect==leftmostx, for example if 2 candidates are the same;
            // in that case it's better to update leftmost to gradientit2 to avoid some recomputing later.
-            leftmostx=curintersect;
-            leftmost=gradientit2; // this is the new reference
+            leftmostx = curintersect;
+            leftmost = gradientit2; // this is the new reference
          }
        }
      }
@@ -168,15 +170,15 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
        // The rightmost bestindex is the one with the highest slope.
        // They should be equal but there might be
-        assert(abs(leftmost->first-gradient.rbegin()->first)<0.0001);
+        assert(abs(leftmost->first-gradient.rbegin()->first) < 0.0001); // a small difference due to rounding error
        break;
      }
 
      // We have found the next intersection!
-      pair<unsigned,unsigned> newd(S,leftmost->second);//new onebest for Sentence S is leftmost->second
+      pair<unsigned,unsigned> newd(S, leftmost->second);//new onebest for Sentence S is leftmost->second
 
-      if(leftmostx-previnserted->first<min_int) {
+      if (leftmostx-previnserted->first < min_int) {
        // Require that the intersection Point be at least min_int to the right of the previous
        // one (for this sentence). If not, we replace the previous intersection Point with
        // this one.
@@ -186,45 +188,45 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
        // be wrong we are going to replace previnsert by the new one because we do not want to keep
        // 2 very close thresholds: if the minima is there it could be an artifact.
 
-        map<float,diff_t>::iterator tit=thresholdmap.find(leftmostx);
-        if(tit==previnserted) {
+        map<float,diff_t>::iterator tit = thresholdmap.find(leftmostx);
+        if (tit == previnserted) {
          // The threshold is the same as before; this can happen if 2 candidates are the same, for example.
-          assert(previnserted->second.back().first==newd.first);
+          assert(previnserted->second.back().first == newd.first);
          previnserted->second.back()=newd; // just replace the 1 best for sentence S
          // previnsert doesn't change
        } else {
-          if(tit==thresholdmap.end()) {
+          if (tit == thresholdmap.end()) {
            thresholdmap[leftmostx]=previnserted->second; // We keep the diffs at previnsert
            thresholdmap.erase(previnserted); // erase old previnsert
-            previnserted=thresholdmap.find(leftmostx); // point previnsert to the new threshold
+            previnserted = thresholdmap.find(leftmostx); // point previnsert to the new threshold
            previnserted->second.back()=newd; // We update the diff for sentence S
            // Threshold already exists but is not the previous one.
          } else {
            // We append the diffs in previnsert to tit before destroying previnsert.
            tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end());
-            assert(tit->second.back().first==newd.first);
+            assert(tit->second.back().first == newd.first);
            tit->second.back()=newd; // change diff for sentence S
            thresholdmap.erase(previnserted); // erase old previnsert
-            previnserted=tit; // point previnsert to the new threshold
+            previnserted = tit; // point previnsert to the new threshold
          }
        }
        assert(previnserted != thresholdmap.end());
      } else { //normal insertion process
-        previnserted=AddThreshold(thresholdmap,leftmostx,newd);
+        previnserted = AddThreshold(thresholdmap, leftmostx, newd);
      }
-      gradientit=leftmost;
-    } // while(gradientit!=gradient.end()){
+      gradientit = leftmost;
+    } // while (gradientit!=gradient.end()){
  } // loop on S
 
  // Now the thresholdlist is up to date: it contains a list of all the parameter_ts where
  // the function changed its value, along with the nbest list for the interval after each threshold.
 
  map<float,diff_t>::iterator thrit;
-  if(verboselevel()>6) {
-    cerr << "Thresholds:(" <<thresholdmap.size()<<")"<<endl;
-    for(thrit = thresholdmap.begin(); thrit!=thresholdmap.end(); thrit++) {
+  if (verboselevel() > 6) {
+    cerr << "Thresholds:(" << thresholdmap.size() << ")" << endl;
+    for (thrit = thresholdmap.begin(); thrit != thresholdmap.end(); thrit++) {
      cerr << "x: " << thrit->first << " diffs";
      for (size_t j = 0; j < thrit->second.size(); ++j) {
        cerr << " " << thrit->second[j].first << "," << thrit->second[j].second;
@@ -234,20 +236,20 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
  }
 
  // Last thing to do is compute the Stat score (i.e., BLEU) and find the minimum.
-  thrit=thresholdmap.begin();
+  thrit = thresholdmap.begin();
  ++thrit; // first diff corresponds to MIN_FLOAT and first1best
 
  diffs_t diffs;
-  for(; thrit!=thresholdmap.end(); thrit++)
+  for (; thrit != thresholdmap.end(); thrit++)
    diffs.push_back(thrit->second);
-  vector<statscore_t> scores=GetIncStatScore(first1best,diffs);
+  vector<statscore_t> scores = GetIncStatScore(first1best, diffs);
 
-  thrit=thresholdmap.begin();
-  statscore_t bestscore=MIN_FLOAT;
-  float bestx=MIN_FLOAT;
+  thrit = thresholdmap.begin();
+  statscore_t bestscore = MIN_FLOAT;
+  float bestx = MIN_FLOAT;
 
  // We skipped the first el of thresholdlist but GetIncStatScore returns 1 more for first1best.
-  assert(scores.size()==thresholdmap.size());
-  for(unsigned int sc=0; sc!=scores.size(); sc++) {
+  assert(scores.size() == thresholdmap.size());
+  for (unsigned int sc = 0; sc != scores.size(); sc++) {
    //cerr << "x=" << thrit->first << " => " << scores[sc] << endl;
    if (scores[sc] > bestscore) {
      // This is the score for the interval [lit2->first, (lit2+1)->first]
@@ -274,7 +276,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
      if (leftx == MIN_FLOAT) {
        bestx = rightx-1000;
      } else if (rightx == MAX_FLOAT) {
-        bestx = leftx+0.1;
+        bestx = leftx + 0.1;
      } else {
        bestx = 0.5 * (rightx + leftx);
      }
@@ -283,39 +285,39 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
    ++thrit;
  }
 
-  if(abs(bestx)<0.00015) {
+  if (abs(bestx) < 0.00015) {
    // The origin of the line is the best point! We put it back at 0
    // so we do not propagate rounding errors.
-    bestx=0.0;
+    bestx = 0.0;
 
    // Finally, we manage to extract the best score;
    // now we convert bestx (position on the line) to a point.
-    if(verboselevel()>4)
-      cerr<<"best point on line at origin"<<endl;
+    if (verboselevel() > 4)
+      cerr << "best point on line at origin" << endl;
  }
 
-  if(verboselevel()>3) {
+  if (verboselevel() > 3) {
    //    cerr<<"end Lineopt, bestx="<<bestx<<endl;
  }
-  bestpoint=direction*bestx+origin;
+  bestpoint = direction * bestx + origin;
  bestpoint.SetScore(bestscore);
  return bestscore;
 }
 
-void Optimizer::Get1bests(const Point& P,vector<unsigned>& bests)const
+void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
 {
  assert(FData);
  bests.clear();
  bests.resize(size());
 
-  for(unsigned i=0; i<size(); i++) {
-    float bestfs=MIN_FLOAT;
-    unsigned idx=0;
-    unsigned j;
+  for (unsigned i = 0; i < size(); i++) {
+    float bestfs = MIN_FLOAT;
+    unsigned idx = 0;
+    unsigned j;
 
-    for(j=0; j<FData->get(i).size(); j++) {
-      float curfs=P*FData->get(i,j);
-      if(curfs>bestfs) {
-        bestfs=curfs;
-        idx=j;
+    for (j = 0; j < FData->get(i).size(); j++) {
+      float curfs = P * FData->get(i, j);
+      if (curfs > bestfs) {
+        bestfs = curfs;
+        idx = j;
      }
    }
    bests[i]=idx;
@@ -323,18 +325,18 @@ void Optimizer::Get1bests(const Point& P,vector<unsigned>& bests)const
  }
 }
 
-statscore_t Optimizer::Run(Point& P)const
+statscore_t Optimizer::Run(Point& P) const
 {
-  if(!FData) {
-    cerr<<"error trying to optimize without Features loaded"<<endl;
+  if (!FData) {
+    cerr << "error trying to optimize without Features loaded" << endl;
    exit(2);
  }
-  if(!scorer) {
-    cerr<<"error trying to optimize without a Scorer loaded"<<endl;
+  if (!scorer) {
+    cerr << "error trying to optimize without a Scorer loaded" << endl;
    exit(2);
  }
-  if (scorer->getReferenceSize()!=FData->size()) {
-    cerr<<"error length mismatch between feature file and score file"<<endl;
+  if (scorer->getReferenceSize() != FData->size()) {
+    cerr << "error length mismatch between feature file and score file" << endl;
    exit(2);
  }
@@ -354,108 +356,108 @@ statscore_t Optimizer::Run(Point& P)const
 }
 
-vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst,vector<vector<pair<unsigned,unsigned> > > thediffs)const
+vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst, vector<vector<pair<unsigned,unsigned> > > thediffs) const
 {
  assert(scorer);
 
  vector<statscore_t> theres;
 
-  scorer->score(thefirst,thediffs,theres);
+  scorer->score(thefirst, thediffs, theres);
  return theres;
 }
 
-statscore_t SimpleOptimizer::TrueRun(Point& P)const
+statscore_t SimpleOptimizer::TrueRun(Point& P) const
 {
-  statscore_t prevscore=0;
-  statscore_t bestscore=MIN_FLOAT;
+  statscore_t prevscore = 0;
+  statscore_t bestscore = MIN_FLOAT;
  Point best;
 
  // If P is already defined and provides a score,
  // We must improve over this score.
-  if(P.GetScore() > bestscore) {
+  if (P.GetScore() > bestscore) {
    bestscore = P.GetScore();
    best = P;
  }
 
-  int nrun=0;
+  int nrun = 0;
  do {
    ++nrun;
-    if(verboselevel()>2&&nrun>1)
-      cerr<<"last diff="<<bestscore-prevscore<<" nrun "<<nrun<<endl;
-    prevscore=bestscore;
+    if (verboselevel() > 2 && nrun > 1)
+      cerr << "last diff=" << bestscore-prevscore << " nrun " << nrun << endl;
+    prevscore = bestscore;
 
    Point linebest;
 
-    for(unsigned int d=0; d<Point::getdim()+number_of_random_directions; d++) {
-      if(verboselevel()>4) {
+    for (unsigned int d = 0; d < Point::getdim()+number_of_random_directions; d++) {
+      if (verboselevel() > 4) {
        // cerr<<"minimizing along direction "<<d<<endl;
-        cerr<<"starting point: " << P << " => " << prevscore << endl;
+        cerr << "starting point: " << P << " => " << prevscore << endl;
      }
      Point direction;
-      if (d<Point::getdim()) { // regular updates along one dimension
-        for(unsigned int i=0; i<Point::getdim(); i++)
-          direction[i]=0.0;
-        direction[d]=1.0;
+      if (d < Point::getdim()) { // regular updates along one dimension
+        for (unsigned int i = 0; i < Point::getdim(); i++)
+          direction[i] = 0.0;
+        direction[d] = 1.0;
      } else { // random direction update
        direction.Randomize();
      }
-      statscore_t curscore=LineOptimize(P,direction,linebest);//find the minimum on the line
-      if(verboselevel()>5) {
-        cerr<<"direction: "<< d << " => " << curscore << endl;
-        cerr<<"\tending point: "<< linebest << " => " << curscore << endl;
+      statscore_t curscore = LineOptimize(P, direction, linebest);//find the minimum on the line
+      if (verboselevel() > 5) {
+        cerr << "direction: " << d << " => " << curscore << endl;
+        cerr << "\tending point: "<< linebest << " => " << curscore << endl;
      }
-      if(curscore>bestscore) {
-        bestscore=curscore;
-        best=linebest;
-        if(verboselevel()>3) {
-          cerr<<"new best dir:"<<d<<" ("<<nrun<<")"<<endl;
-          cerr<<"new best Point "<<best<<" => "<<curscore<<endl;
+      if (curscore > bestscore) {
+        bestscore = curscore;
+        best = linebest;
+        if (verboselevel() > 3) {
+          cerr << "new best dir:" << d << " (" << nrun << ")" << endl;
+          cerr << "new best Point " << best << " => " << curscore << endl;
        }
      }
    }
-    P=best; //update the current vector with the best point on all line tested
-    if(verboselevel()>3)
-      cerr<<nrun<<"\t"<<P<<endl;
-  } while(bestscore-prevscore>kEPS);
+    P = best; //update the current vector with the best point on all lines tested
+    if (verboselevel() > 3)
+      cerr << nrun << "\t" << P << endl;
+  } while (bestscore-prevscore > kEPS);
 
-  if(verboselevel()>2) {
-    cerr<<"end Powell Algo, nrun="<<nrun<<endl;
-    cerr<<"last diff="<<bestscore-prevscore<<endl;
-    cerr<<"\t"<<P<<endl;
+  if (verboselevel() > 2) {
+    cerr << "end Powell Algo, nrun=" << nrun << endl;
+    cerr << "last diff=" << bestscore-prevscore << endl;
+    cerr << "\t" << P << endl;
  }
  return bestscore;
 }
 
-statscore_t RandomDirectionOptimizer::TrueRun(Point& P)const
+statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
 {
  statscore_t prevscore = P.GetScore();
 
  // do specified number of random direction optimizations
  unsigned int nrun = 0;
  unsigned int nrun_no_change = 0;
-  for(; nrun_no_change<number_of_random_directions; nrun++, nrun_no_change++) {
+  for (; nrun_no_change < number_of_random_directions; nrun++, nrun_no_change++) {
    // choose a random direction in which to optimize
    Point direction;
    direction.Randomize();
 
    // find the best point along this direction
-    statscore_t score=LineOptimize(P,direction,P);
-    if(verboselevel()>4) {
-      cerr<<"direction: "<< direction << " => " << score;
-      cerr<<" ("<< (score-prevscore) << ")" << endl;
-      cerr<<"\tending point: "<< P << " => " << score << endl;
+    statscore_t score = LineOptimize(P, direction, P);
+    if (verboselevel() > 4) {
+      cerr << "direction: " << direction << " => " << score;
+      cerr << " (" << (score-prevscore) << ")" << endl;
+      cerr << "\tending point: " << P << " => " << score << endl;
    }
    if (score-prevscore > kEPS)
-      nrun_no_change=0;
+      nrun_no_change = 0;
    prevscore = score;
  }
 
-  if(verboselevel()>2) {
-    cerr<<"end Powell Algo, nrun="<<nrun<<endl;
+  if (verboselevel() > 2) {
+    cerr << "end Powell Algo, nrun=" << nrun << endl;
  }
  return prevscore;
 }
@@ -475,7 +477,7 @@ vector<string> OptimizerFactory::typenames;
 
 void OptimizerFactory::SetTypeNames()
 {
-  if(typenames.empty()) {
+  if (typenames.empty()) {
    typenames.resize(NOPTIMIZER);
    typenames[POWELL]="powell";
    typenames[RANDOM_DIRECTION]="random-direction";
@@ -485,7 +487,7 @@ void OptimizerFactory::SetTypeNames()
 }
 vector<string> OptimizerFactory::GetTypeNames()
 {
-  if(typenames.empty())
+  if (typenames.empty())
    SetTypeNames();
  return typenames;
 }
@@ -493,38 +495,38 @@ vector<string> OptimizerFactory::GetTypeNames()
 OptimizerFactory::OptType OptimizerFactory::GetOType(string type)
 {
  unsigned int thetype;
-  if(typenames.empty())
+  if (typenames.empty())
    SetTypeNames();
-  for(thetype=0; thetype<typenames.size()&&typenames[thetype]!=type; thetype++);
+  for (thetype = 0; thetype < typenames.size() && typenames[thetype] != type; thetype++) ;
  return((OptType)thetype);
 }
 
-Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,vector<unsigned> i2o,vector<parameter_t> start,string type, unsigned int nrandom)
+Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim, vector<unsigned> i2o, vector<parameter_t> start, string type, unsigned int nrandom)
 {
-  OptType T=GetOType(type);
-  if(T==NOPTIMIZER) {
-    cerr<<"Error: unknown Optimizer type "<<type<<endl;
+  OptType T = GetOType(type);
+  if (T == NOPTIMIZER) {
+    cerr << "Error: unknown Optimizer type " << type << endl;
diff --git a/mert/Optimizer.h b/mert/Optimizer.h
--- a/mert/Optimizer.h
+++ b/mert/Optimizer.h
-  Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom);
+  Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom);
  void SetScorer(Scorer *_scorer);
  void SetFData(FeatureData *_FData);
  virtual ~Optimizer();
 
-  unsigned size()const {
-    return (FData?FData->size():0);
+  unsigned size() const {
+    return FData ? FData->size() : 0;
  }
 
  /**
   * Generic wrapper around TrueRun to check a few things. Non virtual.
   */
-  statscore_t Run(Point&)const;
+  statscore_t Run(Point&) const;
 
  /**
   * Main function that performs an optimization.
   */
-  virtual statscore_t TrueRun(Point&)const=0;
+  virtual statscore_t TrueRun(Point&) const = 0;
 
  /**
   * Given a set of lambdas, get the nbest for each sentence.
   */
-  void Get1bests(const Point& param,vector<unsigned>& bests)const;
+  void Get1bests(const Point& param,vector<unsigned>& bests) const;
 
  /**
   * Given a set of nbests, get the Statistical score.
   */
-  statscore_t GetStatScore(const vector<unsigned>& nbests)const {
+  statscore_t GetStatScore(const vector<unsigned>& nbests) const {
    return scorer->score(nbests);
  }
 
-  statscore_t GetStatScore(const Point& param)const;
+  statscore_t GetStatScore(const Point& param) const;
 
-  vector<statscore_t> GetIncStatScore(vector<unsigned> ref,vector<vector<pair<unsigned,unsigned> > >)const;
+  vector<statscore_t> GetIncStatScore(vector<unsigned> ref, vector<vector<pair<unsigned,unsigned> > >) const;
 
  /**
   * Get the optimal Lambda and the best score in a particular direction from a given Point.
   */
-  statscore_t LineOptimize(const Point& start,const Point& direction,Point& best)const;
+  statscore_t LineOptimize(const Point& start, const Point& direction, Point& best) const;
 };
@@ -71,51 +70,57 @@ public:
  * Default basic optimizer.
  * This class implements Powell's method.
  */
-class SimpleOptimizer: public Optimizer
+class SimpleOptimizer : public Optimizer
 {
 private:
  const float kEPS;
 
 public:
-  SimpleOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom): Optimizer(dim, i2O, start,nrandom), kEPS(0.0001) {}
-  virtual statscore_t TrueRun(Point&)const;
+  SimpleOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+    : Optimizer(dim, i2O, start,nrandom), kEPS(0.0001) {}
+  virtual statscore_t TrueRun(Point&) const;
 };
 
 /**
  * An optimizer with random directions.
  */
-class RandomDirectionOptimizer: public Optimizer
+class RandomDirectionOptimizer : public Optimizer
 {
 private:
  const float kEPS;
 
 public:
-  RandomDirectionOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom): Optimizer(dim,i2O,start,nrandom), kEPS(0.0001) {}
-  virtual statscore_t TrueRun(Point&)const;
+  RandomDirectionOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+    : Optimizer(dim, i2O, start, nrandom), kEPS(0.0001) {}
+  virtual statscore_t TrueRun(Point&) const;
 };
 
 /**
  * Dumb baseline optimizer: just picks a random point and quits.
 */
-class RandomOptimizer: public Optimizer
+class RandomOptimizer : public Optimizer
 {
 public:
-  RandomOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start, unsigned int nrandom): Optimizer(dim,i2O,start,nrandom) {}
-  virtual statscore_t TrueRun(Point&)const;
+  RandomOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+    : Optimizer(dim, i2O, start, nrandom) {}
+  virtual statscore_t TrueRun(Point&) const;
 };
 
 class OptimizerFactory
 {
 public:
-  // unsigned dim;
-  //Point Start;
  static vector<string> GetTypeNames();
-  static Optimizer* BuildOptimizer(unsigned dim,vector<unsigned>tooptimize,vector<parameter_t> start,string type,unsigned int nrandom);
+  static Optimizer* BuildOptimizer(unsigned dim, vector<unsigned> tooptimize, vector<parameter_t> start, string type, unsigned int nrandom);
 
 private:
  OptimizerFactory() {}
  ~OptimizerFactory() {}
 
  // Add new optimizer here BEFORE NOPTIMIZER
-  enum OptType {POWELL=0,RANDOM_DIRECTION=1,RANDOM,NOPTIMIZER};
+  enum OptType {
+    POWELL = 0,
+    RANDOM_DIRECTION = 1,
+    RANDOM,NOPTIMIZER
+  };
+  static OptType GetOType(string);
  static vector<string> typenames;
  static void SetTypeNames();
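
Note (not part of the patch): the reformatted LineOptimize() performs a MERT-style line search. For each sentence, every candidate translation defines a line y = m*x + b along the search direction (m is the feature score projected on the direction, b is the feature score at the origin), and the code sweeps from x = -infinity to the right, jumping to the leftmost intersection each time to collect the thresholds where the 1-best changes. The following self-contained sketch mirrors that per-sentence sweep on toy data; all names in it (Line, cand, the hard-coded slopes and intercepts) are illustrative and are not the Moses mert types.

```cpp
// Standalone sketch of the upper-envelope sweep done per sentence in
// LineOptimize(). Each candidate is a line y = m*x + b; starting from the
// candidate that is best at x = -infinity, we repeatedly jump to the
// leftmost intersection with a steeper line. Each such x is a threshold
// where the 1-best candidate changes.
#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>
#include <vector>

struct Line {
  float m;  // slope: direction * feature vector (illustrative)
  float b;  // intercept: origin * feature vector (illustrative)
};

// Same formula as the intersect() helper in the patch.
static float intersect(float m1, float b1, float m2, float b2)
{
  float x = (b2 - b1) / (m1 - m2);
  return std::isfinite(x) ? x : std::numeric_limits<float>::max();
}

int main()
{
  // Toy candidates for a single sentence.
  std::vector<Line> cand = { {-2.0f, 5.0f}, {0.5f, 1.0f}, {3.0f, -4.0f} };

  // Best candidate at x = -infinity: smallest slope, ties broken by intercept.
  std::size_t cur = 0;
  for (std::size_t i = 1; i < cand.size(); i++)
    if (cand[i].m < cand[cur].m ||
        (cand[i].m == cand[cur].m && cand[i].b > cand[cur].b))
      cur = i;

  std::cout << "1-best at x=-inf: candidate " << cur << "\n";

  // Sweep to the right: find the leftmost intersection that changes the 1-best.
  while (true) {
    float leftmostx = std::numeric_limits<float>::max();
    std::size_t next = cur;
    for (std::size_t i = 0; i < cand.size(); i++) {
      if (cand[i].m <= cand[cur].m) continue;  // convexity: slope can only go up
      float x = intersect(cand[cur].m, cand[cur].b, cand[i].m, cand[i].b);
      if (x <= leftmostx) {  // keep the leftmost intersection (ties: last one examined wins)
        leftmostx = x;
        next = i;
      }
    }
    if (next == cur) break;  // no intersection to the right: envelope is complete
    std::cout << "threshold x=" << leftmostx
              << ": 1-best becomes candidate " << next << "\n";
    cur = next;
  }
  return 0;
}
```

Compiled with, for example, `g++ -std=c++11 sketch.cpp`, the program prints the breakpoints for the toy data; these play the same role as the keys inserted into thresholdmap in the patched code, which are then scored with GetIncStatScore to pick the best interval.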