Change int to unsigned int where needed.

Add some debugging output (to be removed later).

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1794 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
nicolabertoldi 2008-05-23 11:48:16 +00:00
parent e8d26f4f5c
commit c9593648bb
14 changed files with 84 additions and 64 deletions

View File

@ -90,7 +90,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles) {
}
void BleuScorer::prepareStats(int sid, const string& text, ScoreStats& entry) {
void BleuScorer::prepareStats(unsigned int sid, const string& text, ScoreStats& entry) {
//cerr << text << endl;
//dump_counts(*_refcounts[sid]);
if (sid >= _refcounts.size()) {

View File

@ -131,6 +131,6 @@ void FeatureArray::load(const std::string &file, bool bin)
void FeatureArray::merge(FeatureArray& e)
{
//dummy implementation
for (int i=0; i< e.size(); i++)
for (unsigned int i=0; i< e.size(); i++)
add(e.get(i));
}

View File

@ -48,7 +48,7 @@ public:
return array_[i];
#endif
}
inline bool exists(int i){ return (i<array_.size())?true:false; }
inline bool exists(unsigned int i){ return (i<array_.size())?true:false; }
inline void setIndex(){ };

View File

@ -19,7 +19,7 @@ array_(stats.array_)
FeatureStats::FeatureStats(const size_t size)
{
for(int i = 0; i < size; i++)
for(unsigned int i = 0; i < size; i++)
array_.push_back(0);
};
@ -32,7 +32,7 @@ FeatureStats::FeatureStats(std::string &theString)
void FeatureStats::set(std::string &theString)
{
std::string substring, stringBuf;
std::string::size_type loc;
int nextPound;
FeatureStatsType sc;
// TRACE_ERR("Decompounding string: " << theString << std::endl);
@ -79,7 +79,7 @@ void FeatureStats::savetxt(std::ofstream& outFile)
outFile << " " << *i;
i++;
}
outFile << std::endl;
// outFile << std::endl;
}

View File

@ -9,9 +9,9 @@ Scorer.o \
Optimizer.o \
ifndef DEBUG
CFLAGS=-O3 -DTRACE_ENABLE
CFLAGS=-O3 -DTRACE_ENABLE -Wall
else
CFLAGS=-DTRACE_ENABLE -g -DDEBUG
CFLAGS=-DTRACE_ENABLE -g -DDEBUG -Wall
endif

View File

@ -30,17 +30,18 @@ Optimizer::Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start)
//warning: the init vector is a full set of parameters, of dimension pdim!
Point::pdim=Pd;
assert(start.size()==Pd);
Point::dim=i2O.size();
Point::optindices=i2O;
if(Point::pdim<Point::dim){
for(int i=0;i<Point::pdim;i++){
int j;
if (Point::pdim<Point::dim){
for (unsigned int i=0;i<Point::pdim;i++){
unsigned int j;
for(j=0;j<Point::dim;j++)
if(i==i2O[j])
break;
if (i==i2O[j])
break;
if(j==Point::dim)//the index i wasn't found in optindices: it is a fixed index, so we use the value from the start vector
Point::fixedweights[i]=start[i];
Point::fixedweights[i]=start[i];
}
}
};
@ -53,10 +54,9 @@ Optimizer::~Optimizer(){
statscore_t Optimizer::GetStatScore(const Point& param)const{
vector<unsigned> bests;
Get1bests(param,bests);
//cerr << "1BESTS: ";
//copy(bests.begin(),bests.end(),ostream_iterator<unsigned>(cerr," "));
statscore_t score = GetStatScore(bests);
//cerr << " score = " << score << endl;
cerr << "1BESTS: " << param << " => " << score << endl;
return score;
};
@ -91,11 +91,11 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
float min_int=0.0001;
//typedef pair<unsigned,unsigned> diff;//first the sentence that changes, second is the new 1best for this sentence
//list<threshold> thresholdlist;
map<float,diff_t> thresholdmap;
thresholdmap[MIN_FLOAT]=diff_t();
vector<unsigned> first1best;//the vector of nbests for x=-inf
for(int S=0;S<size();S++){
for(unsigned int S=0;S<size();S++){
map<float,diff_t >::iterator previnserted=thresholdmap.begin();
//first we determine the translation with the best feature score for each sentence and each value of x
//cerr << "Sentence " << S << endl;
@ -212,7 +212,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
statscore_t bestscore=MIN_FLOAT;
float bestx=MIN_FLOAT;
assert(scores.size()==thresholdmap.size());//we skipped the first el of thresholdlist but GetIncStatScore return 1 more for first1best
for(int sc=0;sc!=scores.size();sc++){
for(unsigned int sc=0;sc!=scores.size();sc++){
//cerr << "x=" << thrit->first << " => " << scores[sc] << endl;
if (scores[sc] > bestscore) {
//This is the score for the interval [lit2->first, (lit2+1)->first]
@ -297,8 +297,12 @@ statscore_t Optimizer::Run(Point& P)const{
cerr<<"error size mismatch between FeatureData and Scorer"<<endl;
exit(2);
}
if(verboselevel()>1)
cerr<<"starting point: "<< P;
statscore_t score=GetStatScore(P);
P.score=score;
if(verboselevel()>1)
cerr<<"starting point: "<< P << " => "<< P.score << endl;
statscore_t s=TrueRun(P);
P.score=s;//just in case its not done in TrueRun
if (verboselevel()>1)
@ -323,6 +327,9 @@ vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst,vector<
float SimpleOptimizer::eps=0.0001;
statscore_t SimpleOptimizer::TrueRun(Point& P)const{
statscore_t score=GetStatScore(P);
statscore_t prevscore=0;
statscore_t bestscore=MIN_FLOAT;
Point best;
@ -335,13 +342,13 @@ statscore_t SimpleOptimizer::TrueRun(Point& P)const{
Point linebest;
for(int d=0;d<Point::getdim();d++){
for(unsigned int d=0;d<Point::getdim();d++){
if(verboselevel()>4){
// cerr<<"minimizing along direction "<<d<<endl;
cerr<<"starting point: " << P << " => " << prevscore << endl;
}
Point direction;
for(int i=0;i<Point::getdim();i++)
for(unsigned int i=0;i<Point::getdim();i++)
direction[i];
direction[d]=1.0;
statscore_t curscore=LineOptimize(P,direction,linebest);//find the minimum on the line
@ -378,7 +385,7 @@ Just return a random point*/
statscore_t RandomOptimizer::TrueRun(Point& P)const{
vector<parameter_t> min(Point::getdim());
vector<parameter_t> max(Point::getdim());
for(int d=0;d<Point::getdim();d++){
for(unsigned int d=0;d<Point::getdim();d++){
min[d]=0.0;
max[d]=1.0;
}
@ -405,7 +412,7 @@ vector<string> OptimizerFactory::GetTypeNames(){
}
OptimizerFactory::OptType OptimizerFactory::GetOType(string type){
int thetype;
unsigned int thetype;
if(typenames.empty())
SetTypeNames();
for(thetype=0;thetype<typenames.size();thetype++)
@ -420,7 +427,7 @@ Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,vector<unsigned> i2o,ve
if(T==NOPTIMIZER){
cerr<<"Error: unknown Optimizer type "<<type<<endl;
cerr<<"Known Algorithm are:"<<endl;
int thetype;
unsigned int thetype;
for(thetype=0;thetype<typenames.size();thetype++)
cerr<<typenames[thetype]<<endl;
throw ("unknown Optimizer Type");

View File

@ -32,7 +32,7 @@ void PerScorer::setReferenceFiles(const vector<string>& referenceFiles) {
}
void PerScorer::prepareStats(int sid, const string& text, ScoreStats& entry) {
void PerScorer::prepareStats(unsigned int sid, const string& text, ScoreStats& entry) {
if (sid >= _reflengths.size()) {
stringstream msg;
msg << "Sentence id (" << sid << ") not found in reference set";

View File

@ -16,17 +16,17 @@ unsigned Point::ncall=0;
void Point::Randomize(const vector<parameter_t>& min,const vector<parameter_t>& max){
assert(min.size()==Point::dim);
assert(max.size()==Point::dim);
for (int i=0; i<size(); i++)
for (unsigned int i=0; i<size(); i++)
operator[](i)= min[i] + (float)random()/(float)RAND_MAX * (float)(max[i]-min[i]);
}
void Point::Normalize(){
parameter_t norm=0.0;
for (int i=0; i<size(); i++)
for (unsigned int i=0; i<size(); i++)
norm+= operator[](i)*operator[](i);
if(norm!=0.0){
norm=sqrt(norm);
for (int i=0; i<size(); i++)
for (unsigned int i=0; i<size(); i++)
operator[](i)/=norm;
}
}
@ -34,11 +34,11 @@ void Point::Normalize(){
//Can initialize from a vector of dim or pdim
Point::Point(const vector<parameter_t>& init):vector<parameter_t>(Point::dim){
if(init.size()==dim){
for (int i=0; i<Point::dim; i++)
for (unsigned int i=0; i<Point::dim; i++)
operator[](i)=init[i];
}else{
assert(init.size()==pdim);
for (int i=0; i<Point::dim; i++)
for (unsigned int i=0; i<Point::dim; i++)
operator[](i)=init[optindices[i]];
}
};
@ -77,10 +77,11 @@ Point Point::operator*(float l)const{
ostream& operator<<(ostream& o,const Point& P){
vector<parameter_t> w=P.GetAllWeights();
for(int i=0;i<Point::pdim;i++)
o<<w[i]<<' ';
// o<<endl;
return o;
// o << "[" << Point::pdim << "] ";
for(unsigned int i=0;i<Point::pdim;i++)
o << w[i] << " ";
// o << "=> " << P.GetScore();
return o;
};
vector<parameter_t> Point::GetAllWeights()const{
@ -89,10 +90,10 @@ vector<parameter_t> Point::GetAllWeights()const{
w=*this;
}else{
w.resize(pdim);
for (int i=0; i<size(); i++)
for (unsigned int i=0; i<size(); i++)
w[optindices[i]]=operator[](i);
for(map<unsigned,float >::iterator it=fixedweights.begin();it!=fixedweights.end();it++)
w[it->first]=it->second;
for(map<unsigned,float >::iterator it=fixedweights.begin();it!=fixedweights.end();it++)
w[it->first]=it->second;
}
return w;
};

View File

@ -14,17 +14,17 @@ class Point:public vector<parameter_t>{
friend class Optimizer;
private:
/**The indices over which we optimize*/
static vector<unsigned> optindices;
static vector<unsigned int> optindices;
/**dimension of optindices and of the parent vector*/
static unsigned dim;
static unsigned int dim;
/**fixed weights in case of partial optimization*/
static map<unsigned,parameter_t> fixedweights;
static map<unsigned int,parameter_t> fixedweights;
/**total size of the parameter space; we have pdim=fixedweights.size()+optindices.size()*/
static unsigned pdim;
static unsigned ncall;
static unsigned int pdim;
static unsigned int ncall;
public:
static unsigned getdim(){return dim;}
static unsigned getpdim(){return pdim;}
static unsigned int getdim(){return dim;}
static unsigned int getpdim(){return pdim;}
static bool OptimizeAll(){return fixedweights.empty();};
statscore_t score;
Point():vector<parameter_t>(dim){};
@ -39,6 +39,7 @@ class Point:public vector<parameter_t>{
void Normalize();
/**return a vector of size pdim where all weights have been put(including fixed ones)*/
vector<parameter_t> GetAllWeights()const;
statscore_t GetScore()const { return score; };
};
#endif

View File

@ -38,7 +38,7 @@ public:
inline void clear() { array_.clear(); }
inline ScoreArray get(int i){ return array_.at(i); }
inline bool exists(int i){ return (i<array_.size())?true:false; }
inline bool exists(unsigned int i){ return (i<array_.size())?true:false; }
inline ScoreStats get(int i, int j){ return array_.at(i).get(j); }

View File

@ -19,7 +19,7 @@ array_(stats.array_)
ScoreStats::ScoreStats(const size_t size)
{
for(int i = 0; i < size; i++)
for(unsigned int i = 0; i < size; i++)
array_.push_back(0);
};
@ -32,7 +32,7 @@ ScoreStats::ScoreStats(std::string &theString)
void ScoreStats::set(std::string &theString)
{
std::string substring, stringBuf;
std::string::size_type loc;
int nextPound;
ScoreStatsType sc;
while (!theString.empty()){

View File

@ -28,8 +28,9 @@ class Scorer {
public:
Scorer(const string& name): _name(name), _scoreData(0),_preserveCase(false) {}
Scorer(const string& name): _name(name), _scoreData(0),_preserveCase(false) {};
virtual ~Scorer(){};
/**
* set the reference files. This must be called before prepareStats.
**/
@ -141,8 +142,9 @@ class Scorer {
class StatisticsBasedScorer : public Scorer {
public:
StatisticsBasedScorer(const string& name): Scorer(name) {}
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
StatisticsBasedScorer(const string& name): Scorer(name) {}
~StatisticsBasedScorer(){};
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
statscores_t& scores);
protected:
@ -162,7 +164,7 @@ class BleuScorer: public StatisticsBasedScorer {
public:
BleuScorer() : StatisticsBasedScorer("BLEU"),_refLengthStrategy(SHORTEST) {}
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(int sid, const string& text, ScoreStats& entry);
virtual void prepareStats(unsigned int sid, const string& text, ScoreStats& entry);
static const int LENGTH;
protected:
@ -171,6 +173,7 @@ class BleuScorer: public StatisticsBasedScorer {
private:
//no copy
BleuScorer(const BleuScorer&);
~BleuScorer(){};
BleuScorer& operator=(const BleuScorer&);
@ -229,7 +232,7 @@ class PerScorer: public StatisticsBasedScorer {
public:
PerScorer() : StatisticsBasedScorer("PER") {}
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(int sid, const string& text, ScoreStats& entry);
virtual void prepareStats(unsigned int sid, const string& text, ScoreStats& entry);
protected:
@ -239,6 +242,7 @@ class PerScorer: public StatisticsBasedScorer {
//no copy
PerScorer(const PerScorer&);
~PerScorer(){};
PerScorer& operator=(const PerScorer&);
// data extracted from reference files

View File

@ -22,7 +22,7 @@ int setverboselevel(int v){
int getNextPound(std::string &theString, std::string &substring, const std::string delimiter)
{
int pos = 0;
unsigned int pos = 0;
//skip all occurrences of delimiter
while ( pos == 0 )

View File

@ -31,8 +31,9 @@ void usage(void) {
cerr<<"[-t\tthe optimizer(default powell)]"<<endl;
cerr<<"[--sctype|-s] the scorer type (default BLEU)"<<endl;
cerr<<"[--scfile|-S] the scorer data file (default score.data)"<<endl;
cerr<<"[--ffile|-F] the feature data file data file (default feature.data)"<<endl;
cerr<<"[-v] verbose level"<<endl;
cerr<<"[--ffile|-F] the feature data file (default feature.data)"<<endl;
cerr<<"[--ifile|-i] the starting point data file (default init.opt)"<<endl;
cerr<<"[-v] verbose level"<<endl;
exit(1);
}
@ -45,7 +46,8 @@ static struct option long_options[] =
{"sctype",1,0,'s'},
{"scfile",1,0,'S'},
{"ffile",1,0,'F'},
{"verbose",1,0,'v'},
{"ifile",1,0,'i'},
{"verbose",1,0,'v'},
{0, 0, 0, 0}
};
int option_index;
@ -58,7 +60,8 @@ int main (int argc, char **argv) {
string scorertype("BLEU");
string scorerfile("statscore.data");
string featurefile("features.data");
vector<unsigned> tooptimize;
string initfile("init.opt");
vector<unsigned> tooptimize;
vector<parameter_t> start;
while ((c=getopt_long (argc, argv, "d:n:t:s:S:F:v:", long_options, &option_index)) != -1) {
switch (c) {
@ -80,6 +83,9 @@ int main (int argc, char **argv) {
case 'F':
featurefile=string(optarg);
break;
case 'i':
initfile=string(optarg);
break;
case 'v':
setverboselevel(strtol(optarg,NULL,10));
break;
@ -98,9 +104,9 @@ int main (int argc, char **argv) {
for(i=0;i<pdim;i++)
tooptimize[i]=i;
}
ifstream opt("init.opt");
ifstream opt(initfile.c_str());
if(opt.fail()){
cerr<<"could not open init.opt"<<endl;
cerr<<"could not open initfile: " << initfile << endl;
exit(3);
}
start.resize(pdim);//to do:read from file
@ -108,7 +114,7 @@ int main (int argc, char **argv) {
for( j=0;j<pdim&&!opt.fail();j++)
opt>>start[j];
if(j<pdim){
cerr<<"error could not initialize start point with init.opt"<<endl;
cerr<<"error could not initialize start point with " << initfile << endl;
exit(3);
}
@ -136,7 +142,7 @@ int main (int argc, char **argv) {
vector<parameter_t> min(Point::getdim());
vector<parameter_t> max(Point::getdim());
for(int d=0;d<Point::getdim();d++){
for(unsigned int d=0;d<Point::getdim();d++){
min[d]=0.0;
max[d]=1.0;
}
@ -158,6 +164,7 @@ int main (int argc, char **argv) {
if(ntry>1)
cerr<<"variance of the score (for "<<ntry<<" try):"<<var<<endl;
cerr<<"best score: "<<best<<endl;
cerr << "Best point: " << bestP << " => " << best << endl;
ofstream res("weights.txt");
res<<bestP<<endl;
timer.stop("Stopping...");