fixed a bug related to the handling of feature names

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1821 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
nicolabertoldi 2008-06-05 11:26:54 +00:00
parent 37cf805139
commit 44d7e0e0f7
5 changed files with 56 additions and 40 deletions

View File

@ -62,33 +62,31 @@ void Data::loadnbest(const std::string &file)
nextPound = getNextPound(stringBuf, substring, "|||"); //third field
if (!existsFeatureNames()){
std::string stringsupport=substring;
// adding feature names
std::string tmpname="";
size_t tmpidx=0;
while (!stringsupport.empty()){
// TRACE_ERR("Decompounding: " << substring << std::endl);
nextPound = getNextPound(stringsupport, subsubstring);
// string ending with ":" are skipped, because they are the names of the features
if ((loc = subsubstring.find(":")) != subsubstring.length()-1){
featname2idx_[tmpname+"_"+stringify(tmpidx)]=idx2featname_.size();
idx2featname_[idx2featname_.size()]=tmpname+"_"+stringify(tmpidx);
tmpidx++;
}
else{
tmpidx=0;
tmpname=subsubstring.substr(0,subsubstring.size() - 1);
}
}
std::string features="";
for (size_t i=0; i<idx2featname_.size(); i++)
features+=idx2featname_[i]+" ";
std::string stringsupport=substring;
// adding feature names
std::string features="";
std::string tmpname="";
NumberOfFeatures(idx2featname_.size());
Features(features);
TRACE_ERR("number_of_features: " << NumberOfFeatures() << std::endl);
TRACE_ERR("features: " << Features() << std::endl);
size_t tmpidx=0;
while (!stringsupport.empty()){
// TRACE_ERR("Decompounding: " << substring << std::endl);
nextPound = getNextPound(stringsupport, subsubstring);
// string ending with ":" are skipped, because they are the names of the features
if ((loc = subsubstring.find(":")) != subsubstring.length()-1){
features+=tmpname+"_"+stringify(tmpidx)+" ";
tmpidx++;
}
else{
tmpidx=0;
tmpname=subsubstring.substr(0,subsubstring.size() - 1);
}
}
NumberOfFeatures(idx2featname_.size());
Features(features);
setFeatureMap();
}
// adding features
@ -103,6 +101,18 @@ void Data::loadnbest(const std::string &file)
}
featdata->add(featentry,sentence_index);
}
inp.close();
}
/**
 * Fill the feature name <-> index lookup tables (featname2idx_ and
 * idx2featname_) from the feature-name list returned by Features().
 *
 * The list is consumed token by token with getNextPound(), called with its
 * default delimiter. Every name that is not registered yet is stored under
 * the next free index, i.e. the current size of idx2featname_.
 *
 * Names that are already registered keep their index. This makes repeated
 * calls harmless: without the check a second call would append duplicate
 * entries to idx2featname_ and re-point featname2idx_ to the new indices.
 */
void Data::setFeatureMap()
{
  std::string remaining = Features();
  std::string name;

  while (!remaining.empty()) {
    // getNextPound() moves the next token into `name` and removes it from
    // `remaining`, so the loop always makes progress.
    getNextPound(remaining, name);

    // Defensive: ignore an empty token (e.g. produced by a stray delimiter)
    // and do not register the same feature name twice.
    if (name.empty() || featname2idx_.find(name) != featname2idx_.end())
      continue;

    const size_t nextIndex = idx2featname_.size();
    featname2idx_[name] = nextIndex;
    idx2featname_[nextIndex] = name;
  }
}

View File

@ -54,6 +54,7 @@ public:
/// Load the feature data from `featfile` and the score data from
/// `scorefile`, then call setFeatureMap().
void load(const std::string &featfile, const std::string &scorefile)
{
  // Order matters: both data sets are loaded before setFeatureMap() runs.
  featdata->load(featfile);
  scoredata->load(scorefile);

  setFeatureMap();
}
void save(const std::string &featfile,const std::string &scorefile, bool bin=false){
@ -62,8 +63,17 @@ public:
}
/// True when at least one feature name has been registered in idx2featname_.
bool existsFeatureNames()
{
  return !idx2featname_.empty();
}
std::string getFeatureName(size_t idx){ return idx2featname_[idx]; };
size_t getFeatureIndex(const std::string& name){ return featname2idx_[name]; };
/// Return the name of the feature stored at position `idx`.
///
/// @param idx  feature index; must be smaller than the number of registered
///             feature names.
/// @throws std::runtime_error if `idx` is not smaller than the number of
///         registered feature names.
std::string getFeatureName(size_t idx){
  if (idx >= idx2featname_.size()) {
    // The message is a plain literal on purpose: adding an integer to a
    // string literal is pointer arithmetic, not concatenation, and adding
    // two string literals/pointers does not compile.
    throw std::runtime_error("Error: you required a feature index beyond the number of available features");
  }
  return idx2featname_[idx];
}
/// Return the index registered for the feature called `name`.
///
/// @param name  feature name to look up in featname2idx_.
/// @throws std::runtime_error if `name` is not a registered feature.
size_t getFeatureIndex(const std::string& name){
  // Reject names that are NOT in the table. The lookup below uses
  // operator[], which would otherwise insert a default entry for an
  // unknown name.
  if (featname2idx_.find(name) == featname2idx_.end())
    throw std::runtime_error("Error: feature is unknown");
  return featname2idx_[name];
}
void setFeatureMap();
};

View File

@ -39,9 +39,7 @@ void FeatureData::load(ifstream& inFile)
{
FeatureArray entry;
int iter=0;
while (!inFile.eof()){
if (!inFile.good()){
std::cerr << "ERROR FeatureData::load inFile.good()" << std::endl;
@ -50,14 +48,15 @@ void FeatureData::load(ifstream& inFile)
entry.clear();
entry.load(inFile);
if (entry.size() == 0){
return;
if (entry.size() == 0)
break;
if (size() == 0){
number_of_features=entry.NumberOfFeatures();
features=entry.Features();
}
add(entry);
iter++;
}
if (size()>0)
number_of_features=get(0).NumberOfFeatures();
}

View File

@ -63,7 +63,6 @@ void ScoreArray::loadtxt(ifstream& inFile)
std::string::size_type loc;
TRACE_ERR("starting loadtxt..." << std::endl);
std::getline(inFile, stringBuf);
if (!inFile.good()){
return;
@ -93,7 +92,7 @@ void ScoreArray::loadtxt(ifstream& inFile)
std::getline(inFile, stringBuf);
if (!stringBuf.empty()){
// TRACE_ERR("Reading: " << stringBuf << std::endl);
if ((loc = stringBuf.find(SCORES_TXT_END)) != 0){
if ((loc = stringBuf.find(SCORES_TXT_END)) != 0){
TRACE_ERR("ERROR: ScoreArray::loadtxt(): Wrong footer");
return;
}

View File

@ -46,8 +46,6 @@ void ScoreData::load(ifstream& inFile)
int iter=0;
while (!inFile.eof()){
TRACE_ERR("iter " << iter << " size " << size() << std::endl);
entry.clear();
entry.loadtxt(inFile);