2008-05-14 11:57:45 +04:00
|
|
|
/*
|
|
|
|
* FeatureData.h
|
|
|
|
* met - Minimum Error Training
|
|
|
|
*
|
|
|
|
* Created by Nicola Bertoldi on 13/05/08.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef FEATURE_DATA_H
|
|
|
|
#define FEATURE_DATA_H
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
#include <iostream>
#include <limits>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "Util.h"
#include "FeatureArray.h"
|
|
|
|
|
|
|
|
class FeatureData
|
|
|
|
{
|
2008-05-27 20:50:52 +04:00
|
|
|
|
2008-05-14 11:57:45 +04:00
|
|
|
protected:
|
2008-05-27 20:50:52 +04:00
|
|
|
featdata_t array_;
|
|
|
|
idx2name idx2arrayname_; //map from index to name of array
|
|
|
|
name2idx arrayname2idx_; //map from name to index of array
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2008-06-05 21:03:54 +04:00
|
|
|
|
2008-05-14 11:57:45 +04:00
|
|
|
private:
|
2008-05-27 20:50:52 +04:00
|
|
|
size_t number_of_features;
|
2008-06-05 11:23:34 +04:00
|
|
|
std::string features;
|
2008-06-05 21:03:54 +04:00
|
|
|
|
|
|
|
map<std::string, size_t> featname2idx_; //map from name to index of features
|
|
|
|
map<size_t, std::string> idx2featname_; //map from index to name of features
|
2008-05-14 11:57:45 +04:00
|
|
|
|
|
|
|
public:
|
|
|
|
FeatureData();
|
|
|
|
|
|
|
|
~FeatureData(){};
|
|
|
|
|
|
|
|
inline void clear() { array_.clear(); }
|
|
|
|
|
2008-05-27 20:50:52 +04:00
|
|
|
inline FeatureArray get(const std::string& idx){ return array_.at(getIndex(idx)); }
|
|
|
|
inline FeatureArray& get(size_t idx){ return array_.at(idx); }
|
|
|
|
inline const FeatureArray& get(size_t idx) const{ return array_.at(idx); }
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2008-05-27 20:50:52 +04:00
|
|
|
inline bool exists(const std::string & sent_idx){ return exists(getIndex(sent_idx)); }
|
|
|
|
inline bool exists(int sent_idx){ return (sent_idx>-1 && sent_idx<(int) array_.size())?true:false; }
|
2008-05-14 18:16:05 +04:00
|
|
|
|
2008-05-27 20:50:52 +04:00
|
|
|
inline FeatureStats& get(size_t i, size_t j){ return array_.at(i).get(j); }
|
|
|
|
inline const FeatureStats& get(size_t i, size_t j) const { return array_.at(i).get(j); }
|
2008-05-14 18:16:05 +04:00
|
|
|
|
|
|
|
void add(FeatureArray& e);
|
2008-05-27 20:50:52 +04:00
|
|
|
void add(FeatureStats& e, const std::string& sent_idx);
|
2008-05-14 11:57:45 +04:00
|
|
|
|
|
|
|
inline size_t size(){ return array_.size(); }
|
2008-06-05 11:23:34 +04:00
|
|
|
inline size_t NumberOfFeatures() const{ return number_of_features; }
|
|
|
|
inline void NumberOfFeatures(size_t v){ number_of_features = v; }
|
|
|
|
inline std::string Features() const{ return features; }
|
|
|
|
inline void Features(const std::string f){ features = f; }
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2008-05-14 18:16:05 +04:00
|
|
|
void save(const std::string &file, bool bin=false);
|
|
|
|
void save(ofstream& outFile, bool bin=false);
|
|
|
|
inline void save(bool bin=false){ save("/dev/stdout", bin); }
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2008-05-14 18:16:05 +04:00
|
|
|
void load(ifstream& inFile);
|
|
|
|
void load(const std::string &file);
|
2008-05-27 20:50:52 +04:00
|
|
|
|
|
|
|
bool check_consistency();
|
|
|
|
void setIndex();
|
|
|
|
|
|
|
|
inline int getIndex(const std::string& idx){
|
|
|
|
name2idx::iterator i = arrayname2idx_.find(idx);
|
|
|
|
if (i!=arrayname2idx_.end())
|
|
|
|
return i->second;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline std::string getIndex(size_t idx){
|
|
|
|
idx2name::iterator i = idx2arrayname_.find(idx);
|
|
|
|
if (i!=idx2arrayname_.end())
|
|
|
|
throw runtime_error("there is no entry at index " + idx);
|
|
|
|
return i->second;
|
|
|
|
}
|
2008-06-05 21:03:54 +04:00
|
|
|
|
|
|
|
|
|
|
|
bool existsFeatureNames(){ return (idx2featname_.size() > 0)?true:false; };
|
|
|
|
|
|
|
|
std::string getFeatureName(size_t idx){
|
|
|
|
if (idx >= idx2featname_.size())
|
|
|
|
throw runtime_error("Error: you required an too big index");
|
|
|
|
return idx2featname_[idx];
|
|
|
|
};
|
|
|
|
|
|
|
|
size_t getFeatureIndex(const std::string& name){
|
2010-01-08 18:12:28 +03:00
|
|
|
if (featname2idx_.find(name)==featname2idx_.end())
|
|
|
|
throw runtime_error("Error: feature " + name +" is unknown");
|
2008-06-05 21:03:54 +04:00
|
|
|
return featname2idx_[name];
|
|
|
|
};
|
|
|
|
|
|
|
|
void setFeatureMap(const std::string feat);
|
2008-05-14 11:57:45 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2008-05-14 15:11:55 +04:00
|
|
|
#endif
|
2008-05-15 10:44:36 +04:00
|
|
|
|