added an (optional) positivity constraint to MERT

This commit is contained in:
tosch 2012-03-30 11:50:23 +02:00
parent 1d9f043e8f
commit 47ee26951e
7 changed files with 78 additions and 24 deletions

View File

@ -113,8 +113,15 @@ public:
/**
 * Look up the index registered for a feature name.
 *
 * @param name feature name to resolve.
 * @return the index stored in m_feature_name_to_index for @p name.
 * @throws runtime_error if @p name is not registered; the message lists
 *         every registered feature name so that typos are easy to spot.
 */
size_t getFeatureIndex(const std::string& name) const {
  typedef std::map<std::string, size_t>::const_iterator NameIter;
  const NameIter found = m_feature_name_to_index.find(name);
  if (found == m_feature_name_to_index.end()) {
    // Build the diagnostic only on the failure path. A separate iterator name
    // avoids shadowing the lookup result, and the separator is emitted between
    // entries so the list does not end with a dangling ", ".
    std::string msg = "Error: feature " + name + " is unknown. Known features: ";
    for (NameIter known = m_feature_name_to_index.begin();
         known != m_feature_name_to_index.end(); ++known) {
      if (known != m_feature_name_to_index.begin())
        msg += ", ";
      msg += known->first;
    }
    throw runtime_error(msg);
  }
  return found->second;
}

View File

@ -32,8 +32,8 @@ inline float intersect(float m1, float b1, float m2, float b2)
} // namespace
Optimizer::Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom)
: m_scorer(NULL), m_feature_data(), m_num_random_directions(nrandom)
Optimizer::Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& pos, const vector<parameter_t>& start, unsigned int nrandom)
: m_scorer(NULL), m_feature_data(), m_num_random_directions(nrandom), positive(pos)
{
// Warning: the init vector is a full set of parameters, of dimension m_pdim!
Point::m_pdim = Pd;
@ -243,7 +243,16 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
CHECK(scores.size() == thresholdmap.size());
for (unsigned int sc = 0; sc != scores.size(); sc++) {
//cerr << "x=" << thrit->first << " => " << scores[sc] << endl;
if (scores[sc] > bestscore) {
//enforce positivity
Point respoint = origin + direction * thrit->first;
bool is_valid = true;
for (uint k=0; k < respoint.getdim(); k++) {
if (positive[k] && respoint[k] <= 0.0)
is_valid = false;
}
if (is_valid && scores[sc] > bestscore) {
// This is the score for the interval [lit2->first, (lit2+1)->first]
// unless we're at the last score, when it's the score
// for the interval [lit2->first,+inf].

View File

@ -22,8 +22,10 @@ protected:
FeatureDataHandle m_feature_data; // no accessor for them only child can use them
unsigned int m_num_random_directions;
// Positivity flags consulted by LineOptimize: a candidate point is rejected when
// positive[k] is set and its k-th coordinate is <= 0. Stored by value rather than
// by reference so the flags cannot dangle if the vector passed to the constructor
// is destroyed before this optimizer.
const vector<bool> positive;
public:
Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom);
Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& positive, const vector<parameter_t>& start, unsigned int nrandom);
void SetScorer(Scorer *scorer) { m_scorer = scorer; }
void SetFeatureData(FeatureDataHandle feature_data) { m_feature_data = feature_data; }
@ -75,8 +77,9 @@ class SimpleOptimizer : public Optimizer
private:
const float kEPS;
public:
SimpleOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, start,nrandom), kEPS(0.0001) {}
SimpleOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
const vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, positive, start,nrandom), kEPS(0.0001) {}
virtual statscore_t TrueRun(Point&) const;
};
@ -88,8 +91,9 @@ class RandomDirectionOptimizer : public Optimizer
private:
const float kEPS;
public:
RandomDirectionOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, start, nrandom), kEPS(0.0001) {}
RandomDirectionOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
const vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, positive, start, nrandom), kEPS(0.0001) {}
virtual statscore_t TrueRun(Point&) const;
};
@ -99,8 +103,9 @@ public:
// Optimizer subclass that adds no data members of its own; it only supplies
// constructors that forward to Optimizer and declares a virtual TrueRun.
class RandomOptimizer : public Optimizer
{
public:
// Constructor without positivity flags: forwards dim, i2O, start and nrandom
// to the Optimizer constructor unchanged.
RandomOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, start, nrandom) {}
// Constructor taking `positive`: forwards it, together with the remaining
// arguments, to the Optimizer constructor.
RandomOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
const vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, positive, start, nrandom) {}
// Declared here only; the definition is not visible in this section.
virtual statscore_t TrueRun(Point&) const;
};

View File

@ -35,6 +35,7 @@ OptimizerFactory::OptimizerType OptimizerFactory::GetOptimizerType(const string&
Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,
const vector<unsigned>& i2o,
const std::vector<bool>& positive,
const vector<parameter_t>& start,
const string& type,
unsigned int nrandom)
@ -51,13 +52,13 @@ Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,
switch (opt_type) {
case POWELL:
return new SimpleOptimizer(dim, i2o, start, nrandom);
return new SimpleOptimizer(dim, i2o, positive, start, nrandom);
break;
case RANDOM_DIRECTION:
return new RandomDirectionOptimizer(dim, i2o, start, nrandom);
return new RandomDirectionOptimizer(dim, i2o, positive, start, nrandom);
break;
case RANDOM:
return new RandomOptimizer(dim, i2o, start, nrandom);
return new RandomOptimizer(dim, i2o, positive, start, nrandom);
break;
default:
cerr << "Error: unknown optimizer" << type << endl;

View File

@ -27,6 +27,7 @@ class OptimizerFactory
/**
 * Build an optimizer of the kind named by @p type.
 *
 * @param dim          passed through as the optimizer's first constructor argument.
 * @param to_optimize  passed through as the optimizer's second constructor argument.
 * @param positive     positivity flags handed to the optimizer's constructor.
 * @param start        starting parameters handed to the optimizer's constructor.
 * @param type         optimizer name used to select the concrete class.
 * @param nrandom      passed through as the optimizer's last constructor argument.
 * @return pointer to the optimizer created with `new` for a recognised type.
 *         NOTE(review): the result for an unrecognised type is not visible
 *         here (an error is printed to cerr) -- confirm against the definition.
 */
static Optimizer* BuildOptimizer(unsigned dim,
const std::vector<unsigned>& to_optimize,
const std::vector<bool>& positive,
const std::vector<parameter_t>& start,
const std::string& type,
unsigned int nrandom);

View File

@ -9,11 +9,11 @@ namespace {
// Test helper: returns true when OptimizerFactory::BuildOptimizer yields a
// non-NULL optimizer for the given arguments. The result is held in a
// scoped_ptr, so the optimizer is deleted when this function returns.
inline bool CheckBuildOptimizer(unsigned dim,
                                const vector<unsigned>& to_optimize,
                                const vector<bool>& positive,
                                const vector<parameter_t>& start,
                                const string& type,
                                unsigned int num_random) {
  // Single call using the signature that includes `positive`; the earlier
  // five-argument call declared `optimizer` a second time in the same scope.
  boost::scoped_ptr<Optimizer> optimizer(
      OptimizerFactory::BuildOptimizer(dim, to_optimize, positive, start, type, num_random));
  return optimizer.get() != NULL;
}
@ -39,8 +39,11 @@ BOOST_AUTO_TEST_CASE(optimizer_build) {
start.push_back(0.1);
start.push_back(0.2);
const unsigned int num_random = 1;
std::vector<bool> positive(dim);
for (int k=0; k < dim; k++)
positive[k] = false;
BOOST_CHECK(CheckBuildOptimizer(dim, to_optimize, start, "powell", num_random));
BOOST_CHECK(CheckBuildOptimizer(dim, to_optimize, start, "random", num_random));
BOOST_CHECK(CheckBuildOptimizer(dim, to_optimize, start, "random-direction", num_random));
BOOST_CHECK(CheckBuildOptimizer(dim, to_optimize, positive, start, "powell", num_random));
BOOST_CHECK(CheckBuildOptimizer(dim, to_optimize, positive, start, "random", num_random));
BOOST_CHECK(CheckBuildOptimizer(dim, to_optimize, positive, start, "random-direction", num_random));
}

View File

@ -36,6 +36,7 @@ const char kDefaultScorer[] = "BLEU";
const char kDefaultScorerFile[] = "statscore.data";
const char kDefaultFeatureFile[] = "features.data";
const char kDefaultInitFile[] = "init.opt";
const char kDefaultPositiveString[] = "";
// Used when saving optimized weights.
const char kOutputFile[] = "weights.txt";
@ -108,6 +109,7 @@ void usage(int ret)
cerr << "[--scfile|-S] comma separated list of scorer data files (default " << kDefaultScorerFile << ")" << endl;
cerr << "[--ffile|-F] comma separated list of feature data files (default " << kDefaultFeatureFile << ")" << endl;
cerr << "[--ifile|-i] the starting point data file (default " << kDefaultInitFile << ")" << endl;
cerr << "[--positive|-P] indexes with positive weights (default none)"<<endl;
#ifdef WITH_THREADS
cerr << "[--threads|-T] use multiple threads (default 1)" << endl;
#endif
@ -125,6 +127,7 @@ static struct option long_options[] = {
{"rseed", required_argument, 0, 'r'},
{"optimize", 1, 0, 'o'},
{"pro", required_argument, 0, 'p'},
{"positive",1,0,'P'},
{"type", 1, 0, 't'},
{"sctype", 1, 0, 's'},
{"scconfig", required_argument, 0, 'c'},
@ -154,6 +157,7 @@ struct ProgramOption {
string scorer_file;
string feature_file;
string init_file;
string positive_string;
size_t num_threads;
float shard_size;
size_t shard_count;
@ -171,6 +175,7 @@ struct ProgramOption {
scorer_file(kDefaultScorerFile),
feature_file(kDefaultFeatureFile),
init_file(kDefaultInitFile),
positive_string(kDefaultPositiveString),
num_threads(1),
shard_size(0),
shard_count(0) { }
@ -180,7 +185,7 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
int c;
int option_index;
while ((c = getopt_long(argc, argv, "o:r:d:n:m:t:s:S:F:v:p:", long_options, &option_index)) != -1) {
while ((c = getopt_long(argc, argv, "o:r:d:n:m:t:s:S:F:v:p:P:", long_options, &option_index)) != -1) {
switch (c) {
case 'o':
opt->to_optimize_str = string(optarg);
@ -234,6 +239,9 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
case 'h':
usage(0);
break;
case 'P':
opt->positive_string = string(optarg);
break;
default:
usage(1);
}
@ -253,6 +261,7 @@ int main(int argc, char **argv)
vector<vector<parameter_t> > start_list;
vector<parameter_t> min;
vector<parameter_t> max;
vector<bool> positive;
// NOTE: these min and max values bound the starting points of the algorithm; they are not strict bounds on the result!
if (option.pdim < 0)
@ -348,9 +357,7 @@ int main(int argc, char **argv)
scorer->setScoreData(data.getScoreData().get());
//ADDED_BY_TS
data.removeDuplicates();
//END_ADDED
PrintUserTime("Data loaded");
@ -390,6 +397,27 @@ int main(int argc, char **argv)
}
}
positive.resize(option.pdim);
for (int i=0; i<option.pdim; i++)
positive[i] = false;
if (option.positive_string.length() > 0) {
// Parse string to get weights that need to be positive
std::string substring;
int index;
while (!option.positive_string.empty()) {
getNextPound(option.positive_string, substring, ",");
index = data.getFeatureIndex(substring);
//index = strtol(substring.c_str(), NULL, 10);
if (index >= 0 && index < option.pdim) {
positive[index] = true;
} else {
cerr << "Index " << index << " is out of bounds in positivity list. Allowed indexes are [0," << (option.pdim-1) << "]." << endl;
}
}
}
// treat sparse features just like regular features
if (data.hasSparseFeatures()) {
data.mergeSparseFeatures();
@ -433,7 +461,7 @@ int main(int argc, char **argv)
data_ref = shards[i]; //use the sharded data if it exists
vector<OptimizationTask*>& tasks = allTasks[i];
Optimizer *optimizer = OptimizerFactory::BuildOptimizer(option.pdim, to_optimize, start_list[0], option.optimize_type, option.nrandom);
Optimizer *optimizer = OptimizerFactory::BuildOptimizer(option.pdim, to_optimize, positive, start_list[0], option.optimize_type, option.nrandom);
optimizer->SetScorer(data_ref.getScorer());
optimizer->SetFeatureData(data_ref.getFeatureData());
// A task for each start point