/** * \description This is the main for the new version of the mert algorithm developed during the 2nd MT marathon */ #include #include #include #include #include #include #include #include #include "Data.h" #include "Point.h" #include "Scorer.h" #include "ScorerFactory.h" #include "ScoreData.h" #include "FeatureData.h" #include "Optimizer.h" #include "Types.h" #include "Timer.h" #include "Util.h" #include "../moses/src/ThreadPool.h" using namespace std; namespace { /** * Runs an optimisation, or a random restart. */ class OptimizationTask : public Moses::Task { public: OptimizationTask(Optimizer* optimizer, const Point& point) : m_optimizer(optimizer), m_point(point) {} ~OptimizationTask() {} virtual void Run() { m_score = m_optimizer->Run(m_point); } virtual bool DeleteAfterExecution() { return false; } void resetOptimizer() { if (m_optimizer) { delete m_optimizer; m_optimizer = NULL; } } statscore_t getScore() const { return m_score; } const Point& getPoint() const { return m_point; } private: // Do not allow the user to instanciate without arguments. OptimizationTask() {} Optimizer* m_optimizer; Point m_point; statscore_t m_score; }; void usage(int ret) { cerr << "usage: mert -d (mandatory)" << endl; cerr << "[-n] retry ntimes (default 1)" << endl; cerr << "[-m] number of random directions in powell (default 0)"<< endl; cerr << "[-o] the indexes to optimize(default all)" << endl; cerr << "[-t] the optimizer(default powell)" << endl; cerr << "[-r] the random seed (defaults to system clock)" << endl; cerr << "[--sctype|-s] the scorer type (default BLEU)" << endl; cerr << "[--scconfig|-c] configuration string passed to scorer" << endl; cerr << "[--scfile|-S] comma separated list of scorer data files (default score.data)" << endl; cerr << "[--ffile|-F] comma separated list of feature data files (default feature.data)" << endl; cerr << "[--ifile|-i] the starting point data file (default init.opt)" << endl; #ifdef WITH_THREADS cerr << "[--threads|-T] use multiple threads (default 1)" << endl; #endif cerr << "[--shard-count] Split data into shards, optimize for each shard and average" << endl; cerr << "[--shard-size] Shard size as proportion of data. If 0, use non-overlapping shards" << endl; cerr << "[-v] verbose level" << endl; cerr << "[--help|-h] print this message and exit" << endl; exit(ret); } static struct option long_options[] = { {"pdim", 1, 0, 'd'}, {"ntry", 1, 0, 'n'}, {"nrandom", 1, 0, 'm'}, {"rseed", required_argument, 0, 'r'}, {"optimize", 1, 0, 'o'}, {"pro", required_argument, 0, 'p'}, {"type", 1, 0, 't'}, {"sctype", 1, 0, 's'}, {"scconfig", required_argument, 0, 'c'}, {"scfile", 1, 0, 'S'}, {"ffile", 1, 0, 'F'}, {"ifile", 1, 0, 'i'}, #ifdef WITH_THREADS {"threads", required_argument, 0, 'T'}, #endif {"shard-count", required_argument, 0, 'a'}, {"shard-size", required_argument, 0, 'b'}, {"verbose", 1, 0, 'v'}, {"help", no_argument, 0, 'h'}, {0, 0, 0, 0} }; struct ProgramOption { string to_optimize_str; int pdim; int ntry; int nrandom; int seed; bool has_seed; string optimize_type; string scorer_type; string scorer_config; string scorer_file; string feature_file; string init_file; size_t num_threads; float shard_size; size_t shard_count; ProgramOption() : to_optimize_str(""), pdim(-1), ntry(1), nrandom(0), seed(0), has_seed(false), optimize_type("powell"), scorer_type("BLEU"), scorer_config(""), scorer_file("statscore.data"), feature_file("features.data"), init_file("init.opt"), num_threads(1), shard_size(0), shard_count(0) { } }; void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) { int c; int option_index; while ((c = getopt_long(argc, argv, "o:r:d:n:m:t:s:S:F:v:p:", long_options, &option_index)) != -1) { switch (c) { case 'o': opt->to_optimize_str = string(optarg); break; case 'd': opt->pdim = strtol(optarg, NULL, 10); break; case 'n': opt->ntry = strtol(optarg, NULL, 10); break; case 'm': opt->nrandom = strtol(optarg, NULL, 10); break; case 'r': opt->seed = strtol(optarg, NULL, 10); opt->has_seed = true; break; case 't': opt->optimize_type = string(optarg); break; case's': opt->scorer_type = string(optarg); break; case 'c': opt->scorer_config = string(optarg); break; case 'S': opt->scorer_file = string(optarg); break; case 'F': opt->feature_file = string(optarg); break; case 'i': opt->init_file = string(optarg); break; case 'v': setverboselevel(strtol(optarg, NULL, 10)); break; #ifdef WITH_THREADS case 'T': opt->num_threads = strtol(optarg, NULL, 10); if (opt->num_threads < 1) opt->num_threads = 1; break; #endif case 'a': opt->shard_count = strtof(optarg, NULL); break; case 'b': opt->shard_size = strtof(optarg, NULL); break; case 'h': usage(0); break; default: usage(1); } } } } // anonymous namespace int main(int argc, char **argv) { ResetUserTime(); ProgramOption option; ParseCommandOptions(argc, argv, &option); vector to_optimize; vector > start_list; vector min; vector max; // NOTE: those mins and max are the bound for the starting points of the algorithm, not strict bound on the result! if (option.pdim < 0) usage(1); cerr << "shard_size = " << option.shard_size << " shard_count = " << option.shard_count << endl; if (option.shard_size && !option.shard_count) { cerr << "Error: shard-size provided without shard-count" << endl; exit(1); } if (option.shard_size > 1 || option.shard_size < 0) { cerr << "Error: shard-size should be between 0 and 1" << endl; exit(1); } if (option.has_seed) { cerr << "Seeding random numbers with " << option.seed << endl; srandom(option.seed); } else { cerr << "Seeding random numbers with system clock " << endl; srandom(time(NULL)); } // read in starting points string onefile; while (!option.init_file.empty()) { getNextPound(option.init_file, onefile, ","); vector start; ifstream opt(onefile.c_str()); if (opt.fail()) { cerr << "could not open initfile: " << option.init_file << endl; exit(3); } start.resize(option.pdim);//to do:read from file int j; for (j = 0; j < option.pdim && !opt.fail(); j++) { opt >> start[j]; } if (j < option.pdim) { cerr << option.init_file << ":Too few starting weights." << endl; exit(3); } start_list.push_back(start); // for the first time, also read in the min/max values for scores if (start_list.size() == 1) { min.resize(option.pdim); for (j = 0; j < option.pdim && !opt.fail(); j++) { opt >> min[j]; } if (j < option.pdim) { cerr << option.init_file << ":Too few minimum weights." << endl; cerr << "error could not initialize start point with " << option.init_file << endl; cerr << "j: " << j << ", pdim: " << option.pdim << endl; exit(3); } max.resize(option.pdim); for (j = 0; j < option.pdim && !opt.fail(); j++) { opt >> max[j]; } if (j < option.pdim) { cerr << option.init_file << ":Too few maximum weights." << endl; exit(3); } } opt.close(); } vector ScoreDataFiles; if (option.scorer_file.length() > 0) { Tokenize(option.scorer_file.c_str(), ',', &ScoreDataFiles); } vector FeatureDataFiles; if (option.feature_file.length() > 0) { Tokenize(option.feature_file.c_str(), ',', &FeatureDataFiles); } if (ScoreDataFiles.size() != FeatureDataFiles.size()) { throw runtime_error("Error: there is a different number of previous score and feature files"); } // it make sense to know what parameter set were used to generate the nbest Scorer *TheScorer = ScorerFactory::getScorer(option.scorer_type, option.scorer_config); //load data Data D(*TheScorer); for (size_t i = 0; i < ScoreDataFiles.size(); i++) { cerr<<"Loading Data from: "<< ScoreDataFiles.at(i) << " and " << FeatureDataFiles.at(i) << endl; D.load(FeatureDataFiles.at(i), ScoreDataFiles.at(i)); } //ADDED_BY_TS D.remove_duplicates(); //END_ADDED PrintUserTime("Data loaded"); // starting point score over latest n-best, accumulative n-best //vector bests; //compute bests with sparse features needs to be implemented //currently sparse weights are not even loaded //statscore_t score = TheScorer->score(bests); if (option.to_optimize_str.length() > 0) { cerr << "Weights to optimize: " << option.to_optimize_str << endl; // Parse string to get weights to optimize, and set them as active string substring; int index; while (!option.to_optimize_str.empty()) { getNextPound(option.to_optimize_str, substring, ","); index = D.getFeatureIndex(substring); cerr << "FeatNameIndex:" << index << " to insert" << endl; //index = strtol(substring.c_str(), NULL, 10); if (index >= 0 && index < option.pdim) { to_optimize.push_back(index); } else { cerr << "Index " << index << " is out of bounds. Allowed indexes are [0," << option.pdim - 1 << "]." << endl; } } } else { //set all weights as active to_optimize.resize(option.pdim);//We'll optimize on everything for (int i = 0; i < option.pdim; i++) { to_optimize[i] = 1; } } // treat sparse features just like regular features if (D.hasSparseFeatures()) { D.mergeSparseFeatures(); } #ifdef WITH_THREADS cerr << "Creating a pool of " << option.num_threads << " threads" << endl; Moses::ThreadPool pool(option.num_threads); #endif Point::setpdim(option.pdim); Point::setdim(to_optimize.size()); //starting points consist of specified points and random restarts vector startingPoints; for (size_t i = 0; i < start_list.size(); ++i) { startingPoints.push_back(Point(start_list[i], min, max)); } for (int i = 0; i < option.ntry; ++i) { startingPoints.push_back(Point(start_list[0], min, max)); startingPoints.back().Randomize(); } vector > allTasks(1); //optional sharding vector shards; if (option.shard_count) { D.createShards(option.shard_count, option.shard_size, option.scorer_config, shards); allTasks.resize(option.shard_count); } // launch tasks for (size_t i = 0; i < allTasks.size(); ++i) { Data& data = D; if (option.shard_count) data = shards[i]; //use the sharded data if it exists vector& tasks = allTasks[i]; Optimizer *O = OptimizerFactory::BuildOptimizer(option.pdim, to_optimize, start_list[0], option.optimize_type, option.nrandom); O->SetScorer(data.getScorer()); O->SetFData(data.getFeatureData()); //A task for each start point for (size_t j = 0; j < startingPoints.size(); ++j) { OptimizationTask* task = new OptimizationTask(O, startingPoints[j]); tasks.push_back(task); #ifdef WITH_THREADS pool.Submit(task); #else task->Run(); #endif } } // wait for all threads to finish #ifdef WITH_THREADS pool.Stop(true); #endif statscore_t total = 0; Point totalP; // collect results for (size_t i = 0; i < allTasks.size(); ++i) { statscore_t best = 0, mean = 0, var = 0; Point bestP; for (size_t j = 0; j < allTasks[i].size(); ++j) { statscore_t score = allTasks[i][j]->getScore(); mean += score; var += score * score; if (score > best) { bestP = allTasks[i][j]->getPoint(); best = score; } } mean /= (float)option.ntry; var /= (float)option.ntry; var = sqrt(abs(var - mean * mean)); if (verboselevel() > 1) { cerr << "shard " << i << " best score: " << best << " variance of the score (for " << option.ntry << " try): " << var << endl; } totalP += bestP; total += best; if (verboselevel() > 1) cerr << "bestP " << bestP << endl; } //cerr << "totalP: " << totalP << endl; Point finalP = totalP * (1.0 / allTasks.size()); statscore_t final = total / allTasks.size(); if (verboselevel() > 1) cerr << "bestP: " << finalP << endl; // L1-Normalization of the best Point if ((int)to_optimize.size() == option.pdim) { finalP.NormalizeL1(); } cerr << "Best point: " << finalP << " => " << final << endl; ofstream res("weights.txt"); res << finalP << endl; for (size_t i = 0; i < allTasks.size(); ++i) { allTasks[i][0]->resetOptimizer(); for (size_t j = 0; j < allTasks[i].size(); ++j) { delete allTasks[i][j]; } } delete TheScorer; PrintUserTime("Stopping..."); return 0; }