#include #include #include #include #include #include #include #include "Scorer.h" #include "Timer.h" #include "Util.h" #include "ScorerFactory.h" using namespace std; void evaluate(const string& candFile); void addStats(vector& stats1, const vector& stats2); float average(const vector& list); float stdDeviation(const vector& list, float avg); string int2string(int n); Scorer* g_scorer = NULL; int g_bootstrap = 0; bool g_has_more_files = false; bool g_has_more_scorers = false; const float g_alpha = 0.05; void usage() { cerr<<"usage: evaluator [options] --reference ref1[,ref2[,ref3...]] --candidate cand1[,cand2[,cand3...]] "< refFiles; vector candFiles; vector scorerTypes; if (reference.length() == 0) throw runtime_error("You have to specify at least one reference file."); split(reference,',',refFiles); if (candidate.length() == 0) throw runtime_error("You have to specify at least one candidate file."); split(candidate,',',candFiles); if (scorerType.length() == 0) throw runtime_error("You have to specify at least one scorer."); split(scorerType,';',scorerTypes); if (candFiles.size() > 1) g_has_more_files = true; if (scorerTypes.size() > 1) g_has_more_scorers = true; for (vector::const_iterator fileIt = candFiles.begin(); fileIt != candFiles.end(); ++fileIt) { for (vector::const_iterator scorerIt = scorerTypes.begin(); scorerIt != scorerTypes.end(); ++scorerIt) { g_scorer = ScorerFactory::getScorer(*scorerIt,scorerConfig); g_scorer->setReferenceFiles(refFiles); evaluate(*fileIt); delete g_scorer; } } return EXIT_SUCCESS; } catch (const exception& e) { cerr << "Exception: " << e.what() << endl; return EXIT_FAILURE; } } void evaluate(const string& candFile) { ifstream cand(candFile.c_str()); if (!cand.good()) throw runtime_error("Error opening candidate file"); vector entries; // Loading sentences and preparing statistics ScoreStats scoreentry; string line; while (getline(cand, line)) { g_scorer->prepareStats(entries.size(), line, scoreentry); entries.push_back(scoreentry); } int n = entries.size(); if (g_bootstrap) { vector scores; for (int i = 0; i < g_bootstrap; ++i) { // TODO: Use smart pointer for exceptional-safety. ScoreData* scoredata = new ScoreData(*g_scorer); for (int j = 0; j < n; ++j) { int randomIndex = random() % n; string str_j = int2string(j); scoredata->add(entries[randomIndex], str_j); } g_scorer->setScoreData(scoredata); candidates_t candidates(n, 0); float score = g_scorer->score(candidates); scores.push_back(score); delete scoredata; } float avg = average(scores); sort(scores.begin(), scores.end()); int lbIdx = scores.size() * (g_alpha / 2); int rbIdx = scores.size() * (1 - g_alpha / 2); float lb = scores[lbIdx]; float rb = scores[rbIdx]; if (g_has_more_files) cout << candFile << "\t"; if (g_has_more_scorers) cout << g_scorer->getName() << "\t"; cout.setf(ios::fixed,ios::floatfield); cout.precision(4); cout << avg << "\t[" << lb << "," << rb << "]"<< endl; } else { // TODO: Use smart pointer for exceptional-safety. ScoreData* scoredata = new ScoreData(*g_scorer); for (int sid = 0; sid < n; ++sid) { string str_sid = int2string(sid); scoredata->add(entries[sid], str_sid); } g_scorer->setScoreData(scoredata); candidates_t candidates(n, 0); float score = g_scorer->score(candidates); delete scoredata; if (g_has_more_files) cout << candFile << "\t"; if (g_has_more_scorers) cout << g_scorer->getName() << "\t"; cout.setf(ios::fixed,ios::floatfield); cout.precision(4); cout << score << endl; } } string int2string(int n) { stringstream ss; ss << n; return ss.str(); } float average(const vector& list) { float sum = 0; for (vector::const_iterator it = list.begin(); it != list.end(); ++it) sum += *it; return sum / list.size(); }