2010-09-15 18:36:07 +04:00
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2010 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
2010-09-15 19:38:46 +04:00
|
|
|
#include <cstdlib>
|
|
|
|
#include <ctime>
|
2010-09-15 18:36:07 +04:00
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include <boost/program_options.hpp>
|
|
|
|
|
2010-09-15 19:38:46 +04:00
|
|
|
#include "FeatureVector.h"
|
2010-09-15 18:36:07 +04:00
|
|
|
#include "StaticData.h"
|
2010-09-16 20:23:52 +04:00
|
|
|
#include "ChartTrellisPathList.h"
|
2010-09-17 11:35:31 +04:00
|
|
|
#include "ChartTrellisPath.h"
|
|
|
|
#include "ScoreComponentCollection.h"
|
2010-09-15 18:36:07 +04:00
|
|
|
#include "Decoder.h"
|
2010-09-15 19:38:46 +04:00
|
|
|
#include "Optimiser.h"
|
2010-09-15 18:36:07 +04:00
|
|
|
|
|
|
|
using namespace Mira;
|
|
|
|
using namespace std;
|
|
|
|
using namespace Moses;
|
|
|
|
namespace po = boost::program_options;
|
|
|
|
|
2010-09-16 20:23:52 +04:00
|
|
|
// Forward declaration of a free-standing n-best list printer. No definition or
// call of this free function is visible in this part of the file (the main loop
// calls decoder.OutputNBestList instead) -- NOTE(review): confirm it is still needed.
void OutputNBestList(const MosesChart::TrellisPathList &nBestList, const TranslationSystem* system, long translationId);
|
|
|
|
|
2010-09-15 19:38:46 +04:00
|
|
|
/**
 * Reads a text file line by line, appending every line to `sentences`.
 *
 * @param filename   path of the file to read
 * @param sentences  output vector; lines are appended, existing entries are kept
 * @return false if the file cannot be opened or a hard read error occurs
 *         before end of file; true otherwise (including for an empty file)
 */
bool loadSentences(const std::string& filename, std::vector<std::string>& sentences) {
  std::ifstream in(filename.c_str());
  if (!in) return false;

  std::string line;
  while (std::getline(in, line)) {
    sentences.push_back(line);
  }

  // getline ends the loop on EOF *and* on an I/O failure; only plain EOF is
  // success, otherwise the caller would silently train on a truncated file.
  return !in.bad();
}
|
|
|
|
|
2010-09-15 18:36:07 +04:00
|
|
|
int main(int argc, char** argv) {
|
|
|
|
bool help;
|
|
|
|
int verbosity;
|
|
|
|
string mosesConfigFile;
|
|
|
|
string inputFile;
|
|
|
|
vector<string> referenceFiles;
|
|
|
|
po::options_description desc("Allowed options");
|
|
|
|
desc.add_options()
|
|
|
|
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
|
|
|
|
("config,f",po::value<string>(&mosesConfigFile),"Moses ini file")
|
|
|
|
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
|
|
|
|
("input-file,i",po::value<string>(&inputFile),"Input file containing tokenised source")
|
|
|
|
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training");
|
|
|
|
|
|
|
|
po::options_description cmdline_options;
|
|
|
|
cmdline_options.add(desc);
|
|
|
|
po::variables_map vm;
|
|
|
|
po::store(po::command_line_parser(argc,argv).
|
|
|
|
options(cmdline_options).run(), vm);
|
|
|
|
po::notify(vm);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (help) {
|
2010-09-15 19:38:46 +04:00
|
|
|
std::cout << "Usage: " + string(argv[0]) + " -f mosesini-file -i input-file -r reference-file(s) [options]" << std::endl;
|
2010-09-15 18:36:07 +04:00
|
|
|
std::cout << desc << std::endl;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mosesConfigFile.empty()) {
|
|
|
|
cerr << "Error: No moses ini file specified" << endl;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (inputFile.empty()) {
|
|
|
|
cerr << "Error: No input file specified" << endl;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!referenceFiles.size()) {
|
|
|
|
cerr << "Error: No reference files specified" << endl;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//load input and references
|
2010-09-15 19:38:46 +04:00
|
|
|
vector<string> inputSentences;
|
|
|
|
if (!loadSentences(inputFile, inputSentences)) {
|
|
|
|
cerr << "Error: Failed to load input sentences from " << inputFile << endl;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
vector< vector<string> > referenceSentences(referenceFiles.size());
|
|
|
|
for (size_t i = 0; i < referenceFiles.size(); ++i) {
|
|
|
|
if (!loadSentences(referenceFiles[i], referenceSentences[i])) {
|
|
|
|
cerr << "Error: Failed to load reference sentences from " << referenceFiles[i] << endl;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if (referenceSentences[i].size() != inputSentences.size()) {
|
|
|
|
cerr << "Error: Input file length (" << inputSentences.size() <<
|
|
|
|
") != (" << referenceSentences[i].size() << ") length of reference file " << i <<
|
|
|
|
endl;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
//initialise moses
|
2010-09-16 11:49:12 +04:00
|
|
|
initMoses(mosesConfigFile, verbosity);//, argc, argv);
|
2010-09-15 18:36:07 +04:00
|
|
|
|
|
|
|
//Main loop:
|
2010-09-15 19:38:46 +04:00
|
|
|
srand(time(NULL));
|
2010-09-16 20:23:52 +04:00
|
|
|
MosesDecoder decoder;
|
2010-09-17 11:35:31 +04:00
|
|
|
DummyOptimiser optimiser;
|
2010-09-15 20:09:43 +04:00
|
|
|
size_t epochs = 1;
|
2010-09-15 19:38:46 +04:00
|
|
|
|
2010-09-17 11:35:31 +04:00
|
|
|
std::vector<float> currWeights, newWeights, losses;
|
|
|
|
|
2010-09-15 20:09:43 +04:00
|
|
|
for (size_t epoch = 0; epoch < epochs; ++epoch) {
|
|
|
|
//TODO: batch
|
|
|
|
for (size_t sid = 0; sid < inputSentences.size(); ++sid) {
|
|
|
|
const string& input = inputSentences[sid];
|
|
|
|
const vector<string>& refs = referenceSentences[sid];
|
|
|
|
|
|
|
|
//run decoder (TODO: hope & fear)
|
2010-09-17 11:35:31 +04:00
|
|
|
MosesChart::TrellisPathList models, hopes, fears;
|
|
|
|
|
|
|
|
vector<const Moses::ScoreComponentCollection*> scores;
|
2010-09-16 20:23:52 +04:00
|
|
|
|
2010-09-17 11:35:31 +04:00
|
|
|
StaticData &staticNonConst = StaticData::InstanceNonConst();
|
2010-09-15 20:09:43 +04:00
|
|
|
|
2010-09-17 11:35:31 +04:00
|
|
|
// MODEL
|
|
|
|
PARAM_VEC bleuWeight(1, "0");
|
|
|
|
|
|
|
|
staticNonConst.GetParameter()->OverwriteParam("-weight-b", bleuWeight);
|
|
|
|
staticNonConst.ReLoadParameter();
|
|
|
|
decoder.getNBest(input, 100, models);
|
|
|
|
decoder.OutputNBestList(models, scores);
|
|
|
|
|
|
|
|
// HOPE
|
|
|
|
bleuWeight[0] = "+1";
|
|
|
|
staticNonConst.GetParameter()->OverwriteParam("-weight-b", bleuWeight);
|
|
|
|
staticNonConst.ReLoadParameter();
|
|
|
|
decoder.getNBest(input, 100, hopes);
|
|
|
|
decoder.OutputNBestList(hopes, scores);
|
|
|
|
|
|
|
|
// FEAR
|
|
|
|
bleuWeight[0] = "-1";
|
|
|
|
staticNonConst.GetParameter()->OverwriteParam("-weight-b", bleuWeight);
|
|
|
|
staticNonConst.ReLoadParameter();
|
|
|
|
decoder.getNBest(input, 100, fears);
|
|
|
|
decoder.OutputNBestList(fears, scores);
|
|
|
|
|
|
|
|
//extract scores from nbest + oracle
|
2010-09-16 20:23:52 +04:00
|
|
|
|
2010-09-15 20:09:43 +04:00
|
|
|
//run optimiser
|
2010-09-17 11:35:31 +04:00
|
|
|
const MosesChart::TrellisPath &pathOracle = hopes.Get(0);
|
|
|
|
const Moses::ScoreComponentCollection &oracle = pathOracle.GetScoreBreakdown();
|
|
|
|
|
|
|
|
optimiser.updateWeights(currWeights
|
|
|
|
, scores
|
|
|
|
, losses
|
|
|
|
, oracle
|
|
|
|
, newWeights);
|
|
|
|
|
2010-09-15 20:09:43 +04:00
|
|
|
//update moses weights
|
2010-09-16 20:23:52 +04:00
|
|
|
|
|
|
|
decoder.cleanup();
|
2010-09-15 20:09:43 +04:00
|
|
|
}
|
2010-09-15 19:38:46 +04:00
|
|
|
}
|
2010-09-15 18:36:07 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
2010-09-15 19:38:46 +04:00
|
|
|
exit(0);
|
2010-09-15 18:36:07 +04:00
|
|
|
}
|
2010-09-16 20:23:52 +04:00
|
|
|
|