/*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2010 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include #include #include #include #include #include "FeatureVector.h" #include "StaticData.h" #include "ChartTrellisPathList.h" #include "ChartTrellisPath.h" #include "ScoreComponentCollection.h" #include "Decoder.h" #include "Optimiser.h" using namespace Mira; using namespace std; using namespace Moses; namespace po = boost::program_options; void OutputNBestList(const MosesChart::TrellisPathList &nBestList, const TranslationSystem* system, long translationId); bool loadSentences(const string& filename, vector& sentences) { ifstream in(filename.c_str()); if (!in) return false; string line; while(getline(in,line)) { sentences.push_back(line); } return true; } int main(int argc, char** argv) { bool help; int verbosity; string mosesConfigFile; string inputFile; vector referenceFiles; po::options_description desc("Allowed options"); desc.add_options() ("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit") ("config,f",po::value(&mosesConfigFile),"Moses ini file") ("verbosity,v", po::value(&verbosity)->default_value(0), "Verbosity level") ("input-file,i",po::value(&inputFile),"Input file containing tokenised source") ("reference-files,r", po::value >(&referenceFiles), "Reference translation files for training"); po::options_description cmdline_options; cmdline_options.add(desc); po::variables_map vm; po::store(po::command_line_parser(argc,argv). options(cmdline_options).run(), vm); po::notify(vm); if (help) { std::cout << "Usage: " + string(argv[0]) + " -f mosesini-file -i input-file -r reference-file(s) [options]" << std::endl; std::cout << desc << std::endl; return 0; } if (mosesConfigFile.empty()) { cerr << "Error: No moses ini file specified" << endl; return 1; } if (inputFile.empty()) { cerr << "Error: No input file specified" << endl; return 1; } if (!referenceFiles.size()) { cerr << "Error: No reference files specified" << endl; return 1; } //load input and references vector inputSentences; if (!loadSentences(inputFile, inputSentences)) { cerr << "Error: Failed to load input sentences from " << inputFile << endl; return 1; } vector< vector > referenceSentences(referenceFiles.size()); for (size_t i = 0; i < referenceFiles.size(); ++i) { if (!loadSentences(referenceFiles[i], referenceSentences[i])) { cerr << "Error: Failed to load reference sentences from " << referenceFiles[i] << endl; return 1; } if (referenceSentences[i].size() != inputSentences.size()) { cerr << "Error: Input file length (" << inputSentences.size() << ") != (" << referenceSentences[i].size() << ") length of reference file " << i << endl; return 1; } } //initialise moses initMoses(mosesConfigFile, verbosity);//, argc, argv); //Main loop: srand(time(NULL)); MosesDecoder decoder; Perceptron optimiser; size_t epochs = 1; std::vector losses; for (size_t epoch = 0; epoch < epochs; ++epoch) { //TODO: batch for (size_t sid = 0; sid < inputSentences.size(); ++sid) { const string& input = inputSentences[sid]; const vector& refs = referenceSentences[sid]; //run decoder (TODO: hope & fear) MosesChart::TrellisPathList models, hopes, fears; vector scores; StaticData &staticNonConst = StaticData::InstanceNonConst(); // MODEL PARAM_VEC bleuWeight(1, "0"); staticNonConst.GetParameter()->OverwriteParam("-weight-b", bleuWeight); staticNonConst.ReLoadParameter(); decoder.getNBest(input, 100, models); decoder.OutputNBestList(models, scores); // HOPE bleuWeight[0] = "+1"; staticNonConst.GetParameter()->OverwriteParam("-weight-b", bleuWeight); staticNonConst.ReLoadParameter(); decoder.getNBest(input, 100, hopes); decoder.OutputNBestList(hopes, scores); // FEAR bleuWeight[0] = "-1"; staticNonConst.GetParameter()->OverwriteParam("-weight-b", bleuWeight); staticNonConst.ReLoadParameter(); decoder.getNBest(input, 100, fears); decoder.OutputNBestList(fears, scores); //extract scores from nbest + oracle //run optimiser const MosesChart::TrellisPath &pathOracle = hopes.Get(0); const Moses::ScoreComponentCollection &oracle = pathOracle.GetScoreBreakdown(); ScoreComponentCollection mosesWeights; //TODO vector > allScores; vector > allLosses; allScores.push_back(scores); allLosses.push_back(losses); optimiser.updateWeights(mosesWeights , allScores , allLosses , oracle); //update moses weights decoder.cleanup(); } } exit(0); }