2010-09-15 18:36:07 +04:00
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2009 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
#include "Decoder.h"
|
|
|
|
#include "Manager.h"
|
2010-09-17 14:17:27 +04:00
|
|
|
#include "Sentence.h"
|
2010-09-17 21:25:51 +04:00
|
|
|
#include "InputType.h"
|
2010-09-15 18:36:07 +04:00
|
|
|
#include "TranslationSystem.h"
|
2010-09-16 20:23:52 +04:00
|
|
|
#include "Phrase.h"
|
2010-09-17 14:17:27 +04:00
|
|
|
#include "TrellisPathList.h"
|
2010-09-16 20:23:52 +04:00
|
|
|
#include "DummyScoreProducers.h"
|
2010-09-15 18:36:07 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
using namespace Moses;
|
|
|
|
|
|
|
|
|
|
|
|
namespace Mira {
|
|
|
|
|
2010-09-17 14:17:27 +04:00
|
|
|
//Decoder::~Decoder() {}
|
2010-09-15 18:36:07 +04:00
|
|
|
|
|
|
|
/**
 * Returns a freshly allocated, NUL-terminated C string holding a copy of s.
 * The buffer comes from new[], so the caller must release it with delete[].
 **/
static char* strToChar(const std::string& s) {
  // strcpy returns its destination pointer, i.e. the new buffer.
  return strcpy(new char[s.size() + 1], s.c_str());
}
|
2010-09-16 20:23:52 +04:00
|
|
|
|
2011-02-24 13:54:16 +03:00
|
|
|
void initMoses(const string& inifile, int debuglevel, int argc, vector<string> decoder_params) {
|
2010-09-15 19:38:46 +04:00
|
|
|
static int BASE_ARGC = 5;
|
2010-09-15 18:36:07 +04:00
|
|
|
Parameter* params = new Parameter();
|
|
|
|
char ** mosesargv = new char*[BASE_ARGC + argc];
|
|
|
|
mosesargv[0] = strToChar("-f");
|
|
|
|
mosesargv[1] = strToChar(inifile);
|
|
|
|
mosesargv[2] = strToChar("-v");
|
|
|
|
stringstream dbgin;
|
|
|
|
dbgin << debuglevel;
|
|
|
|
mosesargv[3] = strToChar(dbgin.str());
|
2010-09-15 19:38:46 +04:00
|
|
|
mosesargv[4] = strToChar("-mbr"); //so we can do nbest
|
2010-09-15 18:36:07 +04:00
|
|
|
|
|
|
|
for (int i = 0; i < argc; ++i) {
|
2011-02-24 13:54:16 +03:00
|
|
|
char *cstr = &(decoder_params[i])[0];
|
|
|
|
mosesargv[BASE_ARGC + i] = cstr;
|
2010-09-15 18:36:07 +04:00
|
|
|
}
|
2011-02-24 13:54:16 +03:00
|
|
|
|
2010-09-15 18:36:07 +04:00
|
|
|
params->LoadParam(BASE_ARGC + argc,mosesargv);
|
|
|
|
StaticData::LoadDataStatic(params);
|
|
|
|
for (int i = 0; i < BASE_ARGC; ++i) {
|
|
|
|
delete[] mosesargv[i];
|
|
|
|
}
|
|
|
|
delete[] mosesargv;
|
|
|
|
}
|
2010-09-17 12:33:22 +04:00
|
|
|
|
2010-12-10 19:34:43 +03:00
|
|
|
/**
 * Sets up the decoder: pushes one throw-away sentence through a Manager and
 * then registers a BLEU score feature, loaded with the given references, on
 * the default translation system.
 *
 * The remaining arguments are forwarded unchanged to the BleuScoreFeature
 * constructor.
 **/
MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
  : m_manager(NULL) {
  const StaticData &staticData = StaticData::Instance();

  // Decode a dummy sentence to force initialisation of the phrase dictionary
  // (the original author left a TODO asking why this is needed).
  m_sentence = new Sentence(Input);
  stringstream dummyInput("Initialising decoder..\n");
  m_sentence->Read(dummyInput, staticData.GetInputFactorOrder());

  const TranslationSystem& defaultSystem = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
  m_manager = new Manager(*m_sentence, staticData.GetSearchAlgorithm(), &defaultSystem);
  m_manager->ProcessSentence();

  // Create the BLEU feature and attach it to the translation system. The
  // system is only handed out as const, hence the cast.
  m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
  TranslationSystem& writableSystem = const_cast<TranslationSystem&>(defaultSystem);
  writableSystem.AddFeatureFunction(m_bleuScoreFeature);
  m_bleuScoreFeature->LoadReferences(refs);
}
|
2010-09-15 18:36:07 +04:00
|
|
|
|
2010-10-22 20:50:42 +04:00
|
|
|
/**
 * Frees the Manager and Sentence currently held by the decoder.
 *
 * Both pointers are reset to NULL afterwards, so calling cleanup() again
 * without an intervening decode is a harmless no-op instead of a double free.
 **/
void MosesDecoder::cleanup() {
  delete m_manager;
  m_manager = NULL;
  delete m_sentence;
  m_sentence = NULL;
}
|
|
|
|
|
2010-09-17 19:17:33 +04:00
|
|
|
vector<const Word*> MosesDecoder::getNBest(const std::string& source,
|
|
|
|
size_t sentenceid,
|
2010-09-17 14:17:27 +04:00
|
|
|
size_t count,
|
2010-09-17 18:32:27 +04:00
|
|
|
float bleuObjectiveWeight,
|
2010-09-17 19:17:33 +04:00
|
|
|
float bleuScoreWeight,
|
2010-10-22 20:50:42 +04:00
|
|
|
vector< ScoreComponentCollection>& featureValues,
|
2010-10-25 19:16:34 +04:00
|
|
|
vector< float>& bleuScores,
|
2010-11-29 17:11:19 +03:00
|
|
|
bool oracle,
|
2010-12-14 17:51:04 +03:00
|
|
|
bool distinct,
|
2011-01-07 21:57:38 +03:00
|
|
|
bool ignoreUWeight,
|
|
|
|
size_t rank)
|
2010-09-17 14:17:27 +04:00
|
|
|
{
|
2011-02-24 13:54:16 +03:00
|
|
|
StaticData &staticData = StaticData::InstanceNonConst();
|
2010-09-16 20:23:52 +04:00
|
|
|
|
2011-02-24 13:54:16 +03:00
|
|
|
m_sentence = new Sentence(Input);
|
2010-09-15 19:38:46 +04:00
|
|
|
stringstream in(source + "\n");
|
|
|
|
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
|
2010-09-16 20:23:52 +04:00
|
|
|
m_sentence->Read(in,inputFactorOrder);
|
2011-02-24 13:54:16 +03:00
|
|
|
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
|
2010-09-16 20:23:52 +04:00
|
|
|
|
2010-11-24 20:06:54 +03:00
|
|
|
// set the weight for the bleu feature
|
2010-09-17 18:32:27 +04:00
|
|
|
ostringstream bleuWeightStr;
|
|
|
|
bleuWeightStr << bleuObjectiveWeight;
|
|
|
|
PARAM_VEC bleuWeight(1,bleuWeightStr.str());
|
2010-11-24 20:06:54 +03:00
|
|
|
|
2010-09-17 18:32:27 +04:00
|
|
|
staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
|
2010-11-24 20:06:54 +03:00
|
|
|
staticData.ReLoadBleuScoreFeatureParameter();
|
2010-09-17 18:32:27 +04:00
|
|
|
|
2010-10-22 20:50:42 +04:00
|
|
|
m_bleuScoreFeature->SetCurrentSourceLength((*m_sentence).GetSize());
|
2010-09-17 19:17:33 +04:00
|
|
|
m_bleuScoreFeature->SetCurrentReference(sentenceid);
|
|
|
|
|
2010-09-17 18:32:27 +04:00
|
|
|
//run the decoder
|
2010-09-17 14:17:27 +04:00
|
|
|
m_manager = new Moses::Manager(*m_sentence, staticData.GetSearchAlgorithm(), &system);
|
2010-09-16 20:23:52 +04:00
|
|
|
m_manager->ProcessSentence();
|
2010-09-17 14:17:27 +04:00
|
|
|
TrellisPathList sentences;
|
2010-11-29 17:11:19 +03:00
|
|
|
m_manager->CalcNBest(count,sentences, distinct);
|
2010-09-17 11:35:31 +04:00
|
|
|
|
2010-10-22 20:50:42 +04:00
|
|
|
// read off the feature values and bleu scores for each sentence in the nbest list
|
|
|
|
Moses::TrellisPathList::const_iterator iter;
|
|
|
|
for (iter = sentences.begin() ; iter != sentences.end() ; ++iter) {
|
|
|
|
const Moses::TrellisPath &path = **iter;
|
|
|
|
featureValues.push_back(path.GetScoreBreakdown());
|
|
|
|
float bleuScore = getBleuScore(featureValues.back());
|
|
|
|
bleuScores.push_back(bleuScore);
|
|
|
|
|
|
|
|
//std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
|
|
|
|
float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
|
2010-11-18 19:24:51 +03:00
|
|
|
cerr << "Total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
|
2010-12-14 17:51:04 +03:00
|
|
|
|
2011-01-07 21:57:38 +03:00
|
|
|
Phrase bestPhrase = path.GetTargetPhrase();
|
2011-01-07 21:03:05 +03:00
|
|
|
|
2011-01-07 21:57:38 +03:00
|
|
|
cerr << "Rank " << rank << ": ";
|
|
|
|
Phrase phrase = path.GetTargetPhrase();
|
|
|
|
for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
|
|
|
|
const Word &word = phrase.GetWord(pos);
|
|
|
|
Word *newWord = new Word(word);
|
|
|
|
cerr << *newWord << " ";
|
|
|
|
}
|
|
|
|
|
|
|
|
cerr << endl;
|
2010-10-22 20:50:42 +04:00
|
|
|
|
|
|
|
// set bleu score to zero in the feature vector since we do not want to optimise its weight
|
|
|
|
setBleuScore(featureValues.back(), 0);
|
2010-12-14 17:51:04 +03:00
|
|
|
|
|
|
|
if (ignoreUWeight) {
|
|
|
|
const UnknownWordPenaltyProducer *unknownWPP = (const_cast<TranslationSystem&>(system)).GetUnknownWordPenaltyProducer();
|
|
|
|
(featureValues.back()).Assign(unknownWPP, 0);
|
|
|
|
}
|
2010-09-17 19:17:33 +04:00
|
|
|
}
|
|
|
|
|
2010-10-22 20:50:42 +04:00
|
|
|
// get the best
|
|
|
|
vector<const Word*> best;
|
2010-10-25 19:16:34 +04:00
|
|
|
if (oracle) {
|
|
|
|
|
|
|
|
assert(sentences.GetSize() > 0);
|
|
|
|
const TrellisPath &path = sentences.at(0);
|
|
|
|
Phrase bestPhrase = path.GetTargetPhrase();
|
|
|
|
|
|
|
|
for (size_t pos = 0; pos < bestPhrase.GetSize(); ++pos) {
|
|
|
|
const Word &word = bestPhrase.GetWord(pos);
|
|
|
|
Word *newWord = new Word(word);
|
|
|
|
best.push_back(newWord);
|
|
|
|
}
|
|
|
|
}
|
2010-09-17 19:17:33 +04:00
|
|
|
|
|
|
|
return best;
|
2010-10-22 20:50:42 +04:00
|
|
|
}
|
2010-09-17 14:17:27 +04:00
|
|
|
|
2010-11-24 20:06:54 +03:00
|
|
|
size_t MosesDecoder::getCurrentInputLength() {
|
|
|
|
return (*m_sentence).GetSize();
|
|
|
|
}
|
2010-09-17 19:17:33 +04:00
|
|
|
|
2010-09-17 14:17:27 +04:00
|
|
|
float MosesDecoder::getBleuScore(const ScoreComponentCollection& scores) {
|
|
|
|
return scores.GetScoreForProducer(m_bleuScoreFeature);
|
|
|
|
}
|
2010-09-17 16:54:58 +04:00
|
|
|
|
|
|
|
/**
 * Overwrites the BLEU feature's entry in the given collection with bleu.
 **/
void MosesDecoder::setBleuScore(ScoreComponentCollection& scores, float bleu)
{
  scores.Assign(m_bleuScoreFeature, bleu);
}
|
|
|
|
|
|
|
|
/**
 * Returns a copy of all weights currently held by the global StaticData.
 **/
ScoreComponentCollection MosesDecoder::getWeights() {
  const StaticData& staticData = StaticData::Instance();
  return staticData.GetAllWeights();
}
|
|
|
|
|
|
|
|
void MosesDecoder::setWeights(const ScoreComponentCollection& weights) {
|
2011-01-26 20:51:45 +03:00
|
|
|
//cerr << "New weights: " << weights << endl;
|
2010-10-07 02:06:49 +04:00
|
|
|
StaticData::InstanceNonConst().SetAllWeights(weights);
|
2010-09-17 19:17:33 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Forwards a single hypothesis (as a word sequence) to the BLEU feature's
 * UpdateHistory.
 **/
void MosesDecoder::updateHistory(const vector<const Word*>& words)
{
  m_bleuScoreFeature->UpdateHistory(words);
}
|
2010-11-24 20:06:54 +03:00
|
|
|
|
|
|
|
/**
 * Batch variant: forwards several hypotheses together with their source
 * lengths and reference ids to the BLEU feature's UpdateHistory.
 **/
void MosesDecoder::updateHistory(const vector< vector< const Word*> >& words, vector<size_t>& sourceLengths, vector<size_t>& ref_ids)
{
  m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids);
}
|
2011-02-24 13:54:16 +03:00
|
|
|
|
|
|
|
void MosesDecoder::calculateBleuOfCorpus(const vector< vector< const Word*> >& words, vector<size_t>& ref_ids, size_t epoch) {
|
|
|
|
vector<float> bleu = m_bleuScoreFeature->CalculateBleuOfCorpus(words, ref_ids);
|
|
|
|
cerr << "\nBleu after epoch " << epoch << ": ";
|
|
|
|
if (bleu.size() > 0) {
|
|
|
|
cerr << "\nBLEU: " << bleu[4]*100 << ", "
|
|
|
|
<< bleu[3]*100 << "/" << bleu[2]*100 << "/" << bleu[1]*100 << "/" << bleu[0]*100 << " "
|
|
|
|
<< "(BP=" << bleu[5] << ", " << "ratio=" << bleu[6] << ", "
|
|
|
|
<< "hyp_len=" << bleu[7] << ", ref_len=" << bleu[8] << ")" << endl;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
cerr << "BLEU: 0" << endl;
|
|
|
|
}
|
|
|
|
}
|
2010-09-15 18:36:07 +04:00
|
|
|
}
|
2010-09-17 11:35:31 +04:00
|
|
|
|