2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
#include "DecodeStepGeneration.h"
|
|
|
|
#include "GenerationDictionary.h"
|
|
|
|
#include "TranslationOption.h"
|
|
|
|
#include "TranslationOptionCollection.h"
|
|
|
|
#include "PartialTranslOptColl.h"
|
|
|
|
#include "FactorCollection.h"
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2010-01-28 15:12:57 +03:00
|
|
|
using namespace std;
|
|
|
|
|
2013-12-05 17:06:35 +04:00
|
|
|
/**
 * Build a generation decode step.
 * The three arguments are handed straight to the DecodeStep base-class
 * constructor; this class performs no additional initialisation.
 */
DecodeStepGeneration::DecodeStepGeneration(
  GenerationDictionary* dict,
  const DecodeStep* prev,
  const std::vector<FeatureFunction*> &features)
  : DecodeStep(dict, prev, features)
{
}
|
|
|
|
|
|
|
|
// helpers
|
|
|
|
typedef pair<Word, ScoreComponentCollection> WordPair;
|
|
|
|
typedef list< WordPair > WordList;
|
|
|
|
// 1st = word
|
|
|
|
// 2nd = score
|
|
|
|
typedef list< WordPair >::const_iterator WordListIterator;
|
|
|
|
|
|
|
|
/** used in generation: increases iterators when looping through the exponential number of generation expansions */
|
|
|
|
inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
|
|
|
|
, const vector< WordList > &wordListVector)
|
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
for (size_t currPos = 0 ; currPos < wordListVector.size() ; currPos++) {
|
|
|
|
WordListIterator &iter = wordListIterVector[currPos];
|
|
|
|
iter++;
|
|
|
|
if (iter != wordListVector[currPos].end()) {
|
|
|
|
// eg. 4 -> 5
|
|
|
|
return;
|
|
|
|
} else {
|
|
|
|
// eg 9 -> 10
|
|
|
|
iter = wordListVector[currPos].begin();
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
2013-05-11 17:13:26 +04:00
|
|
|
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
|
2011-02-24 16:14:42 +03:00
|
|
|
, const DecodeStep &decodeStep
|
|
|
|
, PartialTranslOptColl &outputPartialTranslOptColl
|
|
|
|
, TranslationOptionCollection * /* toc */
|
2013-08-08 20:10:56 +04:00
|
|
|
, bool /*adhereTableLimit*/) const
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
|
|
|
|
// word deletion
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt);
|
2013-05-11 17:13:26 +04:00
|
|
|
outputPartialTranslOptColl.Add(newTransOpt);
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
return;
|
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
// normal generation step
|
2010-08-10 17:12:00 +04:00
|
|
|
const GenerationDictionary* generationDictionary = decodeStep.GetGenerationDictionaryFeature();
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
const Phrase &targetPhrase = inputPartialTranslOpt.GetTargetPhrase();
|
2013-08-13 23:36:32 +04:00
|
|
|
const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
|
2008-06-11 14:52:57 +04:00
|
|
|
size_t targetLength = targetPhrase.GetSize();
|
|
|
|
|
|
|
|
// generation list for each word in phrase
|
|
|
|
vector< WordList > wordListVector(targetLength);
|
|
|
|
|
|
|
|
// create generation list
|
|
|
|
int wordListVectorPos = 0;
|
2011-02-24 16:14:42 +03:00
|
|
|
for (size_t currPos = 0 ; currPos < targetLength ; currPos++) { // going thorugh all words
|
|
|
|
// generatable factors for this word to be put in wordList
|
|
|
|
WordList &wordList = wordListVector[wordListVectorPos];
|
|
|
|
const Word &word = targetPhrase.GetWord(currPos);
|
|
|
|
|
|
|
|
// consult dictionary for possible generations for this word
|
|
|
|
const OutputWordCollection *wordColl = generationDictionary->FindWord(word);
|
|
|
|
|
|
|
|
if (wordColl == NULL) {
|
|
|
|
// word not found in generation dictionary
|
|
|
|
//toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
|
|
|
|
return; // can't be part of a phrase, special handling
|
|
|
|
} else {
|
|
|
|
// sort(*wordColl, CompareWordCollScore);
|
|
|
|
OutputWordCollection::const_iterator iterWordColl;
|
|
|
|
for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl) {
|
|
|
|
const Word &outputWord = (*iterWordColl).first;
|
|
|
|
const ScoreComponentCollection& score = (*iterWordColl).second;
|
|
|
|
// enter into word list generated factor(s) and its(their) score(s)
|
|
|
|
wordList.push_back(WordPair(outputWord, score));
|
|
|
|
}
|
|
|
|
|
|
|
|
wordListVectorPos++; // done, next word
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
// use generation list (wordList)
|
|
|
|
// set up iterators (total number of expansions)
|
|
|
|
size_t numIteration = 1;
|
|
|
|
vector< WordListIterator > wordListIterVector(targetLength);
|
|
|
|
vector< const Word* > mergeWords(targetLength);
|
2011-02-24 16:14:42 +03:00
|
|
|
for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
|
|
|
|
wordListIterVector[currPos] = wordListVector[currPos].begin();
|
|
|
|
numIteration *= wordListVector[currPos].size();
|
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
// go thru each possible factor for each word & create hypothesis
|
2011-02-24 16:14:42 +03:00
|
|
|
for (size_t currIter = 0 ; currIter < numIteration ; currIter++) {
|
|
|
|
ScoreComponentCollection generationScore; // total score for this string of words
|
|
|
|
|
|
|
|
// create vector of words with new factors for last phrase
|
|
|
|
for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
|
|
|
|
const WordPair &wordPair = *wordListIterVector[currPos];
|
|
|
|
mergeWords[currPos] = &(wordPair.first);
|
|
|
|
generationScore.PlusEquals(wordPair.second);
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
// merge with existing trans opt
|
2011-11-21 14:49:26 +04:00
|
|
|
Phrase genPhrase( mergeWords);
|
2013-05-08 14:51:25 +04:00
|
|
|
|
|
|
|
if (IsFilteringStep()) {
|
|
|
|
if (!inputPartialTranslOpt.IsCompatible(genPhrase, m_conflictFactors))
|
|
|
|
continue;
|
2011-02-24 16:14:42 +03:00
|
|
|
}
|
|
|
|
|
2013-05-08 14:51:25 +04:00
|
|
|
const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
|
|
|
|
TargetPhrase outPhrase(inPhrase);
|
2013-05-13 16:19:25 +04:00
|
|
|
outPhrase.GetScoreBreakdown().PlusEquals(generationScore);
|
2013-05-08 14:51:25 +04:00
|
|
|
|
|
|
|
outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
|
2014-08-08 18:59:34 +04:00
|
|
|
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply);
|
2013-05-31 01:13:57 +04:00
|
|
|
|
2015-10-25 16:37:59 +03:00
|
|
|
const Range &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
|
2013-05-08 14:51:25 +04:00
|
|
|
|
|
|
|
TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
|
|
|
|
assert(newTransOpt);
|
|
|
|
|
2013-08-13 23:36:32 +04:00
|
|
|
newTransOpt->SetInputPath(inputPath);
|
2013-08-08 18:58:01 +04:00
|
|
|
|
2013-05-11 17:13:26 +04:00
|
|
|
outputPartialTranslOptColl.Add(newTransOpt);
|
2013-05-08 14:51:25 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
// increment iterators
|
|
|
|
IncrementIterators(wordListIterVector, wordListVector);
|
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|