2013-10-02 21:42:56 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
#include <list>
|
|
|
|
#include "TranslationOptionCollectionLattice.h"
|
|
|
|
#include "ConfusionNet.h"
|
|
|
|
#include "WordLattice.h"
|
2013-10-03 14:05:53 +04:00
|
|
|
#include "DecodeGraph.h"
|
2013-10-02 21:42:56 +04:00
|
|
|
#include "DecodeStepTranslation.h"
|
|
|
|
#include "DecodeStepGeneration.h"
|
|
|
|
#include "FactorCollection.h"
|
|
|
|
#include "FF/InputFeature.h"
|
|
|
|
#include "TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
2013-10-03 21:58:45 +04:00
|
|
|
#include "util/exception.hh"
|
2013-10-02 21:42:56 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
|
|
|
|
/** constructor; just initialize the base class */
|
|
|
|
TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(
|
2013-10-03 15:03:09 +04:00
|
|
|
const WordLattice &input
|
2013-10-02 21:42:56 +04:00
|
|
|
, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
|
|
|
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
|
|
|
|
{
|
2013-11-21 21:51:01 +04:00
|
|
|
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
|
|
|
|
"Not for models using the legqacy binary phrase table");
|
2013-10-03 15:03:09 +04:00
|
|
|
|
2013-12-07 04:21:06 +04:00
|
|
|
const InputFeature &inputFeature = InputFeature::Instance();
|
|
|
|
UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified");
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2013-10-05 02:48:58 +04:00
|
|
|
size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength();
|
2013-10-02 21:42:56 +04:00
|
|
|
size_t size = input.GetSize();
|
|
|
|
|
|
|
|
// 1-word phrases
|
|
|
|
for (size_t startPos = 0; startPos < size; ++startPos) {
|
|
|
|
|
2013-10-03 15:03:09 +04:00
|
|
|
const std::vector<size_t> &nextNodes = input.GetNextNodes(startPos);
|
2013-10-02 21:42:56 +04:00
|
|
|
|
|
|
|
const ConfusionNet::Column &col = input.GetColumn(startPos);
|
|
|
|
for (size_t i = 0; i < col.size(); ++i) {
|
|
|
|
const Word &word = col[i].first;
|
2013-11-21 21:51:01 +04:00
|
|
|
UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
|
2013-10-04 17:18:11 +04:00
|
|
|
|
2014-04-29 17:41:20 +04:00
|
|
|
size_t nextNode = nextNodes[i];
|
|
|
|
size_t endPos = startPos + nextNode - 1;
|
|
|
|
|
|
|
|
WordsRange range(startPos, endPos);
|
2014-05-01 19:47:17 +04:00
|
|
|
|
|
|
|
if (range.GetNumWordsCovered() > maxPhraseLength) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2014-04-29 17:41:20 +04:00
|
|
|
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
|
|
|
|
|
2013-10-02 21:42:56 +04:00
|
|
|
Phrase subphrase;
|
|
|
|
subphrase.AddWord(word);
|
|
|
|
|
|
|
|
const ScorePair &scores = col[i].second;
|
|
|
|
ScorePair *inputScore = new ScorePair(scores);
|
|
|
|
|
|
|
|
InputPath *path = new InputPath(subphrase, labels, range, NULL, inputScore);
|
2013-10-03 15:03:09 +04:00
|
|
|
|
|
|
|
path->SetNextNode(nextNode);
|
2013-10-03 21:58:45 +04:00
|
|
|
m_inputPathQueue.push_back(path);
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
// recursive
|
|
|
|
Extend(*path, input);
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
void TranslationOptionCollectionLattice::Extend(const InputPath &prevPath, const WordLattice &input)
|
|
|
|
{
|
|
|
|
size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1;
|
|
|
|
if (nextPos >= input.GetSize()) {
|
|
|
|
return;
|
|
|
|
}
|
2013-10-03 19:58:47 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
size_t startPos = prevPath.GetWordsRange().GetStartPos();
|
|
|
|
const Phrase &prevPhrase = prevPath.GetPhrase();
|
|
|
|
const ScorePair *prevInputScore = prevPath.GetInputScore();
|
|
|
|
UTIL_THROW_IF2(prevInputScore == NULL,
|
|
|
|
"Null previous score");
|
2013-10-03 19:58:47 +04:00
|
|
|
|
2013-10-05 02:48:58 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
const std::vector<size_t> &nextNodes = input.GetNextNodes(nextPos);
|
2013-10-05 02:48:58 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
const ConfusionNet::Column &col = input.GetColumn(nextPos);
|
|
|
|
for (size_t i = 0; i < col.size(); ++i) {
|
|
|
|
const Word &word = col[i].first;
|
|
|
|
UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
size_t nextNode = nextNodes[i];
|
|
|
|
size_t endPos = nextPos + nextNode - 1;
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
WordsRange range(startPos, endPos);
|
2014-05-01 19:47:17 +04:00
|
|
|
|
|
|
|
size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength();
|
|
|
|
if (range.GetNumWordsCovered() > maxPhraseLength) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
Phrase subphrase(prevPhrase);
|
|
|
|
subphrase.AddWord(word);
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
const ScorePair &scores = col[i].second;
|
|
|
|
ScorePair *inputScore = new ScorePair(*prevInputScore);
|
|
|
|
inputScore->PlusEquals(scores);
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
|
2013-10-03 15:03:09 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
path->SetNextNode(nextNode);
|
|
|
|
m_inputPathQueue.push_back(path);
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2014-05-01 18:10:16 +04:00
|
|
|
// recursive
|
|
|
|
Extend(*path, input);
|
2013-10-02 21:42:56 +04:00
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void TranslationOptionCollectionLattice::CreateTranslationOptions()
|
|
|
|
{
|
2013-10-03 21:58:45 +04:00
|
|
|
GetTargetPhraseCollectionBatch();
|
|
|
|
|
|
|
|
VERBOSE(2,"Translation Option Collection\n " << *this << endl);
|
2013-10-04 16:08:14 +04:00
|
|
|
const vector <DecodeGraph*> &decodeGraphs = StaticData::Instance().GetDecodeGraphs();
|
2013-11-21 21:51:01 +04:00
|
|
|
UTIL_THROW_IF2(decodeGraphs.size() != 1, "Multiple decoder graphs not supported yet");
|
2013-10-04 16:08:14 +04:00
|
|
|
const DecodeGraph &decodeGraph = *decodeGraphs[0];
|
2013-11-21 21:51:01 +04:00
|
|
|
UTIL_THROW_IF2(decodeGraph.GetSize() != 1, "Factored decomposition not supported yet");
|
2013-10-04 16:08:14 +04:00
|
|
|
|
|
|
|
const DecodeStep &decodeStep = **decodeGraph.begin();
|
|
|
|
const PhraseDictionary &phraseDictionary = *decodeStep.GetPhraseDictionaryFeature();
|
|
|
|
|
|
|
|
for (size_t i = 0; i < m_inputPathQueue.size(); ++i) {
|
|
|
|
const InputPath &path = *m_inputPathQueue[i];
|
2014-04-28 17:29:39 +04:00
|
|
|
|
2013-10-04 16:08:14 +04:00
|
|
|
const TargetPhraseCollection *tpColl = path.GetTargetPhrases(phraseDictionary);
|
|
|
|
const WordsRange &range = path.GetWordsRange();
|
|
|
|
|
2014-04-28 17:29:39 +04:00
|
|
|
if (tpColl && tpColl->GetSize()) {
|
2014-04-29 17:41:20 +04:00
|
|
|
TargetPhraseCollection::const_iterator iter;
|
2013-10-04 16:08:14 +04:00
|
|
|
for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
|
|
|
|
const TargetPhrase &tp = **iter;
|
|
|
|
TranslationOption *transOpt = new TranslationOption(range, tp);
|
|
|
|
transOpt->SetInputPath(path);
|
2014-08-08 18:42:23 +04:00
|
|
|
transOpt->EvaluateWithSourceContext(m_source);
|
2013-10-04 16:08:14 +04:00
|
|
|
|
|
|
|
Add(transOpt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (path.GetPhrase().GetSize() == 1) {
|
|
|
|
// unknown word processing
|
2014-05-01 18:10:16 +04:00
|
|
|
ProcessOneUnknownWord(path, path.GetWordsRange().GetStartPos(), path.GetWordsRange().GetNumWordsCovered() , path.GetInputScore());
|
2013-10-04 16:08:14 +04:00
|
|
|
}
|
|
|
|
}
|
2013-10-03 21:58:45 +04:00
|
|
|
|
|
|
|
// Prune
|
|
|
|
Prune();
|
|
|
|
|
|
|
|
Sort();
|
|
|
|
|
|
|
|
// future score matrix
|
|
|
|
CalcFutureScore();
|
|
|
|
|
|
|
|
// Cached lex reodering costs
|
|
|
|
CacheLexReordering();
|
|
|
|
|
|
|
|
}
|
2013-10-02 21:42:56 +04:00
|
|
|
|
2013-10-04 16:08:14 +04:00
|
|
|
void TranslationOptionCollectionLattice::ProcessUnknownWord(size_t sourcePos)
|
|
|
|
{
|
|
|
|
UTIL_THROW(util::Exception, "ProcessUnknownWord() not implemented for lattice");
|
|
|
|
}
|
|
|
|
|
2013-10-03 21:58:45 +04:00
|
|
|
void TranslationOptionCollectionLattice::CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
|
|
|
|
, size_t startPosition
|
|
|
|
, size_t endPosition
|
|
|
|
, bool adhereTableLimit
|
|
|
|
, size_t graphInd)
|
2013-10-02 21:42:56 +04:00
|
|
|
{
|
2013-10-03 21:58:45 +04:00
|
|
|
UTIL_THROW(util::Exception, "CreateTranslationOptionsForRange() not implemented for lattice");
|
2013-10-02 21:42:56 +04:00
|
|
|
}
|
|
|
|
|
2013-10-02 23:02:05 +04:00
|
|
|
} // namespace
|
2013-10-02 21:42:56 +04:00
|
|
|
|
|
|
|
|