2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
|
2013-07-09 18:48:36 +04:00
|
|
|
#include <list>
|
2008-06-11 14:52:57 +04:00
|
|
|
#include "TranslationOptionCollectionConfusionNet.h"
|
2013-06-21 04:17:17 +04:00
|
|
|
#include "ConfusionNet.h"
|
2008-06-11 14:52:57 +04:00
|
|
|
#include "DecodeStep.h"
|
2013-06-28 18:43:56 +04:00
|
|
|
#include "DecodeStepTranslation.h"
|
2013-08-07 15:11:39 +04:00
|
|
|
#include "DecodeStepGeneration.h"
|
2008-06-11 14:52:57 +04:00
|
|
|
#include "FactorCollection.h"
|
2013-07-09 18:48:36 +04:00
|
|
|
#include "FF/InputFeature.h"
|
2013-07-11 15:37:20 +04:00
|
|
|
#include "TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
2013-06-28 18:43:56 +04:00
|
|
|
|
|
|
|
using namespace std;
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
/** constructor; just initialize the base class */
|
2013-05-11 17:13:26 +04:00
|
|
|
TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet(
|
2013-05-29 21:16:15 +04:00
|
|
|
const ConfusionNet &input
|
|
|
|
, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
|
2013-07-09 18:48:36 +04:00
|
|
|
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
|
|
|
|
{
|
|
|
|
const InputFeature *inputFeature = StaticData::Instance().GetInputFeature();
|
|
|
|
CHECK(inputFeature);
|
|
|
|
|
|
|
|
size_t size = input.GetSize();
|
2013-08-02 21:24:36 +04:00
|
|
|
m_inputPathMatrix.resize(size);
|
2013-07-09 18:48:36 +04:00
|
|
|
|
|
|
|
// 1-word phrases
|
|
|
|
for (size_t startPos = 0; startPos < size; ++startPos) {
|
2013-08-02 21:24:36 +04:00
|
|
|
vector<InputPathList> &vec = m_inputPathMatrix[startPos];
|
2013-07-09 19:56:49 +04:00
|
|
|
vec.push_back(InputPathList());
|
|
|
|
InputPathList &list = vec.back();
|
2013-07-09 18:48:36 +04:00
|
|
|
|
2013-07-09 19:56:49 +04:00
|
|
|
WordsRange range(startPos, startPos);
|
2013-08-02 18:54:49 +04:00
|
|
|
const NonTerminalSet &labels = input.GetLabelSet(startPos, startPos);
|
2013-07-09 18:48:36 +04:00
|
|
|
|
2013-07-09 19:56:49 +04:00
|
|
|
const ConfusionNet::Column &col = input.GetColumn(startPos);
|
|
|
|
for (size_t i = 0; i < col.size(); ++i) {
|
|
|
|
const Word &word = col[i].first;
|
|
|
|
Phrase subphrase;
|
|
|
|
subphrase.AddWord(word);
|
2013-07-09 18:48:36 +04:00
|
|
|
|
2013-07-09 19:56:49 +04:00
|
|
|
const std::vector<float> &scores = col[i].second;
|
|
|
|
ScoreComponentCollection *inputScore = new ScoreComponentCollection();
|
|
|
|
inputScore->Assign(inputFeature, scores);
|
2013-07-09 18:48:36 +04:00
|
|
|
|
2013-08-02 18:54:49 +04:00
|
|
|
InputPath *node = new InputPath(subphrase, labels, range, NULL, inputScore);
|
2013-07-09 19:56:49 +04:00
|
|
|
list.push_back(node);
|
2013-07-09 18:48:36 +04:00
|
|
|
|
2013-07-09 19:56:49 +04:00
|
|
|
m_phraseDictionaryQueue.push_back(node);
|
|
|
|
}
|
2013-07-09 18:48:36 +04:00
|
|
|
}
|
|
|
|
|
2013-07-09 19:56:49 +04:00
|
|
|
// subphrases of 2+ words
|
|
|
|
for (size_t phaseSize = 2; phaseSize <= size; ++phaseSize) {
|
|
|
|
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
|
|
|
|
size_t endPos = startPos + phaseSize -1;
|
2013-08-02 18:54:49 +04:00
|
|
|
|
2013-07-09 19:56:49 +04:00
|
|
|
WordsRange range(startPos, endPos);
|
2013-08-02 18:54:49 +04:00
|
|
|
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
|
2013-07-09 19:56:49 +04:00
|
|
|
|
2013-08-02 21:24:36 +04:00
|
|
|
vector<InputPathList> &vec = m_inputPathMatrix[startPos];
|
2013-07-10 21:02:38 +04:00
|
|
|
vec.push_back(InputPathList());
|
2013-07-09 19:56:49 +04:00
|
|
|
InputPathList &list = vec.back();
|
|
|
|
|
|
|
|
// loop thru every previous path
|
|
|
|
const InputPathList &prevNodes = GetInputPathList(startPos, endPos - 1);
|
2013-07-10 21:02:38 +04:00
|
|
|
|
|
|
|
int prevNodesInd = 0;
|
|
|
|
InputPathList::const_iterator iterPath;
|
|
|
|
for (iterPath = prevNodes.begin(); iterPath != prevNodes.end(); ++iterPath) {
|
2013-07-11 19:20:15 +04:00
|
|
|
//for (size_t pathInd = 0; pathInd < prevNodes.size(); ++pathInd) {
|
2013-07-10 21:02:38 +04:00
|
|
|
const InputPath &prevNode = **iterPath;
|
2013-07-11 19:20:15 +04:00
|
|
|
//const InputPath &prevNode = *prevNodes[pathInd];
|
2013-07-10 21:02:38 +04:00
|
|
|
|
2013-07-09 19:56:49 +04:00
|
|
|
const Phrase &prevPhrase = prevNode.GetPhrase();
|
|
|
|
const ScoreComponentCollection *prevInputScore = prevNode.GetInputScore();
|
|
|
|
CHECK(prevInputScore);
|
|
|
|
|
|
|
|
// loop thru every word at this position
|
2013-07-10 21:02:38 +04:00
|
|
|
const ConfusionNet::Column &col = input.GetColumn(endPos);
|
|
|
|
|
2013-07-09 19:56:49 +04:00
|
|
|
for (size_t i = 0; i < col.size(); ++i) {
|
|
|
|
const Word &word = col[i].first;
|
|
|
|
Phrase subphrase(prevPhrase);
|
|
|
|
subphrase.AddWord(word);
|
|
|
|
|
|
|
|
const std::vector<float> &scores = col[i].second;
|
|
|
|
ScoreComponentCollection *inputScore = new ScoreComponentCollection(*prevInputScore);
|
|
|
|
inputScore->PlusEquals(inputFeature, scores);
|
|
|
|
|
2013-08-02 18:54:49 +04:00
|
|
|
InputPath *node = new InputPath(subphrase, labels, range, &prevNode, inputScore);
|
2013-07-09 19:56:49 +04:00
|
|
|
list.push_back(node);
|
|
|
|
|
|
|
|
m_phraseDictionaryQueue.push_back(node);
|
2013-07-10 21:02:38 +04:00
|
|
|
} // for (size_t i = 0; i < col.size(); ++i) {
|
2013-07-09 19:56:49 +04:00
|
|
|
|
2013-07-10 21:02:38 +04:00
|
|
|
++prevNodesInd;
|
|
|
|
} // for (iterPath = prevNodes.begin(); iterPath != prevNodes.end(); ++iterPath) {
|
2013-07-09 19:56:49 +04:00
|
|
|
}
|
2013-07-09 18:48:36 +04:00
|
|
|
}
|
2013-07-11 15:37:20 +04:00
|
|
|
|
|
|
|
// check whether we should be using the old code to supportbinary phrase-table.
|
|
|
|
// eventually, we'll stop support the binary phrase-table and delete this legacy code
|
2013-08-24 00:34:10 +04:00
|
|
|
CheckLEGACY();
|
2013-07-09 19:56:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/** Return the list of input paths stored for the span [startPos, endPos].
 *
 * The list lives at m_inputPathMatrix[startPos][endPos - startPos].
 * All three index preconditions are guarded with CHECK.
 *
 * \param startPos first position of the span
 * \param endPos last position of the span (inclusive), must be >= startPos
 * \return reference to the stored list for that span
 */
InputPathList &TranslationOptionCollectionConfusionNet::GetInputPathList(size_t startPos, size_t endPos)
{
  // validate the row index before it is used to index the matrix
  CHECK(startPos < m_inputPathMatrix.size());
  // endPos < startPos would wrap around in the unsigned subtraction below
  CHECK(startPos <= endPos);

  size_t offset = endPos - startPos;
  CHECK(offset < m_inputPathMatrix[startPos].size());
  return m_inputPathMatrix[startPos][offset];
}
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
/* forcibly create translation option for a particular source word.
|
2011-02-24 16:14:42 +03:00
|
|
|
* call the base class' ProcessOneUnknownWord() for each possible word in the confusion network
|
2008-06-11 14:52:57 +04:00
|
|
|
* at a particular source position
|
|
|
|
*/
|
2011-02-24 16:14:42 +03:00
|
|
|
void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(size_t sourcePos)
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
ConfusionNet const& source=dynamic_cast<ConfusionNet const&>(m_source);
|
|
|
|
|
|
|
|
ConfusionNet::Column const& coll=source.GetColumn(sourcePos);
|
2013-08-13 22:44:52 +04:00
|
|
|
const InputPathList &inputPathList = GetInputPathList(sourcePos, sourcePos);
|
|
|
|
|
|
|
|
ConfusionNet::Column::const_iterator iterCol;
|
|
|
|
InputPathList::const_iterator iterInputPath;
|
2011-02-24 16:14:42 +03:00
|
|
|
size_t j=0;
|
2013-08-13 22:44:52 +04:00
|
|
|
for(iterCol = coll.begin(), iterInputPath = inputPathList.begin();
|
2013-08-16 00:14:04 +04:00
|
|
|
iterCol != coll.end();
|
|
|
|
++iterCol , ++iterInputPath) {
|
|
|
|
const InputPath &inputPath = **iterInputPath;
|
|
|
|
size_t length = source.GetColumnIncrement(sourcePos, j++);
|
|
|
|
const Scores &inputScores = iterCol->second;
|
2013-08-13 22:44:52 +04:00
|
|
|
ProcessOneUnknownWord(inputPath ,sourcePos, length, &inputScores);
|
2011-02-24 16:14:42 +03:00
|
|
|
}
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
2013-07-11 19:19:38 +04:00
|
|
|
void TranslationOptionCollectionConfusionNet::CreateTranslationOptions()
|
|
|
|
{
|
|
|
|
if (!m_useLegacy) {
|
2013-07-11 23:18:06 +04:00
|
|
|
GetTargetPhraseCollectionBatch();
|
2013-07-11 19:19:38 +04:00
|
|
|
}
|
|
|
|
TranslationOptionCollection::CreateTranslationOptions();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-06-28 18:43:56 +04:00
|
|
|
/** create translation options that exactly cover a specific input span.
|
|
|
|
* Called by CreateTranslationOptions() and ProcessUnknownWord()
|
|
|
|
* \param decodeGraph list of decoding steps
|
|
|
|
* \param factorCollection input sentence with all factors
|
|
|
|
* \param startPos first position in input sentence
|
|
|
|
* \param lastPos last position in input sentence
|
|
|
|
* \param adhereTableLimit whether phrase & generation table limits are adhered to
|
|
|
|
*/
|
|
|
|
void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRange(
|
|
|
|
const DecodeGraph &decodeGraph
|
|
|
|
, size_t startPos
|
|
|
|
, size_t endPos
|
|
|
|
, bool adhereTableLimit
|
|
|
|
, size_t graphInd)
|
2013-07-11 15:46:04 +04:00
|
|
|
{
|
|
|
|
if (m_useLegacy) {
|
2013-08-24 00:34:10 +04:00
|
|
|
CreateTranslationOptionsForRangeLEGACY(decodeGraph, startPos, endPos, adhereTableLimit, graphInd);
|
2013-07-11 19:20:15 +04:00
|
|
|
} else {
|
|
|
|
CreateTranslationOptionsForRangeNew(decodeGraph, startPos, endPos, adhereTableLimit, graphInd);
|
2013-07-11 15:46:04 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeNew(
|
|
|
|
const DecodeGraph &decodeGraph
|
|
|
|
, size_t startPos
|
|
|
|
, size_t endPos
|
|
|
|
, bool adhereTableLimit
|
|
|
|
, size_t graphInd)
|
|
|
|
{
|
|
|
|
InputPathList &inputPathList = GetInputPathList(startPos, endPos);
|
|
|
|
InputPathList::iterator iter;
|
|
|
|
for (iter = inputPathList.begin(); iter != inputPathList.end(); ++iter) {
|
2013-07-11 19:20:15 +04:00
|
|
|
InputPath &inputPath = **iter;
|
|
|
|
TranslationOptionCollection::CreateTranslationOptionsForRange(decodeGraph
|
|
|
|
, startPos
|
|
|
|
, endPos
|
|
|
|
, adhereTableLimit
|
|
|
|
, graphInd
|
|
|
|
, inputPath);
|
2013-07-11 15:46:04 +04:00
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-24 00:34:10 +04:00
|
|
|
void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLEGACY(
|
2013-07-11 15:46:04 +04:00
|
|
|
const DecodeGraph &decodeGraph
|
|
|
|
, size_t startPos
|
|
|
|
, size_t endPos
|
|
|
|
, bool adhereTableLimit
|
|
|
|
, size_t graphInd)
|
2013-06-28 18:43:56 +04:00
|
|
|
{
|
|
|
|
if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) {
|
2013-08-07 17:18:12 +04:00
|
|
|
InputPathList &inputPathList = GetInputPathList(startPos, endPos);
|
2013-08-06 18:04:57 +04:00
|
|
|
|
2013-07-11 19:20:15 +04:00
|
|
|
// partial trans opt stored in here
|
|
|
|
PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
|
|
|
|
size_t totalEarlyPruned = 0;
|
2013-06-28 18:43:56 +04:00
|
|
|
|
2013-07-11 19:20:15 +04:00
|
|
|
// initial translation step
|
|
|
|
list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
|
|
|
|
const DecodeStep &decodeStep = **iterStep;
|
|
|
|
|
2013-08-24 00:34:10 +04:00
|
|
|
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslationLEGACY
|
2013-07-11 19:20:15 +04:00
|
|
|
(m_source, *oldPtoc
|
2013-08-06 18:04:57 +04:00
|
|
|
, startPos, endPos, adhereTableLimit, inputPathList );
|
2013-06-28 18:43:56 +04:00
|
|
|
|
2013-07-11 19:20:15 +04:00
|
|
|
// do rest of decode steps
|
|
|
|
int indexStep = 0;
|
|
|
|
|
|
|
|
for (++iterStep ; iterStep != decodeGraph.end() ; ++iterStep) {
|
|
|
|
|
2013-08-07 15:11:39 +04:00
|
|
|
const DecodeStep *decodeStep = *iterStep;
|
|
|
|
const DecodeStepTranslation *transStep =dynamic_cast<const DecodeStepTranslation*>(decodeStep);
|
|
|
|
const DecodeStepGeneration *genStep =dynamic_cast<const DecodeStepGeneration*>(decodeStep);
|
|
|
|
|
2013-07-11 19:20:15 +04:00
|
|
|
PartialTranslOptColl* newPtoc = new PartialTranslOptColl;
|
|
|
|
|
|
|
|
// go thru each intermediate trans opt just created
|
|
|
|
const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();
|
|
|
|
vector<TranslationOption*>::const_iterator iterPartialTranslOpt;
|
|
|
|
for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt) {
|
|
|
|
TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
|
|
|
|
|
2013-08-07 15:11:39 +04:00
|
|
|
if (transStep) {
|
2013-08-24 00:34:10 +04:00
|
|
|
transStep->ProcessLEGACY(inputPartialTranslOpt
|
2013-08-07 17:18:12 +04:00
|
|
|
, *decodeStep
|
|
|
|
, *newPtoc
|
|
|
|
, this
|
2013-08-08 20:10:56 +04:00
|
|
|
, adhereTableLimit);
|
2013-08-07 17:18:12 +04:00
|
|
|
} else {
|
|
|
|
CHECK(genStep);
|
|
|
|
genStep->Process(inputPartialTranslOpt
|
2013-08-07 15:11:39 +04:00
|
|
|
, *decodeStep
|
2013-07-11 19:20:15 +04:00
|
|
|
, *newPtoc
|
|
|
|
, this
|
2013-08-08 20:10:56 +04:00
|
|
|
, adhereTableLimit);
|
2013-08-07 15:11:39 +04:00
|
|
|
}
|
2013-06-28 18:43:56 +04:00
|
|
|
}
|
|
|
|
|
2013-07-11 19:20:15 +04:00
|
|
|
// last but 1 partial trans not required anymore
|
|
|
|
totalEarlyPruned += newPtoc->GetPrunedCount();
|
2013-06-28 18:43:56 +04:00
|
|
|
delete oldPtoc;
|
2013-07-11 19:20:15 +04:00
|
|
|
oldPtoc = newPtoc;
|
|
|
|
|
|
|
|
indexStep++;
|
|
|
|
} // for (++iterStep
|
|
|
|
|
|
|
|
// add to fully formed translation option list
|
|
|
|
PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc;
|
|
|
|
const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();
|
|
|
|
vector<TranslationOption*>::const_iterator iterColl;
|
|
|
|
for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl) {
|
|
|
|
TranslationOption *transOpt = *iterColl;
|
|
|
|
Add(transOpt);
|
|
|
|
}
|
|
|
|
|
|
|
|
lastPartialTranslOptColl.DetachAll();
|
|
|
|
totalEarlyPruned += oldPtoc->GetPrunedCount();
|
|
|
|
delete oldPtoc;
|
|
|
|
// TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
|
2013-06-28 18:43:56 +04:00
|
|
|
|
|
|
|
} // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
|
|
|
|
|
|
|
|
if (graphInd == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough && HasXmlOptionsOverlappingRange(startPos,endPos)) {
|
|
|
|
CreateXmlOptionsForRange(startPos, endPos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-24 00:34:10 +04:00
|
|
|
void TranslationOptionCollectionConfusionNet::CheckLEGACY()
|
2013-07-11 15:37:20 +04:00
|
|
|
{
|
2013-07-11 19:20:15 +04:00
|
|
|
const std::vector<PhraseDictionary*> &pts = StaticData::Instance().GetPhraseDictionaries();
|
|
|
|
for (size_t i = 0; i < pts.size(); ++i) {
|
|
|
|
const PhraseDictionary *phraseDictionary = pts[i];
|
|
|
|
if (dynamic_cast<const PhraseDictionaryTreeAdaptor*>(phraseDictionary) != NULL) {
|
|
|
|
m_useLegacy = true;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
m_useLegacy = false;
|
2013-07-11 15:37:20 +04:00
|
|
|
}
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|