prefix subphrase optimization done for confusion networks

This commit is contained in:
Hieu Hoang 2013-07-09 15:48:36 +01:00
parent ffba363961
commit 92163d5091
3 changed files with 65 additions and 9 deletions

View File

@ -1,5 +1,6 @@
#include "InputPath.h" #include "InputPath.h"
#include "ScoreComponentCollection.h" #include "ScoreComponentCollection.h"
namespace Moses namespace Moses
{ {
InputPath::InputPath(const Phrase &phrase, const WordsRange &range, const InputPath *prevNode InputPath::InputPath(const Phrase &phrase, const WordsRange &range, const InputPath *prevNode
@ -7,13 +8,8 @@ InputPath::InputPath(const Phrase &phrase, const WordsRange &range, const InputP
:m_prevNode(prevNode) :m_prevNode(prevNode)
,m_phrase(phrase) ,m_phrase(phrase)
,m_range(range) ,m_range(range)
,m_inputScore(inputScore)
{ {
if (inputScore) {
m_inputScore = new ScoreComponentCollection(*inputScore);
}
else {
m_inputScore = NULL;
}
} }
InputPath::~InputPath() InputPath::~InputPath()

View File

@ -1,11 +1,12 @@
// $Id$ // $Id$
#include <list>
#include "TranslationOptionCollectionConfusionNet.h" #include "TranslationOptionCollectionConfusionNet.h"
#include "ConfusionNet.h" #include "ConfusionNet.h"
#include "DecodeStep.h" #include "DecodeStep.h"
#include "DecodeStepTranslation.h" #include "DecodeStepTranslation.h"
#include "FactorCollection.h" #include "FactorCollection.h"
#include <list> #include "FF/InputFeature.h"
using namespace std; using namespace std;
@ -16,7 +17,63 @@ namespace Moses
TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet( TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet(
const ConfusionNet &input const ConfusionNet &input
, size_t maxNoTransOptPerCoverage, float translationOptionThreshold) , size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold) {} : TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold)
{
const InputFeature *inputFeature = StaticData::Instance().GetInputFeature();
CHECK(inputFeature);
size_t size = input.GetSize();
m_targetPhrasesfromPt.resize(size);
// 1-word phrases
for (size_t startPos = 0; startPos < size; ++startPos) {
vector<InputPathList> &vec = m_targetPhrasesfromPt[startPos];
vec.push_back(InputPathList());
InputPathList &list = vec.back();
WordsRange range(startPos, startPos);
const ConfusionNet::Column &col = input.GetColumn(startPos);
for (size_t i = 0; i < col.size(); ++i) {
const Word &word = col[i].first;
Phrase subphrase;
subphrase.AddWord(word);
const std::vector<float> &scores = col[i].second;
ScoreComponentCollection *inputScore = new ScoreComponentCollection();
inputScore->Assign(inputFeature, scores);
InputPath *node = new InputPath(subphrase, range, NULL, inputScore);
list.push_back(node);
}
}
/*
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
size_t endPos = startPos + phaseSize -1;
vector<InputPathList> &vec = m_targetPhrasesfromPt[startPos];
Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos)));
WordsRange range(startPos, endPos);
InputPath *node;
if (range.GetNumWordsCovered() == 1) {
node = new InputPath(subphrase, range, NULL, NULL);
vec.push_back(node);
} else {
const InputPath &prevNode = GetInputPath(startPos, endPos - 1);
node = new InputPath(subphrase, range, &prevNode, NULL);
vec.push_back(node);
}
m_phraseDictionaryQueue.push_back(node);
}
}
*/
}
/* forcibly create translation option for a particular source word. /* forcibly create translation option for a particular source word.
* call the base class' ProcessOneUnknownWord() for each possible word in the confusion network * call the base class' ProcessOneUnknownWord() for each possible word in the confusion network

View File

@ -15,8 +15,11 @@ class ConfusionNet;
*/ */
class TranslationOptionCollectionConfusionNet : public TranslationOptionCollection class TranslationOptionCollectionConfusionNet : public TranslationOptionCollection
{ {
public:
typedef std::vector< std::vector<InputPathList> > TargetPhraseMatrix;
protected: protected:
typedef std::vector< std::vector<InputPathList> > TargetPhraseMatrix; TargetPhraseMatrix m_targetPhrasesfromPt; /*< contains translation options */
public: public:
TranslationOptionCollectionConfusionNet(const ConfusionNet &source, size_t maxNoTransOptPerCoverage, float translationOptionThreshold); TranslationOptionCollectionConfusionNet(const ConfusionNet &source, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);