From 92163d50913d3d14ae7796e882bf3f6533575b44 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 9 Jul 2013 15:48:36 +0100 Subject: [PATCH] prefix subphrase optimization done for confusion networks --- moses/InputPath.cpp | 8 +-- ...ranslationOptionCollectionConfusionNet.cpp | 61 ++++++++++++++++++- .../TranslationOptionCollectionConfusionNet.h | 5 +- 3 files changed, 65 insertions(+), 9 deletions(-) diff --git a/moses/InputPath.cpp b/moses/InputPath.cpp index 354e48197..43b4207cf 100644 --- a/moses/InputPath.cpp +++ b/moses/InputPath.cpp @@ -1,5 +1,6 @@ #include "InputPath.h" #include "ScoreComponentCollection.h" + namespace Moses { InputPath::InputPath(const Phrase &phrase, const WordsRange &range, const InputPath *prevNode @@ -7,13 +8,8 @@ InputPath::InputPath(const Phrase &phrase, const WordsRange &range, const InputP :m_prevNode(prevNode) ,m_phrase(phrase) ,m_range(range) + ,m_inputScore(inputScore) { - if (inputScore) { - m_inputScore = new ScoreComponentCollection(*inputScore); - } - else { - m_inputScore = NULL; - } } InputPath::~InputPath() diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp index 06e86d7c7..ebe62b0dd 100644 --- a/moses/TranslationOptionCollectionConfusionNet.cpp +++ b/moses/TranslationOptionCollectionConfusionNet.cpp @@ -1,11 +1,12 @@ // $Id$ +#include #include "TranslationOptionCollectionConfusionNet.h" #include "ConfusionNet.h" #include "DecodeStep.h" #include "DecodeStepTranslation.h" #include "FactorCollection.h" -#include +#include "FF/InputFeature.h" using namespace std; @@ -16,7 +17,63 @@ namespace Moses TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet( const ConfusionNet &input , size_t maxNoTransOptPerCoverage, float translationOptionThreshold) - : TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold) {} + : TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold) +{ + 
const InputFeature *inputFeature = StaticData::Instance().GetInputFeature(); + CHECK(inputFeature); + + size_t size = input.GetSize(); + m_targetPhrasesfromPt.resize(size); + + // 1-word phrases + for (size_t startPos = 0; startPos < size; ++startPos) { + vector &vec = m_targetPhrasesfromPt[startPos]; + vec.push_back(InputPathList()); + InputPathList &list = vec.back(); + + WordsRange range(startPos, startPos); + + const ConfusionNet::Column &col = input.GetColumn(startPos); + for (size_t i = 0; i < col.size(); ++i) { + const Word &word = col[i].first; + Phrase subphrase; + subphrase.AddWord(word); + + const std::vector &scores = col[i].second; + ScoreComponentCollection *inputScore = new ScoreComponentCollection(); + inputScore->Assign(inputFeature, scores); + + InputPath *node = new InputPath(subphrase, range, NULL, inputScore); + list.push_back(node); + + } + } + + /* + for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) { + for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) { + size_t endPos = startPos + phaseSize -1; + vector &vec = m_targetPhrasesfromPt[startPos]; + + Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos))); + WordsRange range(startPos, endPos); + + InputPath *node; + if (range.GetNumWordsCovered() == 1) { + node = new InputPath(subphrase, range, NULL, NULL); + vec.push_back(node); + } else { + const InputPath &prevNode = GetInputPath(startPos, endPos - 1); + node = new InputPath(subphrase, range, &prevNode, NULL); + vec.push_back(node); + } + + m_phraseDictionaryQueue.push_back(node); + } + } + */ + +} /* forcibly create translation option for a particular source word. 
* call the base class' ProcessOneUnknownWord() for each possible word in the confusion network diff --git a/moses/TranslationOptionCollectionConfusionNet.h b/moses/TranslationOptionCollectionConfusionNet.h index d24f7288c..0284c44c7 100644 --- a/moses/TranslationOptionCollectionConfusionNet.h +++ b/moses/TranslationOptionCollectionConfusionNet.h @@ -15,8 +15,11 @@ class ConfusionNet; */ class TranslationOptionCollectionConfusionNet : public TranslationOptionCollection { +public: + typedef std::vector< std::vector > TargetPhraseMatrix; + protected: - typedef std::vector< std::vector > TargetPhraseMatrix; + TargetPhraseMatrix m_targetPhrasesfromPt; /**< contains translation options */ public: TranslationOptionCollectionConfusionNet(const ConfusionNet &source, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);