diff --git a/moses-cmd/IOWrapper.cpp b/moses-cmd/IOWrapper.cpp index b04257e8b..ce017ecf2 100644 --- a/moses-cmd/IOWrapper.cpp +++ b/moses-cmd/IOWrapper.cpp @@ -188,6 +188,25 @@ InputType*IOWrapper::GetInput(InputType* inputType) } } +std::map<size_t, const Factor*> GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) +{ + const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath(); + const Phrase &inputPhrase = inputPath.GetPhrase(); + + std::map<size_t, const Factor*> ret; + + for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) { + const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor); + if (factor) { + std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos); + CHECK(targetPos.size() == 1); + ret[*targetPos.begin()] = factor; + } + } + + return ret; +} + /*** * print surface factor only for the given phrase */ @@ -195,23 +214,31 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector< char reportSegmentation, bool reportAllFactors) { CHECK(outputFactorOrder.size() > 0); - const Phrase& phrase = edge.GetCurrTargetPhrase(); + const TargetPhrase& phrase = edge.GetCurrTargetPhrase(); bool markUnknown = StaticData::Instance().GetMarkUnknown(); if (reportAllFactors == true) { out << phrase; } else { - FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor().second; + FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor().first; + + std::map<size_t, const Factor*> placeholders; + if (placeholderFactor != NOT_FOUND) { + // creates map of target position -> factor for placeholders + placeholders = GetPlaceholders(edge, placeholderFactor); + } size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); - if (placeholderFactor != NOT_FOUND) { - const Factor *origFactor = phrase.GetFactor(pos, placeholderFactor); - if (origFactor) { - factor = origFactor; - } - } + 
if (placeholders.size()) { + // do placeholders + std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos); + if (iter != placeholders.end()) { + factor = iter->second; + } } + CHECK(factor); //preface surface form with UNK if marking unknowns diff --git a/moses-cmd/IOWrapper.h b/moses-cmd/IOWrapper.h index 7e8ae2640..519091e67 100644 --- a/moses-cmd/IOWrapper.h +++ b/moses-cmd/IOWrapper.h @@ -56,6 +56,8 @@ POSSIBILITY OF SUCH DAMAGE. namespace Moses { class ScoreComponentCollection; +class Hypothesis; +class Factor; } namespace MosesCmd @@ -154,6 +156,9 @@ void OutputFeatureScores( std::ostream& out , const Moses::FeatureFunction *ff , std::string &lastName ); +// creates a map of target positions which should be replaced by word using placeholder +std::map<size_t, const Moses::Factor*> GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor); + } #endif diff --git a/moses/InputPath.cpp b/moses/InputPath.cpp index eb27e41f6..3a3d3381b 100644 --- a/moses/InputPath.cpp +++ b/moses/InputPath.cpp @@ -20,14 +20,6 @@ InputPath::InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms, { //cerr << "phrase=" << phrase << " m_inputScore=" << *m_inputScore << endl; - FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor().first; - if (placeholderFactor != NOT_FOUND) { - for (size_t pos = 0; pos < m_phrase.GetSize(); ++pos) { - if (m_phrase.GetFactor(pos, placeholderFactor)) { - m_placeholders.push_back(pos); - } - } - } } InputPath::~InputPath() @@ -63,26 +55,6 @@ void InputPath::SetTargetPhrases(const PhraseDictionary &phraseDictionary m_targetPhrases[&phraseDictionary] = value; } -bool InputPath::SetPlaceholders(TargetPhrase *targetPhrase) const -{ - FactorType sourcePlaceholderFactor = StaticData::Instance().GetPlaceholderFactor().first; - FactorType targetPlaceholderFactor = StaticData::Instance().GetPlaceholderFactor().second; - - const AlignmentInfo &alignments = targetPhrase->GetAlignTerm(); - for (size_t i = 0; i < m_placeholders.size(); 
++i) { - size_t sourcePos = m_placeholders[i]; - set<size_t> targetPos = alignments.GetAlignmentsForSource(sourcePos); - if (targetPos.size() == 1) { - const Word &sourceWord = m_phrase.GetWord(sourcePos); - Word &targetWord = targetPhrase->GetWord(*targetPos.begin()); - targetWord[targetPlaceholderFactor] = sourceWord[sourcePlaceholderFactor]; - } else { - return false; - } - } - return true; -} - const Word &InputPath::GetLastWord() const { size_t len = m_phrase.GetSize(); diff --git a/moses/InputPath.h b/moses/InputPath.h index d152b496b..b06fd485a 100644 --- a/moses/InputPath.h +++ b/moses/InputPath.h @@ -37,8 +37,6 @@ protected: std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> > m_targetPhrases; const NonTerminalSet m_sourceNonTerms; - std::vector<size_t> m_placeholders; - bool SetPlaceholders(TargetPhrase *targetPhrase) const; public: explicit InputPath() diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index 10b7de174..d34ff7394 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -199,7 +199,7 @@ Parameter::Parameter() AddParam("alternate-weight-setting", "aws", "alternate set of weights to used per xml specification"); - AddParam("placeholder-factor", "Which factor to use to store the original text for placeholders"); + AddParam("placeholder-factor", "Which factors to use to store the original text for placeholders. Must be 2, source target"); } Parameter::~Parameter()