From a5a4401fe9d4ca9ad99f2c2ea9fc1120b88ac753 Mon Sep 17 00:00:00 2001
From: Matthias Huck
Date: Mon, 11 Jan 2016 20:14:28 +0000
Subject: [PATCH] TargetPreferencesPhraseProperty

---
 moses/PP/Factory.cpp                         |   2 +
 moses/PP/TargetPreferencesPhraseProperty.cpp | 147 +++++++++++++++++++
 moses/PP/TargetPreferencesPhraseProperty.h   |  83 +++++++++++
 3 files changed, 232 insertions(+)
 create mode 100644 moses/PP/TargetPreferencesPhraseProperty.cpp
 create mode 100644 moses/PP/TargetPreferencesPhraseProperty.h

diff --git a/moses/PP/Factory.cpp b/moses/PP/Factory.cpp
index cc393b18d..72c927072 100644
--- a/moses/PP/Factory.cpp
+++ b/moses/PP/Factory.cpp
@@ -6,6 +6,7 @@
 
 #include "moses/PP/CountsPhraseProperty.h"
 #include "moses/PP/SourceLabelsPhraseProperty.h"
+#include "moses/PP/TargetPreferencesPhraseProperty.h"
 #include "moses/PP/TreeStructurePhraseProperty.h"
 #include "moses/PP/SpanLengthPhraseProperty.h"
 #include "moses/PP/NonTermContextProperty.h"
@@ -57,6 +58,7 @@ PhrasePropertyFactory::PhrasePropertyFactory()
 
   MOSES_PNAME2("Counts", CountsPhraseProperty);
   MOSES_PNAME2("SourceLabels", SourceLabelsPhraseProperty);
+  MOSES_PNAME2("TargetPreferences", TargetPreferencesPhraseProperty);
   MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
   MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);
   MOSES_PNAME2("NonTermContext", NonTermContextProperty);
diff --git a/moses/PP/TargetPreferencesPhraseProperty.cpp b/moses/PP/TargetPreferencesPhraseProperty.cpp
new file mode 100644
index 000000000..9358ee4bf
--- /dev/null
+++ b/moses/PP/TargetPreferencesPhraseProperty.cpp
@@ -0,0 +1,147 @@
+#include "moses/PP/TargetPreferencesPhraseProperty.h"
+#include <cassert>
+#include <cstdio>
+#include <exception>
+#include <limits>
+#include <list>
+#include <queue>
+#include <sstream>
+#include <string>
+#include <utility>
+
+namespace Moses
+{
+
+/**
+ * Parses the whitespace-separated value string of a TargetPreferences phrase
+ * property and fills m_nNTs, m_totalCount and m_labelItems from it.
+ *
+ * Token layout: nNTs totalCount item...
+ *  - nNTs == 1 (no right-hand side non-terminals): an item is a run of
+ *    "lhsLabel count" pairs that extends to the end of the value; its
+ *    right-hand side count is set to totalCount.
+ *  - nNTs > 1: an item is nNTs-1 right-hand side labels, the right-hand side
+ *    count, the number of left-hand sides, then that many "lhsLabel count"
+ *    pairs.
+ *
+ * If more than N (= 50) labelled counts were read, the N-th largest one is
+ * used as a threshold: items whose right-hand side count is below it and
+ * left-hand side entries whose count is below it are dropped, at most N
+ * left-hand side entries are kept overall, and items left without any
+ * left-hand side entry are dropped.
+ *
+ * Raises an error via UTIL_THROW2 if the value cannot be parsed.
+ */
+void TargetPreferencesPhraseProperty::ProcessValue(const std::string &value)
+{
+  std::istringstream tokenizer(value);
+
+  if (! (tokenizer >> m_nNTs)) { // first token: number of non-terminals (incl. left-hand side)
+    UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of non-terminals. Flawed property?");
+  }
+  assert( m_nNTs > 0 );
+
+  if (! (tokenizer >> m_totalCount)) { // second token: overall rule count
+    UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read overall rule count. Flawed property?");
+  }
+  assert( m_totalCount > 0.0 );
+
+
+  // read labelled rule items
+
+  std::priority_queue<float> ruleLabelledCountsPQ;
+
+  // Skip whitespace before peeking so that trailing blanks in the value are
+  // not mistaken for the start of another item.
+  while ((tokenizer >> std::ws).peek() != EOF) {
+    try {
+
+      TargetPreferencesPhrasePropertyItem item;
+      size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
+
+      if (m_nNTs == 1) {
+
+        item.m_labelsRHSCount = m_totalCount;
+
+      } else { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
+
+        for (size_t i=0; i<m_nNTs-1; ++i) { // RHS non-terminal labels
+          size_t labelRHS;
+          if (! (tokenizer >> labelRHS) ) { // RHS non-terminal label
+            UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side label index. Flawed property?");
+          }
+          item.m_labelsRHS.push_back(labelRHS);
+        }
+
+        if (! (tokenizer >> item.m_labelsRHSCount)) {
+          UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side count. Flawed property?");
+        }
+
+        if (! (tokenizer >> numberOfLHSsGivenRHS)) {
+          UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of left-hand sides. Flawed property?");
+        }
+      }
+
+      for (size_t i=0; i<numberOfLHSsGivenRHS && (tokenizer >> std::ws).peek()!=EOF; ++i) { // LHS non-terminal labels seen with this RHS
+        size_t labelLHS;
+        if (! (tokenizer >> labelLHS)) { // LHS non-terminal label
+          UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read left-hand side label index. Flawed property?");
+        }
+        float ruleLabelledCount;
+        if (! (tokenizer >> ruleLabelledCount)) {
+          UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read count. Flawed property?");
+        }
+        item.m_labelsLHSList.push_back( std::make_pair(labelLHS,ruleLabelledCount) );
+        ruleLabelledCountsPQ.push(ruleLabelledCount);
+      }
+
+      m_labelItems.push_back(item);
+
+    } catch (const std::exception &) {
+      UTIL_THROW2("TargetPreferencesPhraseProperty: Read error. Flawed property?");
+    }
+  }
+
+  // keep only top N label vectors
+  const size_t N=50;
+
+  if (ruleLabelledCountsPQ.size() > N) {
+
+    // pruning threshold: the N-th largest labelled count
+    float topNRuleLabelledCount = std::numeric_limits<float>::max();
+    for (size_t i=0; !ruleLabelledCountsPQ.empty() && i<N; ++i) {
+      topNRuleLabelledCount = ruleLabelledCountsPQ.top();
+      ruleLabelledCountsPQ.pop();
+    }
+
+    size_t nKept=0;
+    std::list<TargetPreferencesPhrasePropertyItem>::iterator itemIter=m_labelItems.begin();
+    while (itemIter!=m_labelItems.end()) {
+      if (itemIter->m_labelsRHSCount < topNRuleLabelledCount) {
+        itemIter = m_labelItems.erase(itemIter);
+      } else {
+        std::list< std::pair<size_t,float> >::iterator itemLHSIter=(itemIter->m_labelsLHSList).begin();
+        while (itemLHSIter!=(itemIter->m_labelsLHSList).end()) {
+          if (itemLHSIter->second < topNRuleLabelledCount) {
+            itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter);
+          } else {
+            if (nKept >= N) {
+              itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter,(itemIter->m_labelsLHSList).end());
+            } else {
+              ++nKept;
+              ++itemLHSIter;
+            }
+          }
+        }
+        if ((itemIter->m_labelsLHSList).empty()) {
+          itemIter = m_labelItems.erase(itemIter);
+        } else {
+          ++itemIter;
+        }
+      }
+    }
+  }
+}
+
+} // namespace Moses
+
diff --git a/moses/PP/TargetPreferencesPhraseProperty.h b/moses/PP/TargetPreferencesPhraseProperty.h
new file mode 100644
index 000000000..84ef9b3c5
--- /dev/null
+++ b/moses/PP/TargetPreferencesPhraseProperty.h
@@ -0,0 +1,83 @@
+
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "util/exception.hh"
+#include <cstddef>
+#include <list>
+#include <string>
+#include <utility>
+
+namespace Moses
+{
+
+/**
+ * One right-hand side label vector of a rule, its count, and the list of
+ * left-hand side labels (with counts) stored for that right-hand side.
+ */
+class TargetPreferencesPhrasePropertyItem
+{
+  friend class TargetPreferencesPhraseProperty;
+
+public:
+  TargetPreferencesPhrasePropertyItem() : m_labelsRHSCount(0) {}
+
+  float GetTargetPreferencesRHSCount() const {
+    return m_labelsRHSCount;
+  }
+
+  const std::list<size_t> &GetTargetPreferencesRHS() const {
+    return m_labelsRHS;
+  }
+
+  const std::list< std::pair<size_t,float> > &GetTargetPreferencesLHSList() const {
+    return m_labelsLHSList;
+  }
+
+private:
+  float m_labelsRHSCount;
+  std::list<size_t> m_labelsRHS; // should be of size nNTs-1 (empty if initial rule, i.e. no right-hand side non-terminals)
+  std::list< std::pair<size_t,float> > m_labelsLHSList; // list of left-hand sides for this right-hand side, with counts
+};
+
+
+/**
+ * Phrase property holding the number of non-terminals, the overall rule
+ * count, and the labelled items that ProcessValue() parses from the
+ * property value. All members are zero/empty until ProcessValue() is called.
+ */
+class TargetPreferencesPhraseProperty : public PhraseProperty
+{
+public:
+  TargetPreferencesPhraseProperty() : m_nNTs(0), m_totalCount(0) {}
+
+  virtual void ProcessValue(const std::string &value);
+
+  size_t GetNumberOfNonTerminals() const {
+    return m_nNTs;
+  }
+
+  float GetTotalCount() const {
+    return m_totalCount;
+  }
+
+  const std::list<TargetPreferencesPhrasePropertyItem> &GetTargetPreferencesItems() const {
+    return m_labelItems;
+  }
+
+  // Not supported by this property: raises an error via UTIL_THROW2.
+  virtual const std::string *GetValueString() const {
+    UTIL_THROW2("TargetPreferencesPhraseProperty: value string not available in this phrase property");
+    return NULL;
+  }
+
+protected:
+
+  size_t m_nNTs; // number of non-terminals (incl. left-hand side)
+  float m_totalCount; // overall rule count
+
+  std::list<TargetPreferencesPhrasePropertyItem> m_labelItems;
+};
+
+} // namespace Moses
+