TargetPreferencesPhraseProperty

This commit is contained in:
Matthias Huck 2016-01-11 20:14:28 +00:00
parent 885b8b33a1
commit a5a4401fe9
3 changed files with 196 additions and 0 deletions

View File

@ -6,6 +6,7 @@
#include "moses/PP/CountsPhraseProperty.h"
#include "moses/PP/SourceLabelsPhraseProperty.h"
#include "moses/PP/TargetPreferencesPhraseProperty.h"
#include "moses/PP/TreeStructurePhraseProperty.h"
#include "moses/PP/SpanLengthPhraseProperty.h"
#include "moses/PP/NonTermContextProperty.h"
@ -57,6 +58,7 @@ PhrasePropertyFactory::PhrasePropertyFactory()
MOSES_PNAME2("Counts", CountsPhraseProperty);
MOSES_PNAME2("SourceLabels", SourceLabelsPhraseProperty);
MOSES_PNAME2("TargetPreferences", TargetPreferencesPhraseProperty);
MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);
MOSES_PNAME2("NonTermContext", NonTermContextProperty);

View File

@ -0,0 +1,123 @@
#include "moses/PP/TargetPreferencesPhraseProperty.h"
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <sstream>
#include <string>
#include <queue>
#include <assert.h>
#include <limits>
namespace Moses
{
void TargetPreferencesPhraseProperty::ProcessValue(const std::string &value)
{
std::istringstream tokenizer(value);
if (! (tokenizer >> m_nNTs)) { // first token: number of non-terminals (incl. left-hand side)
UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of non-terminals. Flawed property?");
}
assert( m_nNTs > 0 );
if (! (tokenizer >> m_totalCount)) { // second token: overall rule count
UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read overall rule count. Flawed property?");
}
assert( m_totalCount > 0.0 );
// read labelled rule items
std::priority_queue<float> ruleLabelledCountsPQ;
while (tokenizer.peek() != EOF) {
try {
TargetPreferencesPhrasePropertyItem item;
size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
if (m_nNTs == 1) {
item.m_labelsRHSCount = m_totalCount;
} else { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
for (size_t i=0; i<m_nNTs-1; ++i) { // RHS non-terminal labels
size_t labelRHS;
if (! (tokenizer >> labelRHS) ) { // RHS non-terminal label
UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side label index. Flawed property?");
}
item.m_labelsRHS.push_back(labelRHS);
}
if (! (tokenizer >> item.m_labelsRHSCount)) {
UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side count. Flawed property?");
}
if (! (tokenizer >> numberOfLHSsGivenRHS)) {
UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of left-hand sides. Flawed property?");
}
}
for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS non-terminal labels seen with this RHS
size_t labelLHS;
if (! (tokenizer >> labelLHS)) { // LHS non-terminal label
UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read left-hand side label index. Flawed property?");
}
float ruleLabelledCount;
if (! (tokenizer >> ruleLabelledCount)) {
UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read count. Flawed property?");
}
item.m_labelsLHSList.push_back( std::make_pair(labelLHS,ruleLabelledCount) );
ruleLabelledCountsPQ.push(ruleLabelledCount);
}
m_labelItems.push_back(item);
} catch (const std::exception &e) {
UTIL_THROW2("TargetPreferencesPhraseProperty: Read error. Flawed property?");
}
}
// keep only top N label vectors
const size_t N=50;
if (ruleLabelledCountsPQ.size() > N) {
float topNRuleLabelledCount = std::numeric_limits<int>::max();
for (size_t i=0; !ruleLabelledCountsPQ.empty() && i<N; ++i) {
topNRuleLabelledCount = ruleLabelledCountsPQ.top();
ruleLabelledCountsPQ.pop();
}
size_t nKept=0;
std::list<TargetPreferencesPhrasePropertyItem>::iterator itemIter=m_labelItems.begin();
while (itemIter!=m_labelItems.end()) {
if (itemIter->m_labelsRHSCount < topNRuleLabelledCount) {
itemIter = m_labelItems.erase(itemIter);
} else {
std::list< std::pair<size_t,float> >::iterator itemLHSIter=(itemIter->m_labelsLHSList).begin();
while (itemLHSIter!=(itemIter->m_labelsLHSList).end()) {
if (itemLHSIter->second < topNRuleLabelledCount) {
itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter);
} else {
if (nKept >= N) {
itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter,(itemIter->m_labelsLHSList).end());
} else {
++nKept;
++itemLHSIter;
}
}
}
if ((itemIter->m_labelsLHSList).empty()) {
itemIter = m_labelItems.erase(itemIter);
} else {
++itemIter;
}
}
}
}
};
} // namespace Moses

View File

@ -0,0 +1,71 @@
#pragma once
#include "moses/PP/PhraseProperty.h"
#include "util/exception.hh"
#include <string>
#include <list>
namespace Moses
{
class TargetPreferencesPhrasePropertyItem
{
friend class TargetPreferencesPhraseProperty;
public:
TargetPreferencesPhrasePropertyItem() {};
float GetTargetPreferencesRHSCount() const {
return m_labelsRHSCount;
};
const std::list<size_t> &GetTargetPreferencesRHS() const {
return m_labelsRHS;
};
const std::list< std::pair<size_t,float> > &GetTargetPreferencesLHSList() const {
return m_labelsLHSList;
};
private:
float m_labelsRHSCount;
std::list<size_t> m_labelsRHS; // should be of size nNTs-1 (empty if initial rule, i.e. no right-hand side non-terminals)
std::list< std::pair<size_t,float> > m_labelsLHSList; // list of left-hand sides for this right-hand side, with counts
};
class TargetPreferencesPhraseProperty : public PhraseProperty
{
public:
TargetPreferencesPhraseProperty() {};
virtual void ProcessValue(const std::string &value);
size_t GetNumberOfNonTerminals() const {
return m_nNTs;
}
float GetTotalCount() const {
return m_totalCount;
}
const std::list<TargetPreferencesPhrasePropertyItem> &GetTargetPreferencesItems() const {
return m_labelItems;
};
virtual const std::string *GetValueString() const {
UTIL_THROW2("TargetPreferencesPhraseProperty: value string not available in this phrase property");
return NULL;
};
protected:
size_t m_nNTs;
float m_totalCount;
std::list<TargetPreferencesPhrasePropertyItem> m_labelItems;
};
} // namespace Moses