Framework for additional phrase properties in decoding.

Derive your property class from PhraseProperty. Do any expensive string
processing of the property value in there, not in the feature
implementation, and provide methods to access the information in
appropriate data formats. The property value string will thus have to
be processed only once (on loading) rather than each time the respective
phrase is applied and your feature needs to access the property value.
This commit is contained in:
Matthias Huck 2014-05-19 21:54:08 +01:00
parent aac51cec89
commit 1740478238
10 changed files with 209 additions and 31 deletions

View File

@ -48,6 +48,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/FF/TreeStructureFeature.h"
#include "moses/PP/TreeStructurePhraseProperty.h"
#include "util/exception.hh"
using namespace std;
@ -410,17 +411,15 @@ void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, Applica
if (hypo != NULL) {
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
const std::string key = "Tree";
std::string value;
bool hasProperty;
const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
currTarPhr.GetProperty(key, value, hasProperty);
boost::shared_ptr<PhraseProperty> property;
out << " ||| ";
if (hasProperty)
out << " " << value;
else
if (currTarPhr.GetProperty("Tree", property)) {
out << " " << property->GetValueString();
} else {
out << " " << "noTreeInfo";
}
out << std::endl;
}
@ -439,17 +438,15 @@ void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, Applica
if (applied != NULL) {
OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
const std::string key = "Tree";
std::string value;
bool hasProperty;
const TargetPhrase &currTarPhr = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
currTarPhr.GetProperty(key, value, hasProperty);
boost::shared_ptr<PhraseProperty> property;
out << " ||| ";
if (hasProperty)
out << " " << value;
else
if (currTarPhr.GetProperty("Tree", property)) {
out << " " << property->GetValueString();
} else {
out << " " << "noTreeInfo";
}
out << std::endl;
}

View File

@ -6,6 +6,7 @@
#include "moses/TargetPhrase.h"
#include <boost/shared_ptr.hpp>
#include <vector>
#include "moses/PP/TreeStructurePhraseProperty.h"
using namespace std;
@ -270,10 +271,9 @@ FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo
, int featureID /* used to index the state in the previous hypotheses */
, ScoreComponentCollection* accumulator) const
{
std::string tree;
bool found = 0;
cur_hypo.GetCurrTargetPhrase().GetProperty("Tree", tree, found);
if (found) {
boost::shared_ptr<PhraseProperty> property;
if (cur_hypo.GetCurrTargetPhrase().GetProperty("Tree", property)) {
const std::string &tree = property->GetValueString();
TreePointer mytree (new InternalTree(tree));
if (m_labelset) {

View File

@ -65,6 +65,7 @@ lib moses :
FF/*.cpp
FF/OSM-Feature/*.cpp
FF/LexicalReordering/*.cpp
PP/*.cpp
: #exceptions
ThreadPool.cpp
SyntacticLanguageModel.cpp

91
moses/PP/Factory.cpp Normal file
View File

@ -0,0 +1,91 @@
#include "moses/PP/Factory.h"
#include "util/exception.hh"
#include <iostream>
#include <vector>
#include "moses/PP/TreeStructurePhraseProperty.h"
namespace Moses
{
/**
 * Abstract interface for objects that turn a property value string into a
 * PhraseProperty instance.
 */
class PhrasePropertyCreator
{
public:
  virtual ~PhrasePropertyCreator() {}

  /// Builds a property object from the given value string.
  virtual boost::shared_ptr<PhraseProperty> CreateProperty(const std::string &value) = 0;

protected:
  // Only derived creators may be instantiated.
  PhrasePropertyCreator() {}

  /// Wraps a raw property pointer in a boost::shared_ptr of the same static type.
  template <class P>
  boost::shared_ptr<P> Create(P *property) {
    return boost::shared_ptr<P>(property);
  }
};
namespace
{

/**
 * Creator that builds a property of type P from the value string and runs
 * P::ProcessValue() on it before handing it out.
 */
template <class P> class DefaultPhrasePropertyCreator : public PhrasePropertyCreator
{
public:
  /**
   * Constructs a P from value, calls ProcessValue() on it and returns it as
   * a shared pointer to the PhraseProperty base.
   */
  boost::shared_ptr<PhraseProperty> CreateProperty(const std::string &value) {
    // Transfer ownership to a shared_ptr before calling ProcessValue(), so
    // the object is released if ProcessValue() throws.
    boost::shared_ptr<P> property = Create(new P(value));
    property->ProcessValue();
    return property;
  }
};

} // namespace
// Registers the creators for all known phrase property keys via Add().
PhrasePropertyFactory::PhrasePropertyFactory()
{
// Property with same key as class
#define MOSES_PNAME(name) Add(#name, new DefaultPhrasePropertyCreator< name >());
// Properties with different key than class.
#define MOSES_PNAME2(name, type) Add(name, new DefaultPhrasePropertyCreator< type >());
// NOTE(review): neither macro is #undef'd, so both stay defined for the rest of this file.
// The "Tree" key is handled by TreeStructurePhraseProperty.
MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
}
// Nothing to release by hand: the destructor body is intentionally empty.
PhrasePropertyFactory::~PhrasePropertyFactory() {}
/**
 * Registers a creator under the given property key.
 * The creator pointer is placed in a boost::shared_ptr stored in the registry.
 * Registering the same key a second time is reported through UTIL_THROW_IF2.
 */
void PhrasePropertyFactory::Add(const std::string &name, PhrasePropertyCreator *creator)
{
  std::pair<std::string, boost::shared_ptr<PhrasePropertyCreator> > entry(
    name, boost::shared_ptr<PhrasePropertyCreator>(creator));
  // insert() leaves an existing entry untouched and reports that through .second.
  const bool inserted = m_registry.insert(entry).second;
  UTIL_THROW_IF2(!inserted, "Phrase property registered twice: " << name);
}
namespace
{

/// Exception type used when a phrase property key has no registered creator.
class UnknownPhrasePropertyException : public util::Exception
{
};

} // namespace
/**
 * Looks up the creator registered for key and lets it build a property
 * object from value.
 * An unregistered key is reported as UnknownPhrasePropertyException via
 * UTIL_THROW_IF.
 */
boost::shared_ptr<PhraseProperty> PhrasePropertyFactory::ProduceProperty(const std::string &key, const std::string &value) const
{
  const Registry::const_iterator entry = m_registry.find(key);
  UTIL_THROW_IF(entry == m_registry.end(), UnknownPhrasePropertyException, "Phrase property is not registered: " << key);
  // entry->second is the creator stored for this key.
  return entry->second->CreateProperty(value);
}
void PhrasePropertyFactory::PrintPP() const
{
std::cerr << "Registered phrase properties:" << std::endl;
Registry::const_iterator iter;
for (iter = m_registry.begin(); iter != m_registry.end(); ++iter) {
const std::string &ppName = iter->first;
std::cerr << ppName << " ";
}
std::cerr << std::endl;
}
} // namespace Moses

33
moses/PP/Factory.h Normal file
View File

@ -0,0 +1,33 @@
#pragma once
#include "moses/PP/PhraseProperty.h"
#include <string>
#include <boost/shared_ptr.hpp>
#include <boost/unordered_map.hpp>
namespace Moses
{
class PhrasePropertyCreator;
/**
 * Registry that maps phrase property keys to creator objects and produces
 * PhraseProperty instances from key/value string pairs.
 */
class PhrasePropertyFactory
{
public:
  PhrasePropertyFactory();
  ~PhrasePropertyFactory();

  /// Builds the property object for the given key from its value string.
  boost::shared_ptr<PhraseProperty> ProduceProperty(const std::string &key, const std::string &value) const;

  /// Prints the registered property keys.
  void PrintPP() const;

private:
  /// Key -> creator lookup table.
  typedef boost::unordered_map<std::string, boost::shared_ptr<PhrasePropertyCreator> > Registry;

  Registry m_registry;

  /// Registers a creator under the given key.
  void Add(const std::string &name, PhrasePropertyCreator *creator);
};
} // namespace Moses

27
moses/PP/PhraseProperty.h Normal file
View File

@ -0,0 +1,27 @@
#pragma once
#include <string>
#include <iostream>
namespace Moses
{
/** Base class for all phrase properties.
 *
 * Stores the property value string it was constructed with. Derived classes
 * can override ProcessValue() to process that string; the default
 * implementation does nothing.
 */
class PhraseProperty
{
public:
  /** Keeps a copy of the property value string. */
  PhraseProperty(const std::string &value) : m_value(value) {}

  /** Virtual so that deleting a derived property through a base pointer is well-defined. */
  virtual ~PhraseProperty() {}

  /** Hook for derived classes to process the stored value string. No-op by default. */
  virtual void ProcessValue() {}

  /** Returns the property value string. Const, so it can be called on const properties. */
  const std::string &GetValueString() const { return m_value; }

protected:
  const std::string m_value;
};
} // namespace Moses

View File

@ -0,0 +1,18 @@
#pragma once
#include "moses/PP/PhraseProperty.h"
#include <string>
namespace Moses
{
/**
 * Phrase property type derived from PhraseProperty.
 * Adds no processing of its own: the value string is passed straight to the
 * base class constructor.
 */
class TreeStructurePhraseProperty : public PhraseProperty
{
public:
  TreeStructurePhraseProperty(const std::string &value)
    : PhraseProperty(value)
  {
  }
};
} // namespace Moses

View File

@ -42,6 +42,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "SentenceStats.h"
#include "ScoreComponentCollection.h"
#include "moses/FF/Factory.h"
#include "moses/PP/Factory.h"
namespace Moses
{
@ -200,6 +201,7 @@ protected:
bool m_adjacentOnly;
FeatureRegistry m_registry;
PhrasePropertyFactory m_phrasePropertyFactory;
StaticData();
@ -734,6 +736,9 @@ public:
/** Read-only access to the feature registry member. */
const FeatureRegistry &GetFeatureRegistry() const {
  return m_registry;
}
/** Read-only access to the phrase property factory member. */
const PhrasePropertyFactory &GetPhrasePropertyFactory() const {
  return m_phrasePropertyFactory;
}
/** check whether we should be using the old code to support binary phrase-table.
** eventually, we'll stop supporting the binary phrase-table and delete this legacy code
**/

View File

@ -156,7 +156,6 @@ void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath)
void TargetPhrase::SetXMLScore(float score)
{
const StaticData &staticData = StaticData::Instance();
const FeatureFunction* prod = PhraseDictionary::GetColl()[0];
size_t numScores = prod->GetNumScoreComponents();
vector <float> scoreVector(numScores,score/numScores);
@ -240,16 +239,22 @@ void TargetPhrase::SetProperties(const StringPiece &str)
}
}
void TargetPhrase::GetProperty(const std::string &key, std::string &value, bool &found) const
void TargetPhrase::SetProperty(const std::string &key, const std::string &value)
{
std::map<std::string, std::string>::const_iterator iter;
const StaticData &staticData = StaticData::Instance();
const PhrasePropertyFactory& phrasePropertyFactory = staticData.GetPhrasePropertyFactory();
m_properties[key] = phrasePropertyFactory.ProduceProperty(key,value);
}
bool TargetPhrase::GetProperty(const std::string &key, boost::shared_ptr<PhraseProperty> &value) const
{
std::map<std::string, boost::shared_ptr<PhraseProperty> >::const_iterator iter;
iter = m_properties.find(key);
if (iter == m_properties.end()) {
found = false;
} else {
found = true;
if (iter != m_properties.end()) {
value = iter->second;
return true;
}
return false;
}
void TargetPhrase::SetRuleSource(const Phrase &ruleSource) const

View File

@ -28,9 +28,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Phrase.h"
#include "ScoreComponentCollection.h"
#include "AlignmentInfo.h"
#include "moses/PP/PhraseProperty.h"
#include "util/string_piece.hh"
#include <boost/shared_ptr.hpp>
#ifdef HAVE_PROTOBUF
#include "rule.pb.h"
#endif
@ -55,7 +57,8 @@ private:
const Word *m_lhsTarget;
mutable Phrase *m_ruleSource; // to be set by the feature function that needs it.
std::map<std::string, std::string> m_properties;
std::map<std::string, boost::shared_ptr<PhraseProperty> > m_properties;
public:
TargetPhrase();
TargetPhrase(const TargetPhrase &copy);
@ -133,10 +136,8 @@ public:
void SetRuleSource(const Phrase &ruleSource) const;
void SetProperties(const StringPiece &str);
void SetProperty(const std::string &key, const std::string &value) {
m_properties[key] = value;
}
void GetProperty(const std::string &key, std::string &value, bool &found) const;
void SetProperty(const std::string &key, const std::string &value);
bool GetProperty(const std::string &key, boost::shared_ptr<PhraseProperty> &value) const;
void Merge(const TargetPhrase &copy, const std::vector<FactorType>& factorVec);