2012-11-03 03:30:51 +04:00
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2012- University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* This contains extra features that can be added to the scorer. To add a new feature:
|
|
|
|
* 1. Implement a subclass of ScoreFeature
|
2013-05-29 21:16:15 +04:00
|
|
|
* 2. Update ScoreFeatureManager.configure() to configure your feature, and usage() to
|
2012-11-03 03:30:51 +04:00
|
|
|
* display usage info.
|
|
|
|
* 3. Write unit tests (see ScoreFeatureTest.cpp) and regression tests
|
|
|
|
**/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <cmath>
#include <map>
#include <string>
#include <vector>

#include <boost/shared_ptr.hpp>

#include "util/exception.hh"

#include "ExtractionPhrasePair.h"
|
2012-11-03 03:30:51 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace MosesTraining
|
2012-11-03 03:30:51 +04:00
|
|
|
{
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
/**
 * Functor that optionally converts a value to scaled log space.
 *
 * With log output enabled, operator() returns negativeLog * log(a);
 * with it disabled, the argument is returned unchanged.
 **/
struct MaybeLog {
  /**
   * @param useLog       true to apply the logarithm, false to pass values through
   * @param negativeLog  factor the logarithm is multiplied by
   */
  MaybeLog(bool useLog, float negativeLog):
    m_useLog(useLog), m_negativeLog(negativeLog) {}

  /** Returns m_negativeLog * log(a) if m_useLog is set, otherwise a. */
  inline float operator() (float a) const {
    if (!m_useLog) {
      return a;
    }
    // Call std::log on a double so the computation is done in double
    // precision and narrowed once on return, regardless of which ::log
    // overloads other headers happen to make visible.
    return static_cast<float>(m_negativeLog * std::log(static_cast<double>(a)));
  }

  // A flag, so stored as bool (it was previously declared float even though
  // it is initialised from a bool and only ever tested as a condition).
  bool m_useLog;
  float m_negativeLog;
};
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
class ScoreFeatureArgumentException : public util::Exception
|
2012-11-03 03:30:51 +04:00
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
|
|
|
ScoreFeatureArgumentException() throw() {
|
|
|
|
*this << "Unable to configure features: ";
|
|
|
|
}
|
|
|
|
~ScoreFeatureArgumentException() throw() {}
|
2012-11-03 03:30:51 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
/** Passed to each feature to be used to calculate its values */
|
2013-05-29 21:16:15 +04:00
|
|
|
struct ScoreFeatureContext {
|
2012-11-03 03:30:51 +04:00
|
|
|
ScoreFeatureContext(
|
2014-01-29 22:37:42 +04:00
|
|
|
const ExtractionPhrasePair &thePhrasePair,
|
2012-11-03 03:30:51 +04:00
|
|
|
const MaybeLog& theMaybeLog
|
2013-05-29 21:16:15 +04:00
|
|
|
) :
|
2012-11-03 03:30:51 +04:00
|
|
|
phrasePair(thePhrasePair),
|
2013-06-10 21:11:55 +04:00
|
|
|
maybeLog(theMaybeLog) {
|
|
|
|
}
|
2012-11-03 03:30:51 +04:00
|
|
|
|
2014-01-29 22:37:42 +04:00
|
|
|
const ExtractionPhrasePair &phrasePair;
|
2012-11-03 03:30:51 +04:00
|
|
|
MaybeLog maybeLog;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Abstract base class for extra features that can be added to the phrase table
|
|
|
|
* during scoring.
|
|
|
|
**/
|
|
|
|
class ScoreFeature
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
2014-01-29 22:37:42 +04:00
|
|
|
|
|
|
|
/** Some features might need to store properties in ExtractionPhrasePair,
|
2014-05-19 17:35:08 +04:00
|
|
|
* e.g. to pass along external information loaded by a feature
|
2014-01-29 22:37:42 +04:00
|
|
|
* which may distinguish several phrase occurrences based on sentence ID */
|
2014-05-19 17:35:08 +04:00
|
|
|
virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
|
|
|
|
float count,
|
2014-01-29 22:37:42 +04:00
|
|
|
int sentenceId) const {};
|
|
|
|
|
2014-06-11 22:27:18 +04:00
|
|
|
/** Add the values for this score feature. */
|
2013-05-29 21:16:15 +04:00
|
|
|
virtual void add(const ScoreFeatureContext& context,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const = 0;
|
2012-11-03 03:30:51 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
virtual ~ScoreFeature() {}
|
2012-11-03 03:30:51 +04:00
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
/** Shared-ownership (boost::shared_ptr) handle to a ScoreFeature. */
typedef boost::shared_ptr<ScoreFeature> ScoreFeaturePtr;
|
|
|
|
class ScoreFeatureManager
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
|
|
|
ScoreFeatureManager():
|
|
|
|
m_includeSentenceId(false) {}
|
|
|
|
|
|
|
|
/** To be appended to the score usage message */
|
|
|
|
const std::string& usage() const;
|
|
|
|
|
|
|
|
/** Pass the unused command-line arguments to configure the extra features */
|
|
|
|
void configure(const std::vector<std::string> args);
|
|
|
|
|
2014-01-29 22:37:42 +04:00
|
|
|
/** Some features might need to store properties in ExtractionPhrasePair,
|
2014-05-19 17:35:08 +04:00
|
|
|
* e.g. to pass along external information loaded by a feature
|
2014-01-29 22:37:42 +04:00
|
|
|
* which may distinguish several phrase occurrences based on sentence ID */
|
2014-05-19 17:35:08 +04:00
|
|
|
void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
|
|
|
|
float count,
|
2014-01-29 22:37:42 +04:00
|
|
|
int sentenceId) const;
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
/** Add all the features */
|
|
|
|
void addFeatures(const ScoreFeatureContext& context,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const;
|
|
|
|
|
|
|
|
const std::vector<ScoreFeaturePtr>& getFeatures() const {
|
|
|
|
return m_features;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Do we need to include sentence ids in phrase pairs? */
|
|
|
|
bool includeSentenceId() const {
|
|
|
|
return m_includeSentenceId;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::vector<ScoreFeaturePtr> m_features;
|
|
|
|
bool m_includeSentenceId;
|
2012-11-03 03:30:51 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|