2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
2010-09-17 18:25:08 +04:00
|
|
|
#include <vector>
|
2015-02-05 19:23:47 +03:00
|
|
|
#include <boost/algorithm/string/predicate.hpp>
|
2015-04-26 18:46:36 +03:00
|
|
|
#include <boost/foreach.hpp>
|
|
|
|
#include <boost/format.hpp>
|
2013-09-10 17:36:21 +04:00
|
|
|
#include "util/exception.hh"
|
2008-06-11 14:52:57 +04:00
|
|
|
#include "ScoreComponentCollection.h"
|
|
|
|
#include "StaticData.h"
|
2014-12-30 16:23:30 +03:00
|
|
|
#include "moses/FF/StatelessFeatureFunction.h"
|
|
|
|
#include "moses/FF/StatefulFeatureFunction.h"
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2010-10-07 02:06:49 +04:00
|
|
|
using namespace std;
|
2015-02-05 19:23:47 +03:00
|
|
|
using namespace boost::algorithm;
|
2010-10-07 02:06:49 +04:00
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2013-09-08 18:43:18 +04:00
|
|
|
void ScorePair::PlusEquals(const ScorePair &other)
|
|
|
|
{
|
2013-09-27 12:35:24 +04:00
|
|
|
PlusEquals(other.denseScores);
|
|
|
|
std::map<StringPiece, float>::const_iterator iter;
|
|
|
|
for (iter = other.sparseScores.begin(); iter != other.sparseScores.end(); ++iter) {
|
|
|
|
PlusEquals(iter->first, iter->second);
|
|
|
|
}
|
2013-09-08 18:43:18 +04:00
|
|
|
}
|
|
|
|
|
2013-09-08 19:02:59 +04:00
|
|
|
void ScorePair::PlusEquals(const StringPiece &key, float value)
|
2013-09-08 18:43:18 +04:00
|
|
|
{
|
2013-09-27 12:35:24 +04:00
|
|
|
std::map<StringPiece, float>::iterator iter;
|
|
|
|
iter = sparseScores.find(key);
|
|
|
|
if (iter == sparseScores.end()) {
|
|
|
|
sparseScores[key] = value;
|
|
|
|
} else {
|
|
|
|
float &existingval = iter->second;
|
|
|
|
existingval += value;
|
|
|
|
}
|
2013-09-08 18:43:18 +04:00
|
|
|
}
|
2011-11-09 21:16:02 +04:00
|
|
|
|
2013-09-10 17:36:21 +04:00
|
|
|
std::ostream& operator<<(std::ostream& os, const ScorePair& rhs)
|
|
|
|
{
|
2013-09-27 12:35:24 +04:00
|
|
|
for (size_t i = 0; i < rhs.denseScores.size(); ++i) {
|
|
|
|
os << rhs.denseScores[i] << ",";
|
|
|
|
}
|
2013-09-10 17:36:21 +04:00
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
std::map<StringPiece, float>::const_iterator iter;
|
|
|
|
for (iter = rhs.sparseScores.begin(); iter != rhs.sparseScores.end(); ++iter) {
|
|
|
|
os << iter->first << "=" << iter->second << ",";
|
|
|
|
}
|
2013-09-10 17:36:21 +04:00
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
return os;
|
2013-09-10 17:36:21 +04:00
|
|
|
}
|
|
|
|
|
2015-04-26 18:46:36 +03:00
|
|
|
//ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
|
2011-11-09 21:16:02 +04:00
|
|
|
// Running size of the dense part of the score vector. Starts at 0, is
// advanced by RegisterScoreProducer(), and is used to size m_scores in the
// default constructor.
size_t ScoreComponentCollection::s_denseVectorSize = 0;
|
|
|
|
|
2013-09-25 03:13:49 +04:00
|
|
|
//! Default constructor: builds m_scores from the current value of
//! s_denseVectorSize.
ScoreComponentCollection::ScoreComponentCollection()
  : m_scores(s_denseVectorSize)
{
}
|
|
|
|
|
2010-09-28 19:13:50 +04:00
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
void
|
2013-09-25 03:13:49 +04:00
|
|
|
ScoreComponentCollection::
|
2015-04-26 18:46:36 +03:00
|
|
|
RegisterScoreProducer(FeatureFunction* scoreProducer)
|
2011-11-09 21:16:02 +04:00
|
|
|
{
|
|
|
|
size_t start = s_denseVectorSize;
|
2015-04-26 18:46:36 +03:00
|
|
|
s_denseVectorSize = scoreProducer->SetIndex(s_denseVectorSize);
|
2013-09-27 12:35:24 +04:00
|
|
|
VERBOSE(1, "FeatureFunction: "
|
|
|
|
<< scoreProducer->GetScoreProducerDescription()
|
2015-04-30 08:05:11 +03:00
|
|
|
<< " start: " << start
|
2015-05-02 13:45:24 +03:00
|
|
|
<< " end: " << (s_denseVectorSize-1) << endl);
|
2011-11-09 21:16:02 +04:00
|
|
|
}
|
|
|
|
|
2011-11-16 15:49:31 +04:00
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
float
|
2013-09-25 03:13:49 +04:00
|
|
|
ScoreComponentCollection::
|
|
|
|
GetWeightedScore() const
|
2010-04-08 21:16:10 +04:00
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
return m_scores.inner_product(StaticData::Instance().GetAllWeights().m_scores);
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
|
2010-09-17 19:20:55 +04:00
|
|
|
void ScoreComponentCollection::MultiplyEquals(float scalar)
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
m_scores *= scalar;
|
2010-09-17 19:20:55 +04:00
|
|
|
}
|
2010-10-28 16:41:33 +04:00
|
|
|
|
2011-11-24 23:27:12 +04:00
|
|
|
// Multiply all weights of this sparse producer by a given scalar
|
2013-05-29 21:16:15 +04:00
|
|
|
void ScoreComponentCollection::MultiplyEquals(const FeatureFunction* sp, float scalar)
|
|
|
|
{
|
2012-01-09 16:04:18 +04:00
|
|
|
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
|
2011-11-24 23:27:12 +04:00
|
|
|
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
|
|
|
|
std::stringstream name;
|
|
|
|
name << i->first;
|
2015-02-05 19:23:47 +03:00
|
|
|
if (starts_with(name.str(), prefix))
|
2013-05-29 21:16:15 +04:00
|
|
|
m_scores[i->first] = i->second * scalar;
|
2011-11-24 23:27:12 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-03-01 16:22:12 +04:00
|
|
|
// Count weights belonging to this sparse producer
|
2013-05-29 21:16:15 +04:00
|
|
|
size_t ScoreComponentCollection::GetNumberWeights(const FeatureFunction* sp)
|
|
|
|
{
|
2012-03-01 16:22:12 +04:00
|
|
|
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
|
|
|
|
size_t weights = 0;
|
|
|
|
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
|
|
|
|
std::stringstream name;
|
|
|
|
name << i->first;
|
2015-02-05 19:23:47 +03:00
|
|
|
if (starts_with(name.str(), prefix))
|
2013-05-29 21:16:15 +04:00
|
|
|
weights++;
|
2012-03-01 16:22:12 +04:00
|
|
|
}
|
|
|
|
return weights;
|
|
|
|
}
|
|
|
|
|
2010-10-28 16:41:33 +04:00
|
|
|
void ScoreComponentCollection::DivideEquals(float scalar)
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
m_scores /= scalar;
|
2010-10-28 16:41:33 +04:00
|
|
|
}
|
2011-08-06 18:10:43 +04:00
|
|
|
|
2012-03-14 21:59:59 +04:00
|
|
|
void ScoreComponentCollection::CoreDivideEquals(float scalar)
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
m_scores.coreDivideEquals(scalar);
|
2012-03-14 21:59:59 +04:00
|
|
|
}
|
|
|
|
|
2012-03-13 20:34:48 +04:00
|
|
|
void ScoreComponentCollection::DivideEquals(const ScoreComponentCollection& rhs)
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
m_scores.divideEquals(rhs.m_scores);
|
2012-03-13 20:34:48 +04:00
|
|
|
}
|
|
|
|
|
2010-09-17 19:33:21 +04:00
|
|
|
void ScoreComponentCollection::MultiplyEquals(const ScoreComponentCollection& rhs)
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
m_scores *= rhs.m_scores;
|
2010-09-17 19:33:21 +04:00
|
|
|
}
|
|
|
|
|
2012-05-18 21:58:42 +04:00
|
|
|
void ScoreComponentCollection::MultiplyEqualsBackoff(const ScoreComponentCollection& rhs, float backoff)
|
2012-05-15 20:49:54 +04:00
|
|
|
{
|
2012-05-18 21:58:42 +04:00
|
|
|
m_scores.multiplyEqualsBackoff(rhs.m_scores, backoff);
|
|
|
|
}
|
|
|
|
|
|
|
|
void ScoreComponentCollection::MultiplyEquals(float core_r0, float sparse_r0)
|
|
|
|
{
|
|
|
|
m_scores.multiplyEquals(core_r0, sparse_r0);
|
2012-05-15 20:49:54 +04:00
|
|
|
}
|
|
|
|
|
2010-09-16 11:06:42 +04:00
|
|
|
std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs)
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
os << rhs.m_scores;
|
|
|
|
return os;
|
2010-09-16 11:06:42 +04:00
|
|
|
}
|
2013-05-29 21:16:15 +04:00
|
|
|
void ScoreComponentCollection::L1Normalise()
|
|
|
|
{
|
2012-04-26 02:48:12 +04:00
|
|
|
m_scores /= m_scores.l1norm_coreFeatures();
|
2010-09-28 19:13:50 +04:00
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
float ScoreComponentCollection::GetL1Norm() const
|
|
|
|
{
|
2011-05-31 19:39:48 +04:00
|
|
|
return m_scores.l1norm();
|
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
float ScoreComponentCollection::GetL2Norm() const
|
|
|
|
{
|
2011-04-23 16:13:43 +04:00
|
|
|
return m_scores.l2norm();
|
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
float ScoreComponentCollection::GetLInfNorm() const
|
|
|
|
{
|
2011-12-23 14:55:54 +04:00
|
|
|
return m_scores.linfnorm();
|
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
size_t ScoreComponentCollection::L1Regularize(float lambda)
|
|
|
|
{
|
2012-07-26 20:32:50 +04:00
|
|
|
return m_scores.l1regularize(lambda);
|
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
void ScoreComponentCollection::L2Regularize(float lambda)
|
|
|
|
{
|
2012-07-26 20:32:50 +04:00
|
|
|
m_scores.l2regularize(lambda);
|
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
size_t ScoreComponentCollection::SparseL1Regularize(float lambda)
|
|
|
|
{
|
2012-06-01 04:49:42 +04:00
|
|
|
return m_scores.sparseL1regularize(lambda);
|
2012-05-10 21:54:24 +04:00
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
void ScoreComponentCollection::SparseL2Regularize(float lambda)
|
|
|
|
{
|
2012-06-01 04:49:42 +04:00
|
|
|
m_scores.sparseL2regularize(lambda);
|
2012-05-10 21:54:24 +04:00
|
|
|
}
|
|
|
|
|
2014-08-06 14:38:51 +04:00
|
|
|
//! Write all scores to `out`.
//! With `multiline` set, each entry is written as "name value" followed by a
//! newline; otherwise as "name=value" followed by a space.
//! Dense scores come first, one entry per registered feature function
//! component. A producer with exactly one component is written under its
//! description; any other producer gets "<description>_<k>" with k counting
//! from 1. The score vector's own write() is then called for the sparse part.
void ScoreComponentCollection::Save(ostream& out, bool multiline) const
{
  string sep(multiline ? " " : "=");
  string linesep(multiline ? "\n" : " ");

  std::vector<FeatureFunction*> const& producers
    = FeatureFunction::GetFeatureFunctions();
  BOOST_FOREACH(FeatureFunction const* ff, producers) {
    const string label = ff->GetScoreProducerDescription();
    const size_t first = ff->GetIndex();
    const size_t count = ff->GetNumScoreComponents();

    if (count == 1) {
      out << label << sep << m_scores[first] << linesep;
      continue;
    }

    // Several components (or none): number them starting from 1.
    boost::format fmt("%s_%d");
    for (size_t k = 0; k < count; ++k) {
      out << fmt % label % (k + 1) << sep << m_scores[first + k] << linesep;
    }
  }

  // write sparse features
  m_scores.write(out,sep,linesep);
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
void ScoreComponentCollection::Save(const string& filename) const
|
|
|
|
{
|
2011-11-16 15:49:31 +04:00
|
|
|
ofstream out(filename.c_str());
|
|
|
|
if (!out) {
|
|
|
|
ostringstream msg;
|
|
|
|
msg << "Unable to open " << filename;
|
|
|
|
throw runtime_error(msg.str());
|
|
|
|
}
|
|
|
|
Save(out);
|
2011-11-16 13:13:17 +04:00
|
|
|
out.close();
|
|
|
|
}
|
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
void
|
2013-09-25 03:13:49 +04:00
|
|
|
ScoreComponentCollection::
|
2014-06-06 23:35:58 +04:00
|
|
|
Assign(const FeatureFunction* sp, const string &line)
|
2013-05-29 21:16:15 +04:00
|
|
|
{
|
2011-09-20 19:32:26 +04:00
|
|
|
istringstream istr(line);
|
|
|
|
while(istr) {
|
|
|
|
string namestring;
|
|
|
|
FValue value;
|
|
|
|
istr >> namestring;
|
|
|
|
if (!istr) break;
|
|
|
|
istr >> value;
|
|
|
|
FName fname(sp->GetScoreProducerDescription(), namestring);
|
|
|
|
m_scores[fname] = value;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
void
|
2013-09-25 03:13:49 +04:00
|
|
|
ScoreComponentCollection::
|
2013-09-27 12:35:24 +04:00
|
|
|
Assign(const FeatureFunction* sp, const std::vector<float>& scores)
|
2013-09-25 03:13:49 +04:00
|
|
|
{
|
2015-04-26 18:46:36 +03:00
|
|
|
size_t numScores = sp->GetNumScoreComponents();
|
|
|
|
size_t offset = sp->GetIndex();
|
2013-09-10 17:36:21 +04:00
|
|
|
|
|
|
|
if (scores.size() != numScores) {
|
2013-09-27 12:35:24 +04:00
|
|
|
UTIL_THROW(util::Exception, "Feature function "
|
|
|
|
<< sp->GetScoreProducerDescription() << " specified "
|
|
|
|
<< numScores << " dense scores or weights. Actually has "
|
|
|
|
<< scores.size());
|
2013-09-10 17:36:21 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < scores.size(); ++i) {
|
2015-04-26 18:46:36 +03:00
|
|
|
m_scores[i + offset] = scores[i];
|
2013-09-10 17:36:21 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-07-31 15:32:53 +04:00
|
|
|
void ScoreComponentCollection::InvertDenseFeatures(const FeatureFunction* sp)
|
|
|
|
{
|
|
|
|
|
|
|
|
Scores old_scores = GetScoresForProducer(sp);
|
|
|
|
Scores new_scores(old_scores.size());
|
|
|
|
|
|
|
|
for (size_t i = 0; i != old_scores.size(); ++i) {
|
|
|
|
new_scores[i] = -old_scores[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
Assign(sp, new_scores);
|
|
|
|
}
|
|
|
|
|
2013-05-13 16:19:25 +04:00
|
|
|
void ScoreComponentCollection::ZeroDenseFeatures(const FeatureFunction* sp)
|
|
|
|
{
|
|
|
|
size_t numScores = sp->GetNumScoreComponents();
|
|
|
|
Scores vec(numScores, 0);
|
|
|
|
|
|
|
|
Assign(sp, vec);
|
|
|
|
}
|
|
|
|
|
2013-05-15 14:37:21 +04:00
|
|
|
//! get subset of scores that belong to a certain sparse ScoreProducer
|
|
|
|
FVector ScoreComponentCollection::GetVectorForProducer(const FeatureFunction* sp) const
|
|
|
|
{
|
|
|
|
FVector fv(s_denseVectorSize);
|
|
|
|
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
|
|
|
|
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
|
|
|
|
std::stringstream name;
|
|
|
|
name << i->first;
|
2015-02-05 19:23:47 +03:00
|
|
|
if (starts_with(name.str(), prefix))
|
2013-05-15 14:37:21 +04:00
|
|
|
fv[i->first] = i->second;
|
|
|
|
}
|
|
|
|
return fv;
|
|
|
|
}
|
|
|
|
|
2013-09-08 17:57:31 +04:00
|
|
|
void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const ScorePair &scorePair)
|
|
|
|
{
|
2013-09-27 12:35:24 +04:00
|
|
|
PlusEquals(sp, scorePair.denseScores);
|
2013-09-08 17:57:31 +04:00
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
std::map<StringPiece, float>::const_iterator iter;
|
|
|
|
for (iter = scorePair.sparseScores.begin(); iter != scorePair.sparseScores.end(); ++iter) {
|
|
|
|
const StringPiece &key = iter->first;
|
|
|
|
float value = iter->second;
|
|
|
|
PlusEquals(sp, key, value);
|
|
|
|
}
|
2013-09-08 17:57:31 +04:00
|
|
|
}
|
|
|
|
|
2014-12-30 16:23:30 +03:00
|
|
|
void ScoreComponentCollection::OutputAllFeatureScores(std::ostream &out) const
|
|
|
|
{
|
|
|
|
std::string lastName = "";
|
|
|
|
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
|
|
|
for( size_t i=0; i<sff.size(); i++ ) {
|
|
|
|
const StatefulFeatureFunction *ff = sff[i];
|
2015-01-08 14:49:53 +03:00
|
|
|
if (ff->IsTuneable()) {
|
2014-12-30 16:23:30 +03:00
|
|
|
OutputFeatureScores( out, ff, lastName );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
|
|
|
for( size_t i=0; i<slf.size(); i++ ) {
|
|
|
|
const StatelessFeatureFunction *ff = slf[i];
|
|
|
|
if (ff->IsTuneable()) {
|
|
|
|
OutputFeatureScores( out, ff, lastName );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
|
2015-01-14 14:07:42 +03:00
|
|
|
, const FeatureFunction *ff
|
|
|
|
, std::string &lastName ) const
|
2014-12-30 16:23:30 +03:00
|
|
|
{
|
|
|
|
const StaticData &staticData = StaticData::Instance();
|
|
|
|
bool labeledOutput = staticData.IsLabeledNBestList();
|
|
|
|
|
|
|
|
// regular features (not sparse)
|
2015-02-09 16:52:05 +03:00
|
|
|
if (ff->HasTuneableComponents()) {
|
2014-12-30 16:23:30 +03:00
|
|
|
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
|
|
|
|
lastName = ff->GetScoreProducerDescription();
|
|
|
|
out << " " << lastName << "=";
|
|
|
|
}
|
|
|
|
vector<float> scores = GetScoresForProducer( ff );
|
|
|
|
for (size_t j = 0; j<scores.size(); ++j) {
|
2015-02-09 16:52:05 +03:00
|
|
|
if (ff->IsTuneableComponent(j)) {
|
|
|
|
out << " " << scores[j];
|
|
|
|
}
|
2014-12-30 16:23:30 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// sparse features
|
|
|
|
const FVector scores = GetVectorForProducer( ff );
|
|
|
|
for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
|
|
|
|
out << " " << i->first << "= " << i->second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|