mosesdecoder/moses/ScoreComponentCollection.cpp

301 lines
7.6 KiB
C++
Raw Normal View History

// $Id$
#include <vector>
2013-09-10 17:36:21 +04:00
#include "util/exception.hh"
#include "ScoreComponentCollection.h"
#include "StaticData.h"
using namespace std;
namespace Moses
{
2013-09-08 18:43:18 +04:00
void ScorePair::PlusEquals(const ScorePair &other)
{
2013-09-27 12:35:24 +04:00
PlusEquals(other.denseScores);
std::map<StringPiece, float>::const_iterator iter;
for (iter = other.sparseScores.begin(); iter != other.sparseScores.end(); ++iter) {
PlusEquals(iter->first, iter->second);
}
2013-09-08 18:43:18 +04:00
}
2013-09-08 19:02:59 +04:00
void ScorePair::PlusEquals(const StringPiece &key, float value)
2013-09-08 18:43:18 +04:00
{
2013-09-27 12:35:24 +04:00
std::map<StringPiece, float>::iterator iter;
iter = sparseScores.find(key);
if (iter == sparseScores.end()) {
sparseScores[key] = value;
} else {
float &existingval = iter->second;
existingval += value;
}
2013-09-08 18:43:18 +04:00
}
2011-11-09 21:16:02 +04:00
2013-09-10 17:36:21 +04:00
std::ostream& operator<<(std::ostream& os, const ScorePair& rhs)
{
2013-09-27 12:35:24 +04:00
for (size_t i = 0; i < rhs.denseScores.size(); ++i) {
os << rhs.denseScores[i] << ",";
}
2013-09-10 17:36:21 +04:00
2013-09-27 12:35:24 +04:00
std::map<StringPiece, float>::const_iterator iter;
for (iter = rhs.sparseScores.begin(); iter != rhs.sparseScores.end(); ++iter) {
os << iter->first << "=" << iter->second << ",";
}
2013-09-10 17:36:21 +04:00
2013-09-27 12:35:24 +04:00
return os;
2013-09-10 17:36:21 +04:00
}
2011-11-09 21:16:02 +04:00
//! Maps each registered feature function to the (start, end) index pair of
//! its dense scores; filled in by RegisterScoreProducer().
ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
//! Number of dense score slots handed out so far; RegisterScoreProducer()
//! advances it by the component count of each registered producer.
size_t ScoreComponentCollection::s_denseVectorSize = 0;
2013-09-25 03:13:49 +04:00
//! Default constructor: builds the score vector from the dense vector size
//! known at the time of construction (s_denseVectorSize).
ScoreComponentCollection::ScoreComponentCollection()
  : m_scores(s_denseVectorSize)
{
}
2013-09-27 12:35:24 +04:00
void
2013-09-25 03:13:49 +04:00
ScoreComponentCollection::
RegisterScoreProducer(const FeatureFunction* scoreProducer)
2011-11-09 21:16:02 +04:00
{
size_t start = s_denseVectorSize;
size_t end = start + scoreProducer->GetNumScoreComponents();
2013-09-27 12:35:24 +04:00
VERBOSE(1, "FeatureFunction: "
<< scoreProducer->GetScoreProducerDescription()
<< " start: " << start << " end: " << (end-1) << endl);
2011-11-09 21:16:02 +04:00
s_scoreIndexes[scoreProducer] = pair<size_t,size_t>(start,end);
s_denseVectorSize = end;
}
2011-11-16 15:49:31 +04:00
2013-09-27 12:35:24 +04:00
float
2013-09-25 03:13:49 +04:00
ScoreComponentCollection::
GetWeightedScore() const
{
2013-05-29 21:16:15 +04:00
return m_scores.inner_product(StaticData::Instance().GetAllWeights().m_scores);
}
void ScoreComponentCollection::MultiplyEquals(float scalar)
{
2013-05-29 21:16:15 +04:00
m_scores *= scalar;
}
// Multiply all weights of this sparse producer by a given scalar
2013-05-29 21:16:15 +04:00
void ScoreComponentCollection::MultiplyEquals(const FeatureFunction* sp, float scalar)
{
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
std::stringstream name;
name << i->first;
if (name.str().substr( 0, prefix.length() ).compare( prefix ) == 0)
2013-05-29 21:16:15 +04:00
m_scores[i->first] = i->second * scalar;
}
}
// Count weights belonging to this sparse producer
2013-05-29 21:16:15 +04:00
size_t ScoreComponentCollection::GetNumberWeights(const FeatureFunction* sp)
{
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
size_t weights = 0;
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
std::stringstream name;
name << i->first;
if (name.str().substr( 0, prefix.length() ).compare( prefix ) == 0)
2013-05-29 21:16:15 +04:00
weights++;
}
return weights;
}
void ScoreComponentCollection::DivideEquals(float scalar)
{
2013-05-29 21:16:15 +04:00
m_scores /= scalar;
}
2012-03-14 21:59:59 +04:00
void ScoreComponentCollection::CoreDivideEquals(float scalar)
{
2013-05-29 21:16:15 +04:00
m_scores.coreDivideEquals(scalar);
2012-03-14 21:59:59 +04:00
}
void ScoreComponentCollection::DivideEquals(const ScoreComponentCollection& rhs)
{
2013-05-29 21:16:15 +04:00
m_scores.divideEquals(rhs.m_scores);
}
void ScoreComponentCollection::MultiplyEquals(const ScoreComponentCollection& rhs)
{
2013-05-29 21:16:15 +04:00
m_scores *= rhs.m_scores;
}
2012-05-18 21:58:42 +04:00
void ScoreComponentCollection::MultiplyEqualsBackoff(const ScoreComponentCollection& rhs, float backoff)
2012-05-15 20:49:54 +04:00
{
2012-05-18 21:58:42 +04:00
m_scores.multiplyEqualsBackoff(rhs.m_scores, backoff);
}
//! Forward to FVector::multiplyEquals() with two separate factors.
//! NOTE(review): going by the parameter names, \p core_r0 applies to core
//! features and \p sparse_r0 to sparse ones -- confirm in FVector.
void ScoreComponentCollection::MultiplyEquals(float core_r0, float sparse_r0)
{
  m_scores.multiplyEquals(core_r0, sparse_r0);
}
std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs)
{
2013-05-29 21:16:15 +04:00
os << rhs.m_scores;
return os;
}
2013-05-29 21:16:15 +04:00
void ScoreComponentCollection::L1Normalise()
{
2012-04-26 02:48:12 +04:00
m_scores /= m_scores.l1norm_coreFeatures();
}
2013-05-29 21:16:15 +04:00
//! \return the result of FVector::l1norm() on the score vector.
float ScoreComponentCollection::GetL1Norm() const
{
  const float norm = m_scores.l1norm();
  return norm;
}
2013-05-29 21:16:15 +04:00
//! \return the result of FVector::l2norm() on the score vector.
float ScoreComponentCollection::GetL2Norm() const
{
  const float norm = m_scores.l2norm();
  return norm;
}
2013-05-29 21:16:15 +04:00
//! \return the result of FVector::linfnorm() on the score vector.
float ScoreComponentCollection::GetLInfNorm() const
{
  const float norm = m_scores.linfnorm();
  return norm;
}
2013-05-29 21:16:15 +04:00
//! Apply FVector::l1regularize() with strength \p lambda.
//! \return the count reported by FVector::l1regularize().
//! NOTE(review): the meaning of that count is defined in FVector -- confirm there.
size_t ScoreComponentCollection::L1Regularize(float lambda)
{
  const size_t reported = m_scores.l1regularize(lambda);
  return reported;
}
2013-05-29 21:16:15 +04:00
//! Apply FVector::l2regularize() to the score vector with strength \p lambda.
void ScoreComponentCollection::L2Regularize(float lambda)
{
  m_scores.l2regularize(lambda);
}
2013-05-29 21:16:15 +04:00
//! Apply FVector::sparseL1regularize() with strength \p lambda.
//! \return the count reported by FVector::sparseL1regularize().
//! NOTE(review): the meaning of that count is defined in FVector -- confirm there.
size_t ScoreComponentCollection::SparseL1Regularize(float lambda)
{
  const size_t reported = m_scores.sparseL1regularize(lambda);
  return reported;
}
2013-05-29 21:16:15 +04:00
//! Apply FVector::sparseL2regularize() to the score vector with strength \p lambda.
void ScoreComponentCollection::SparseL2Regularize(float lambda)
{
  m_scores.sparseL2regularize(lambda);
}
2013-05-29 21:16:15 +04:00
//! Write all scores to \p out, one "name value" pair per line.
//! A registered producer with exactly one dense score is written under its
//! plain description; any other producer gets one line per score named
//! "<description>_<k>" with k counting from 1. Sparse features are written
//! last through FVector::write().
void ScoreComponentCollection::Save(ostream& out) const
{
  for (ScoreIndexMap::const_iterator entry = s_scoreIndexes.begin();
       entry != s_scoreIndexes.end(); ++entry) {
    const string name = entry->first->GetScoreProducerDescription();
    const IndexPair range = entry->second; // feature indices

    if (range.second - range.first == 1) {
      out << name << " " << m_scores[range.first] << endl;
      continue;
    }

    for (size_t pos = range.first; pos < range.second; ++pos) {
      ostringstream fullname;
      fullname << name << "_" << (pos + 1 - range.first);
      out << fullname.str() << " " << m_scores[pos] << endl;
    }
  }

  // write sparse features
  m_scores.write(out);
}
2013-05-29 21:16:15 +04:00
void ScoreComponentCollection::Save(const string& filename) const
{
2011-11-16 15:49:31 +04:00
ofstream out(filename.c_str());
if (!out) {
ostringstream msg;
msg << "Unable to open " << filename;
throw runtime_error(msg.str());
}
Save(out);
out.close();
}
2013-09-27 12:35:24 +04:00
void
2013-09-25 03:13:49 +04:00
ScoreComponentCollection::
2014-06-06 23:35:58 +04:00
Assign(const FeatureFunction* sp, const string &line)
2013-05-29 21:16:15 +04:00
{
istringstream istr(line);
while(istr) {
string namestring;
FValue value;
istr >> namestring;
if (!istr) break;
istr >> value;
FName fname(sp->GetScoreProducerDescription(), namestring);
m_scores[fname] = value;
}
}
2013-09-27 12:35:24 +04:00
void
2013-09-25 03:13:49 +04:00
ScoreComponentCollection::
2013-09-27 12:35:24 +04:00
Assign(const FeatureFunction* sp, const std::vector<float>& scores)
2013-09-25 03:13:49 +04:00
{
2013-09-10 17:36:21 +04:00
IndexPair indexes = GetIndexes(sp);
size_t numScores = indexes.second - indexes.first;
if (scores.size() != numScores) {
2013-09-27 12:35:24 +04:00
UTIL_THROW(util::Exception, "Feature function "
<< sp->GetScoreProducerDescription() << " specified "
<< numScores << " dense scores or weights. Actually has "
<< scores.size());
2013-09-10 17:36:21 +04:00
}
for (size_t i = 0; i < scores.size(); ++i) {
m_scores[i + indexes.first] = scores[i];
}
}
void ScoreComponentCollection::InvertDenseFeatures(const FeatureFunction* sp)
{
Scores old_scores = GetScoresForProducer(sp);
Scores new_scores(old_scores.size());
for (size_t i = 0; i != old_scores.size(); ++i) {
new_scores[i] = -old_scores[i];
}
Assign(sp, new_scores);
}
void ScoreComponentCollection::ZeroDenseFeatures(const FeatureFunction* sp)
{
size_t numScores = sp->GetNumScoreComponents();
Scores vec(numScores, 0);
Assign(sp, vec);
}
//! Get the subset of scores that belong to a certain sparse ScoreProducer.
//! The result is a fresh FVector built with s_denseVectorSize; it receives
//! every feature whose printed name starts with the producer description
//! followed by FName::SEP.
FVector ScoreComponentCollection::GetVectorForProducer(const FeatureFunction* sp) const
{
  FVector subset(s_denseVectorSize);
  const std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;

  FVector::FNVmap::const_iterator feat = m_scores.cbegin();
  while (feat != m_scores.cend()) {
    std::stringstream rendered;
    rendered << feat->first;
    const std::string featureName = rendered.str();

    if (featureName.compare(0, prefix.length(), prefix) == 0) {
      subset[feat->first] = feat->second;
    }
    feat++;
  }
  return subset;
}
2013-09-08 17:57:31 +04:00
void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const ScorePair &scorePair)
{
2013-09-27 12:35:24 +04:00
PlusEquals(sp, scorePair.denseScores);
2013-09-08 17:57:31 +04:00
2013-09-27 12:35:24 +04:00
std::map<StringPiece, float>::const_iterator iter;
for (iter = scorePair.sparseScores.begin(); iter != scorePair.sparseScores.end(); ++iter) {
const StringPiece &key = iter->first;
float value = iter->second;
PlusEquals(sp, key, value);
}
2013-09-08 17:57:31 +04:00
}
}