Use array for dense features

This commit is contained in:
Barry Haddow 2011-11-09 17:16:02 +00:00
parent 28ac3bffba
commit 5934817d89
9 changed files with 180 additions and 43 deletions

View File

@ -91,6 +91,14 @@ namespace Moses {
// Construct a feature vector whose core feature array is created with
// coreFeatures entries.
// NOTE(review): m_coreFeatures looks like a std::valarray (resize() assigns a
// valarray<FValue> to it) - if so, keep the parenthesised initialiser, since
// braces would pick the initializer-list constructor instead of the size one.
FVector::FVector(size_t coreFeatures) :
m_coreFeatures(coreFeatures) {}
void FVector::resize(size_t newsize) {
valarray<FValue> newCoreFeatures(newsize);
for (size_t i = 0; i < min(newsize,m_coreFeatures.size()); ++i) {
newCoreFeatures[i] = m_coreFeatures[i];
}
m_coreFeatures = newCoreFeatures;
}
void FVector::clear() {
m_features.clear();
@ -254,7 +262,9 @@ namespace Moses {
}
FVector& FVector::operator+= (const FVector& rhs) {
assert(m_coreFeatures.size() == rhs.m_coreFeatures.size());
if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
resize(rhs.m_coreFeatures.size());
}
for (iterator i = begin(); i != end(); ++i) {
set(i->first,i->second + rhs.get(i->first));
}
@ -263,12 +273,18 @@ namespace Moses {
set(i->first,i->second);
}
}
m_coreFeatures += rhs.m_coreFeatures;
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
if (i < rhs.m_coreFeatures.size()) {
m_coreFeatures[i] += rhs.m_coreFeatures[i];
}
}
return *this;
}
FVector& FVector::operator-= (const FVector& rhs) {
assert(m_coreFeatures.size() == rhs.m_coreFeatures.size());
if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
resize(rhs.m_coreFeatures.size());
}
for (iterator i = begin(); i != end(); ++i) {
set(i->first,i->second - rhs.get(i->first));
}
@ -277,34 +293,56 @@ namespace Moses {
set(i->first,-(i->second));
}
}
m_coreFeatures -= rhs.m_coreFeatures;
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
if (i < rhs.m_coreFeatures.size()) {
m_coreFeatures[i] -= rhs.m_coreFeatures[i];
}
}
return *this;
}
/**
 * Element-wise multiplication by rhs.
 * Each named feature currently in this vector is multiplied by
 * rhs.get(name). For the core features, the two sides may differ in length:
 * if rhs is longer this vector is grown first, and any core feature that rhs
 * does not have is taken to be 0 on the rhs side (so the product is 0).
 */
FVector& FVector::operator*= (const FVector& rhs) {
  const size_t rhsCoreSize = rhs.m_coreFeatures.size();
  if (rhsCoreSize > m_coreFeatures.size()) {
    resize(rhsCoreSize);
  }
  for (iterator i = begin(); i != end(); ++i) {
    FValue lhsValue = i->second;
    FValue rhsValue = rhs.get(i->first);
    set(i->first,lhsValue*rhsValue);
  }
  // After the resize above, m_coreFeatures.size() >= rhsCoreSize, so the core
  // array splits into a part shared with rhs and a tail that rhs lacks.
  // The multiplication is applied exactly once per element.
  for (size_t i = 0; i < rhsCoreSize; ++i) {
    m_coreFeatures[i] *= rhs.m_coreFeatures[i];
  }
  for (size_t i = rhsCoreSize; i < m_coreFeatures.size(); ++i) {
    m_coreFeatures[i] = 0;
  }
  return *this;
}
/**
 * Element-wise division by rhs.
 * Each named feature currently in this vector is divided by rhs.get(name).
 * For the core features, the two sides may differ in length: if rhs is longer
 * this vector is grown first, and any core feature that rhs does not have is
 * taken to be 0 on the rhs side, i.e. the element is divided by zero.
 */
FVector& FVector::operator/= (const FVector& rhs) {
  const size_t rhsCoreSize = rhs.m_coreFeatures.size();
  if (rhsCoreSize > m_coreFeatures.size()) {
    resize(rhsCoreSize);
  }
  for (iterator i = begin(); i != end(); ++i) {
    FValue lhsValue = i->second;
    FValue rhsValue = rhs.get(i->first);
    set(i->first, lhsValue / rhsValue) ;
  }
  // After the resize above, m_coreFeatures.size() >= rhsCoreSize.
  // The division is applied exactly once per element.
  for (size_t i = 0; i < rhsCoreSize; ++i) {
    m_coreFeatures[i] /= rhs.m_coreFeatures[i];
  }
  // Deliberate division by zero for the tail missing from rhs; with IEEE
  // floating point this presumably yields +/-infinity (or NaN for 0/0).
  for (size_t i = rhsCoreSize; i < m_coreFeatures.size(); ++i) {
    m_coreFeatures[i] /= 0;
  }
  return *this;
}
FVector& FVector::max_equals(const FVector& rhs) {
assert(m_coreFeatures.size() == rhs.m_coreFeatures.size());
if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
resize(rhs.m_coreFeatures.size());
}
for (iterator i = begin(); i != end(); ++i) {
set(i->first, max(i->second , rhs.get(i->first) ));
}
@ -314,7 +352,11 @@ namespace Moses {
}
}
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
m_coreFeatures[i] = max(m_coreFeatures[i], rhs.m_coreFeatures[i]);
if (i < rhs.m_coreFeatures.size()) {
m_coreFeatures[i] = max(m_coreFeatures[i], rhs.m_coreFeatures[i]);
} else {
m_coreFeatures[i] = max(m_coreFeatures[i],(float)0);
}
}
return *this;
}

View File

@ -112,6 +112,11 @@ namespace Moses {
public:
/** Empty feature vector */
FVector(size_t coreFeatures = 0);
/*
* Change the number of core features
**/
void resize(size_t newsize);
typedef boost::unordered_map<FName,FValue,FNameHash, FNameEquals> FNVmap;
/** Iterators */
@ -141,6 +146,10 @@ namespace Moses {
size_t size() const {
return m_features.size() + m_coreFeatures.size();
}
/** Number of core features only; unlike size(), entries in m_features are not counted. */
size_t coreSize() const {
return m_coreFeatures.size();
}
/** Equality */
bool operator== (const FVector& rhs) const;
@ -152,6 +161,7 @@ namespace Moses {
/**arithmetic */
//Element-wise
//If one side has fewer core features, take the missing ones to be 0.
FVector& operator+= (const FVector& rhs);
FVector& operator-= (const FVector& rhs);
FVector& operator*= (const FVector& rhs);

View File

@ -8,10 +8,25 @@ using namespace std;
namespace Moses
{
ScoreComponentCollection::ScoreComponentCollection()
ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
size_t ScoreComponentCollection::s_denseVectorSize = 0;
ScoreComponentCollection::ScoreComponentCollection() : m_scores(s_denseVectorSize)
{}
void ScoreComponentCollection::RegisterScoreProducer
(const ScoreProducer* scoreProducer)
{
assert(scoreProducer->GetNumScoreComponents() != ScoreProducer::unlimited);
size_t start = s_denseVectorSize;
size_t end = start + scoreProducer->GetNumScoreComponents();
VERBOSE(1,"ScoreProducer: " << scoreProducer->GetScoreProducerDescription() << " start: " << start << " end: " << end << endl);
s_scoreIndexes[scoreProducer] = pair<size_t,size_t>(start,end);
s_denseVectorSize = end;
}
float ScoreComponentCollection::GetWeightedScore() const
{
return m_scores.inner_product(StaticData::Instance().GetAllWeights().m_scores);

View File

@ -65,6 +65,16 @@ class ScoreComponentCollection
friend std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs);
private:
FVector m_scores;
typedef std::pair<size_t,size_t> IndexPair;
typedef std::map<const ScoreProducer*,IndexPair> ScoreIndexMap;
static ScoreIndexMap s_scoreIndexes;
static size_t s_denseVectorSize;
//! Return the (start,end) index pair stored in s_scoreIndexes for sp.
//! sp must already be present in the map; this is verified by assert only.
static IndexPair GetIndexes(const ScoreProducer* sp)
{
  const ScoreIndexMap::const_iterator found = s_scoreIndexes.find(sp);
  assert(found != s_scoreIndexes.end());
  return (*found).second;
}
public:
//! Create a new score collection with all values set to 0.0
@ -75,22 +85,41 @@ public:
: m_scores(rhs.m_scores)
{}
/**
* Register a ScoreProducer with a fixed number of scores, so that it can
* be allocated space in the dense part of the feature vector.
**/
static void RegisterScoreProducer(const ScoreProducer* scoreProducer);
/** Load from file */
bool Load(const std::string& filename)
{
return m_scores.load(filename);
}
FVector GetScoresVector() const
const FVector& GetScoresVector() const
{
return m_scores;
}
size_t Size()
size_t Size() const
{
return m_scores.size();
}
//! Make the core part of m_scores as long as the current dense vector size.
//! Does nothing when the sizes already agree.
void Resize()
{
  if (m_scores.coreSize() == s_denseVectorSize) {
    return;
  }
  m_scores.resize(s_denseVectorSize);
}
/** Create an FVector with the right number of core features */
static FVector CreateFVector()
{
  FVector sized(s_denseVectorSize);
  return sized;
}
//! Set all values to 0.0
void ZeroAll()
{
@ -124,10 +153,9 @@ public:
//! produced by sp
void PlusEquals(const ScoreProducer* sp, const ScoreComponentCollection& scores)
{
const std::vector<FName>& names = sp->GetFeatureNames();
for (std::vector<FName>::const_iterator i = names.begin();
i != names.end(); ++i) {
m_scores[*i] += scores.m_scores[*i];
IndexPair indexes = GetIndexes(sp);
for (size_t i = indexes.first; i < indexes.second; ++i) {
m_scores[i] += scores.m_scores[i];
}
}
@ -136,10 +164,10 @@ public:
//! produced by sp
void PlusEquals(const ScoreProducer* sp, const std::vector<float>& scores)
{
const std::vector<FName>& names = sp->GetFeatureNames();
assert(names.size() == scores.size());
IndexPair indexes = GetIndexes(sp);
assert(scores.size() == indexes.second - indexes.first);
for (size_t i = 0; i < scores.size(); ++i) {
m_scores[names[i]] += scores[i];
m_scores[i + indexes.first] += scores[i];
}
}
@ -148,8 +176,9 @@ public:
//! a single value
void PlusEquals(const ScoreProducer* sp, float score)
{
assert(1 == sp->GetNumScoreComponents());
m_scores[sp->GetFeatureNames()[0]] += score;
IndexPair indexes = GetIndexes(sp);
assert(1 == indexes.second - indexes.first);
m_scores[indexes.first] += score;
}
//For features which have an unbounded number of components
@ -162,10 +191,10 @@ public:
void Assign(const ScoreProducer* sp, const std::vector<float>& scores)
{
assert(scores.size() == sp->GetNumScoreComponents());
const std::vector<FName>& names = sp->GetFeatureNames();
IndexPair indexes = GetIndexes(sp);
assert(scores.size() == indexes.second - indexes.first);
for (size_t i = 0; i < scores.size(); ++i) {
m_scores[names[i]] = scores[i];
m_scores[i + indexes.first] = scores[i];
}
}
@ -174,8 +203,9 @@ public:
//! a single value
void Assign(const ScoreProducer* sp, float score)
{
assert(1 == sp->GetNumScoreComponents());
m_scores[sp->GetFeatureNames()[0]] = score;
IndexPair indexes = GetIndexes(sp);
assert(1 == indexes.second - indexes.first);
m_scores[indexes.first] = score;
}
//For features which have an unbounded number of components
@ -214,9 +244,9 @@ public:
size_t components = sp->GetNumScoreComponents();
if (components == ScoreProducer::unlimited) return std::vector<float>();
std::vector<float> res(components);
const std::vector<FName>& names = sp->GetFeatureNames();
for (size_t i = 0; i < names.size(); ++i) {
res[i] = m_scores[names[i]];
IndexPair indexes = GetIndexes(sp);
for (size_t i = 0; i < res.size(); ++i) {
res[i] = m_scores[i + indexes.first];
}
return res;
}
@ -224,7 +254,7 @@ public:
//! get subset of scores that belong to a certain sparse ScoreProducer
FVector GetVectorForProducer(const ScoreProducer* sp) const
{
FVector fv;
FVector fv(s_denseVectorSize);
std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP;
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
std::stringstream name;
@ -251,8 +281,9 @@ public:
//! this will return it. If not, this method will throw
float GetScoreForProducer(const ScoreProducer* sp) const
{
assert(sp->GetNumScoreComponents() == 1);
return m_scores[sp->GetFeatureNames()[0]];
IndexPair indexes = GetIndexes(sp);
assert(indexes.second - indexes.first == 1);
return m_scores[indexes.first];
}
//For features which have an unbounded number of components

View File

@ -3,6 +3,7 @@
#include <iostream>
#include <sstream>
#include "ScoreComponentCollection.h"
#include "ScoreProducer.h"
using namespace std;
@ -25,6 +26,10 @@ ScoreProducer::ScoreProducer(const std::string& description, size_t numScoreComp
dstream << ":" << count;
}
m_description = dstream.str();
if (numScoreComponents != unlimited)
{
ScoreComponentCollection::RegisterScoreProducer(this);
}
}
ScoreProducer::~ScoreProducer() {}

View File

@ -701,11 +701,13 @@ void StaticData::SetBooleanParameter( bool *parameter, string parameterName, boo
// Store a single weight for the score producer sp in m_allWeights.
// Resize() is called first; presumably this brings the weight vector up to the
// number of dense scores registered so far, so that Assign() has a slot to
// write to - confirm against ScoreComponentCollection::Resize.
void StaticData::SetWeight(const ScoreProducer* sp, float weight)
{
m_allWeights.Resize();
m_allWeights.Assign(sp,weight);
}
// Store a vector of weights for the score producer sp in m_allWeights.
// Resize() is called first; presumably this brings the weight vector up to the
// number of dense scores registered so far, so that Assign() has slots to
// write to - confirm against ScoreComponentCollection::Resize.
void StaticData::SetWeights(const ScoreProducer* sp, const std::vector<float>& weights)
{
m_allWeights.Resize();
m_allWeights.Assign(sp,weights);
}

View File

@ -658,7 +658,6 @@ public:
Parameter* GetParameter() {
return m_parameter;
}
void SetAllWeightsScoreComponentCollection(const ScoreComponentCollection &weightsScoreComponentCollection);
WordAlignmentSort GetWordAlignmentSort() const {
return m_wordAlignmentSort;

View File

@ -168,6 +168,39 @@ BOOST_AUTO_TEST_CASE(core_arith)
BOOST_CHECK_CLOSE((FValue)quot[1], -0.4 , TOL);
BOOST_CHECK_CLOSE((FValue)quot[n1], 0.277777777 , TOL);
BOOST_CHECK_CLOSE((FValue)quot[n2], 0 , TOL);
//with different length vectors
FVector f3(2);
FVector f4(1);
f3[0] = 2; f3[1] = -1;
f4[0] = 5;
FVector sum1 = f3 + f4;
FVector sum2 = f4 + f3;
BOOST_CHECK_EQUAL(sum1,sum2);
BOOST_CHECK_CLOSE(sum1[0], 7, TOL);
BOOST_CHECK_CLOSE(sum1[1], -1, TOL);
FVector diff1 = f3 - f4;
FVector diff2 = f4 - f3;
BOOST_CHECK_CLOSE(diff1[0], -3, TOL);
BOOST_CHECK_CLOSE(diff1[1], -1, TOL);
BOOST_CHECK_CLOSE(diff2[0], 3, TOL);
BOOST_CHECK_CLOSE(diff2[1], 1, TOL);
FVector prod1 = f3 * f4;
FVector prod2 = f4 * f3;
BOOST_CHECK_EQUAL(prod1,prod2);
BOOST_CHECK_CLOSE(prod1[0], 10, TOL);
BOOST_CHECK_CLOSE(prod1[1], 0, TOL);
FVector quot1 = f3 / f4;
FVector quot2 = f4 / f3;
BOOST_CHECK_CLOSE(quot1[0], 0.4, TOL);
BOOST_CHECK_EQUAL(quot1[1], -numeric_limits<float>::infinity());
BOOST_CHECK_CLOSE(quot2[0], 2.5, TOL);
BOOST_CHECK_CLOSE(quot2[1], 0, TOL);
}
BOOST_AUTO_TEST_CASE(core_scalar)

View File

@ -131,7 +131,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_1, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs);
FVector totalUpdate1;
FVector totalUpdate1 = ScoreComponentCollection::CreateFVector();
for (size_t k = 0; k < featureValueDiffs1.size(); ++k) {
featureValueDiffs1[k].MultiplyEquals(alphas1[k]);
cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl;
@ -161,7 +161,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_1, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs);
FVector totalUpdate2;
FVector totalUpdate2 = ScoreComponentCollection::CreateFVector();
for (size_t k = 0; k < featureValueDiffs2.size(); ++k) {
featureValueDiffs2[k].MultiplyEquals(alphas2[k]);
cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl;
@ -342,7 +342,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_3, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs);
FVector totalUpdate1;
FVector totalUpdate1 = ScoreComponentCollection::CreateFVector();
for (size_t k = 0; k < featureValueDiffs1.size(); ++k) {
featureValueDiffs1[k].MultiplyEquals(alphas1[k]);
cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl;
@ -374,7 +374,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_3, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs);
FVector totalUpdate2;
FVector totalUpdate2 = ScoreComponentCollection::CreateFVector();
for (size_t k = 0; k < featureValueDiffs2.size(); ++k) {
featureValueDiffs2[k].MultiplyEquals(alphas2[k]);
cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl;
@ -558,7 +558,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_4, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs);
FVector totalUpdate1;
FVector totalUpdate1 = ScoreComponentCollection::CreateFVector();
for (size_t k = 0; k < featureValueDiffs1.size(); ++k) {
featureValueDiffs1[k].MultiplyEquals(alphas1[k]);
cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl;
@ -590,7 +590,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_4, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs);
FVector totalUpdate2;
FVector totalUpdate2 = ScoreComponentCollection::CreateFVector();
for (size_t k = 0; k < featureValueDiffs2.size(); ++k) {
featureValueDiffs2[k].MultiplyEquals(alphas2[k]);
cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl;
@ -680,7 +680,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_5, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs);
FVector totalUpdate1;
FVector totalUpdate1 = ScoreComponentCollection::CreateFVector();
for (size_t k = 0; k < featureValueDiffs1.size(); ++k) {
featureValueDiffs1[k].MultiplyEquals(alphas1[k]);
cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl;
@ -715,7 +715,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_5, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs);
FVector totalUpdate2;
FVector totalUpdate2 = ScoreComponentCollection::CreateFVector();
for (size_t k = 0; k < featureValueDiffs2.size(); ++k) {
featureValueDiffs2[k].MultiplyEquals(alphas2[k]);
cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl;
@ -750,7 +750,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_5, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs3(featureValueDiffs);
FVector totalUpdate3;
FVector totalUpdate3 = ScoreComponentCollection::CreateFVector();
for (size_t k = 0; k < featureValueDiffs3.size(); ++k) {
featureValueDiffs3[k].MultiplyEquals(alphas3[k]);
cerr << k << ": " << featureValueDiffs3[k].GetScoresVector() << endl;