Remove the 'default' option, and add a unit test for the feature vector

This commit is contained in:
bhaddow 2011-08-25 21:02:22 +01:00
parent 43c7c087ba
commit 3a91ef8930
5 changed files with 154 additions and 154 deletions

View File

@ -73,10 +73,6 @@ namespace Moses {
}
size_t FName::hash() const {
/*std::size_t seed = 0;
boost::hash_combine(seed, m_root);
boost::hash_combine(seed, m_name);
return seed;*/
return boost::hash_value(m_id);
}
@ -93,14 +89,10 @@ namespace Moses {
return ! (*this == rhs);
}
FVector::FVector( FValue defaultValue)
{
m_features[DEFAULT_NAME] = defaultValue;
}
FVector::FVector() {}
void FVector::clear() {
m_features.clear();
m_features[DEFAULT_NAME] = DEFAULT;
}
bool FVector::load(const std::string& filename) {
@ -140,10 +132,6 @@ namespace Moses {
}
}
FName FVector::DEFAULT_NAME("DEFAULT","");
const FValue FVector::DEFAULT = 0;
static bool equalsTolerance(FValue lhs, FValue rhs) {
if (lhs == rhs) return true;
static const FValue TOLERANCE = 1e-4;
@ -157,7 +145,6 @@ namespace Moses {
if (this == &rhs) {
return true;
}
if (get(DEFAULT_NAME) != rhs.get(DEFAULT_NAME)) return false;
for (const_iterator i = cbegin(); i != cend(); ++i) {
if (!equalsTolerance(i->second,rhs.get(i->first))) return false;
}
@ -182,7 +169,7 @@ namespace Moses {
}
FValue FVector::operator[](const FName& name) const {
return get(name) + get(DEFAULT_NAME);
return get(name);
}
@ -190,19 +177,7 @@ namespace Moses {
ostream& FVector::print(ostream& out) const {
out << "{";
for (const_iterator i = cbegin(); i != cend(); ++i) {
FValue value = i->second;
if (i->first != DEFAULT_NAME) {
value += get(DEFAULT_NAME);
}
/* if (i->first != DEFAULT_NAME && i->second != 0.0) {
out << i->first << "=" << value << ", ";
}*/
/* if (i->first != DEFAULT_NAME) {
out << i->first << "=" << value << ", ";
}*/
if (i->first != DEFAULT_NAME) {
out << value << ", ";
}
out << i->first << "=" << i->second << ", ";
}
out << "}";
return out;
@ -213,6 +188,7 @@ namespace Moses {
}
const FValue& FVector::get(const FName& name) const {
static const FValue DEFAULT = 0;
const_iterator fi = m_features.find(name);
if (fi == m_features.end()) {
return DEFAULT;
@ -253,7 +229,6 @@ namespace Moses {
}
FVector& FVector::operator+= (const FVector& rhs) {
//default value will take care of itself here.
for (iterator i = begin(); i != end(); ++i) {
set(i->first,i->second + rhs.get(i->first));
}
@ -278,92 +253,37 @@ namespace Moses {
}
FVector& FVector::operator*= (const FVector& rhs) {
FValue lhsDefault = get(DEFAULT_NAME);
FValue rhsDefault = rhs.get(DEFAULT_NAME);
for (iterator i = begin(); i != end(); ++i) {
if (i->first == DEFAULT_NAME) {
set(i->first,lhsDefault*rhsDefault);
} else {
FValue lhsValue = i->second;
FValue rhsValue = rhs.get(i->first);
set(i->first, lhsValue*rhsDefault + rhsValue*lhsDefault + lhsValue*rhsValue);
}
}
if (lhsDefault) {
//Features that have the default value in the lhs
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
if (!hasNonDefaultValue(i->first)) {
set(i->first, lhsDefault*i->second);
}
}
FValue lhsValue = i->second;
FValue rhsValue = rhs.get(i->first);
set(i->first,lhsValue*rhsValue);
}
return *this;
}
FVector& FVector::operator/= (const FVector& rhs) {
FValue lhsDefault = get(DEFAULT_NAME);
FValue rhsDefault = rhs.get(DEFAULT_NAME);
if (lhsDefault && !rhsDefault) {
throw runtime_error("Attempt to divide feature vectors where lhs has default and rhs does not");
}
FValue quotientDefault = 0;
if (rhsDefault) {
quotientDefault = lhsDefault / rhsDefault;
}
for (iterator i = begin(); i != end(); ++i) {
if (i->first == DEFAULT_NAME) {
set(i->first, quotientDefault);
} else {
FValue lhsValue = i->second;
FValue rhsValue = rhs.get(i->first);
set(i->first, (lhsValue + lhsDefault) / (rhsValue + rhsDefault) - quotientDefault);
}
}
if (lhsDefault) {
//Features that have the default value in the lhs
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
if (!hasNonDefaultValue(i->first)) {
set(i->first, lhsDefault / (i->second + rhsDefault) - quotientDefault);
}
}
FValue lhsValue = i->second;
FValue rhsValue = rhs.get(i->first);
set(i->first, lhsValue / rhsValue) ;
}
return *this;
}
FVector& FVector::max_equals(const FVector& rhs) {
FValue lhsDefault = get(DEFAULT_NAME);
FValue rhsDefault = rhs.get(DEFAULT_NAME);
FValue maxDefault = max(lhsDefault,rhsDefault);
for (iterator i = begin(); i != end(); ++i) {
if (i->first == DEFAULT_NAME) {
set(i->first, maxDefault);
} else {
set(i->first, max(i->second + lhsDefault, rhs.get(i->first) + rhsDefault) - maxDefault);
}
set(i->first, max(i->second , rhs.get(i->first) ));
}
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
if (!hasNonDefaultValue(i->first)) {
set(i->first, max(lhsDefault, (i->second + rhsDefault)) - maxDefault);
}
if (!hasNonDefaultValue(i->first)) {
set(i->first, i->second);
}
}
return *this;
}
FVector& FVector::operator+= (const FValue& rhs) {
set(DEFAULT_NAME, get(DEFAULT_NAME) + rhs);
return *this;
}
FVector& FVector::operator-= (const FValue& rhs) {
set(DEFAULT_NAME, get(DEFAULT_NAME) - rhs);
return *this;
}
FVector& FVector::operator*= (const FValue& rhs) {
//NB Could do this with boost::bind ?
//This multiplies the default value, which is what we want
for (iterator i = begin(); i != end(); ++i) {
i->second *= rhs;
}
@ -372,7 +292,6 @@ namespace Moses {
FVector& FVector::operator/= (const FValue& rhs) {
//This dividess the default value, which is what we want
for (iterator i = begin(); i != end(); ++i) {
i->second /= rhs;
}
@ -382,9 +301,6 @@ namespace Moses {
FValue FVector::l1norm() const {
FValue norm = 0;
for (const_iterator i = cbegin(); i != cend(); ++i) {
if ((i->second) && i->first == DEFAULT_NAME) {
throw runtime_error("Cannot take l1norm with non-zero default values");
}
norm += abs(i->second);
}
return norm;
@ -393,9 +309,6 @@ namespace Moses {
FValue FVector::sum() const {
FValue sum = 0;
for (const_iterator i = cbegin(); i != cend(); ++i) {
if ((i->second) && i->first == DEFAULT_NAME) {
throw runtime_error("Cannot take sum with non-zero default values");
}
sum += i->second;
}
return sum;
@ -408,25 +321,9 @@ namespace Moses {
FValue FVector::inner_product(const FVector& rhs) const {
FValue lhsDefault = get(DEFAULT_NAME);
FValue rhsDefault = rhs.get(DEFAULT_NAME);
if (lhsDefault && rhsDefault) {
throw runtime_error("Cannot take inner product if both lhs and rhs have non-zero default values");
}
FValue product = 0.0;
for (const_iterator i = cbegin(); i != cend(); ++i) {
if (i->first != DEFAULT_NAME) {
product += ((i->second + lhsDefault)*(rhs.get(i->first) + rhsDefault));
}
}
if (lhsDefault) {
//Features that have the default value in the rhs
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
if (!hasNonDefaultValue(i->first)) {
product += (i->second + rhsDefault)*lhsDefault;
}
}
product += ((i->second)*(rhs.get(i->first)));
}
return product;
}
@ -447,13 +344,6 @@ namespace Moses {
return FVector(lhs) /= rhs;
}
const FVector operator+(const FVector& lhs, const FValue& rhs) {
return FVector(lhs) += rhs;
}
const FVector operator-(const FVector& lhs, const FValue& rhs) {
return FVector(lhs) -= rhs;
}
const FVector operator*(const FVector& lhs, const FValue& rhs) {
return FVector(lhs) *= rhs;

View File

@ -108,8 +108,8 @@ namespace Moses {
class FVector
{
public:
/** Empty feature vector, possibly with default value */
FVector(FValue defaultValue = DEFAULT);
/** Empty feature vector */
FVector();
typedef boost::unordered_map<FName,FValue,FNameHash, FNameEquals> FNVmap;
/** Iterators */
@ -120,13 +120,9 @@ namespace Moses {
const_iterator cbegin() const {return m_features.cbegin();}
const_iterator cend() const {return m_features.cend();}
//defaults - TODO remove
static FName DEFAULT_NAME;
static const FValue DEFAULT;
bool hasNonDefaultValue(FName name) const { return m_features.find(name) != m_features.end();}
void clear();
bool hasNonDefaultValue(FName name) const { return m_features.find(name) != m_features.end();}
/** Load from file - each line should be 'root[_name] value' */
bool load(const std::string& filename);
@ -155,8 +151,6 @@ namespace Moses {
FVector& operator*= (const FVector& rhs);
FVector& operator/= (const FVector& rhs);
//Scalar
FVector& operator+= (const FValue& rhs);
FVector& operator-= (const FValue& rhs);
FVector& operator*= (const FValue& rhs);
FVector& operator/= (const FValue& rhs);
@ -180,8 +174,7 @@ namespace Moses {
private:
/** Internal get and set. Note that the get() doesn't include the
default value */
/** Internal get and set. */
const FValue& get(const FName& name) const;
void set(const FName& name, const FValue& value);
@ -231,8 +224,6 @@ namespace Moses {
const FVector operator/(const FVector& lhs, const FVector& rhs);
//Scalar operations
const FVector operator+(const FVector& lhs, const FValue& rhs);
const FVector operator-(const FVector& lhs, const FValue& rhs);
const FVector operator*(const FVector& lhs, const FValue& rhs);
const FVector operator/(const FVector& lhs, const FValue& rhs);
@ -257,7 +248,7 @@ namespace Moses {
// If we get here, we know that operator[] was called to perform a write access,
// so we can insert an item in the vector if needed
//std::cerr << "Inserting " << value << " into " << m_name << std::endl;
m_fv->set(m_name,value-m_fv->get(FVector::DEFAULT_NAME));
m_fv->set(m_name,value);
return *this;
}
@ -265,7 +256,7 @@ namespace Moses {
operator FValue() {
// If we get here, we know that operator[] was called to perform a read access,
// so we can simply return the value from the vector
return m_fv->get(m_name) + m_fv->get(FVector::DEFAULT_NAME);
return m_fv->get(m_name);
}
/*operator FValue&() {
@ -273,11 +264,11 @@ namespace Moses {
}*/
FValue operator++() {
return ++m_fv->m_features[m_name] + m_fv->get(FVector::DEFAULT_NAME);
return ++m_fv->m_features[m_name];
}
FValue operator +=(FValue lhs) {
return (m_fv->m_features[m_name] += lhs) + m_fv->get(FVector::DEFAULT_NAME);
return (m_fv->m_features[m_name] += lhs);
}
private:

View File

@ -0,0 +1,118 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010- University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/test/unit_test.hpp>
#include "FeatureVector.h"
using namespace Moses;
using namespace std;
// Tolerance handed to the BOOST_CHECK_CLOSE calls in this file.
// BOOST_CHECK_CLOSE interprets its tolerance argument as a percentage, so
// this value means 1e-5 percent, i.e. a relative difference of 1e-7.
static const float TOL = 0.00001;
BOOST_AUTO_TEST_SUITE(fv)
/**
 * Element-wise addition and subtraction of two feature vectors whose feature
 * sets only partly overlap: "a" and "c" are set on both sides, "b" only on
 * the left-hand side and "d" only on the right-hand side.
 */
BOOST_AUTO_TEST_CASE(vector_sum_diff)
{
  FName nameA("a");
  FName nameB("b");
  FName nameC("c");
  FName nameD("d");

  FVector left;
  left[nameA] = 1.2;
  left[nameB] = 1.4;
  left[nameC] = -0.1;

  FVector right;
  right[nameA] = 0.01;
  right[nameC] = 5.6;
  right[nameD] = 0.6;

  FVector sum = left + right;
  FVector diff = left - right;

  // Addition: features present on only one side keep that side's value.
  BOOST_CHECK_CLOSE(static_cast<FValue>(sum[nameA]), 1.21, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(sum[nameB]), 1.4, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(sum[nameC]), 5.5, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(sum[nameD]), 0.6, TOL);

  // Subtraction: a feature present only on the right comes out negated.
  BOOST_CHECK_CLOSE(static_cast<FValue>(diff[nameA]), 1.19, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(diff[nameB]), 1.4, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(diff[nameC]), -5.7, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(diff[nameD]), -0.6, TOL);
}
/**
 * Multiplication and division of a feature vector by a scalar.
 *
 * f1 holds "a", "b" and "c"; f2 holds "a", "c" and "d". Every stored feature
 * is expected to be scaled, and a feature that was never set ("b" in f2) is
 * expected to still read back as zero after the operation.
 */
BOOST_AUTO_TEST_CASE(scalar)
{
  FVector f1,f2;
  FName n1("a");
  FName n2("b");
  FName n3("c");
  FName n4("d");
  f1[n1] = 0.2; f1[n2] = 9.178; f1[n3] = -0.1;
  f2[n1] = 0.01; f2[n3] = 5.6; f2[n4] = 0.6;

  FVector prod1 = f1 * 2;
  FVector prod2 = f1 * -0.1;
  FVector quot = f2 / 2;

  // Multiplication by a positive scalar.
  BOOST_CHECK_CLOSE(static_cast<FValue>(prod1[n1]), 0.4, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(prod1[n2]), 18.356, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(prod1[n3]), -0.2, TOL);

  // Multiplication by a negative scalar flips the sign of every feature.
  BOOST_CHECK_CLOSE(static_cast<FValue>(prod2[n1]), -0.02, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(prod2[n2]), -0.9178, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(prod2[n3]), 0.01, TOL);

  // Division by a scalar.
  BOOST_CHECK_CLOSE(static_cast<FValue>(quot[n1]), 0.005, TOL);
  // "b" is absent from f2, so the expected value is zero. A percentage
  // comparison against zero can only succeed on exact equality, so an
  // absolute bound (BOOST_CHECK_SMALL) is used instead of BOOST_CHECK_CLOSE.
  // Both arguments are converted to FValue so they share one type.
  BOOST_CHECK_SMALL(static_cast<FValue>(quot[n2]), static_cast<FValue>(TOL));
  BOOST_CHECK_CLOSE(static_cast<FValue>(quot[n3]), 2.8, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(quot[n4]), 0.3, TOL);
}
/**
 * In-place updates made through operator[]: compound addition and
 * pre-increment. Each must modify only the addressed feature, and
 * pre-increment must hand back the updated value.
 */
BOOST_AUTO_TEST_CASE(inc)
{
  FVector fv;
  FName first("a");
  FName second("b");
  fv[first] = 2.3;
  fv[second] = -0.4;

  // Compound addition on "a" must leave "b" untouched.
  fv[first] += 2;
  BOOST_CHECK_CLOSE(static_cast<FValue>(fv[first]), 4.3, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(fv[second]), -0.4, TOL);

  // Pre-increment on "b" returns the new value and must leave "a" untouched.
  FValue incremented = ++fv[second];
  BOOST_CHECK_CLOSE(incremented, 0.6, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(fv[first]), 4.3, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(fv[second]), 0.6, TOL);
}
/**
 * Element-wise multiplication and division of two feature vectors.
 *
 * f1 holds "a", "b" and "c"; f2 holds those three plus "d". Since "d" is
 * missing from the left-hand operand, both the product and the quotient are
 * expected to read back as zero for it.
 */
BOOST_AUTO_TEST_CASE(vector_mult)
{
  FVector f1,f2;
  FName n1("a");
  FName n2("b");
  FName n3("c");
  FName n4("d");
  f1[n1] = 0.2; f1[n2] = 9.178; f1[n3] = -0.1;
  f2[n1] = 0.01; f2[n2] = 5.6; f2[n3] = 1; f2[n4] = 0.6;

  FVector prod = f1 * f2;
  FVector quot = f1/f2;

  // A percentage comparison against zero can only succeed on exact equality,
  // so the zero expectations below use an absolute bound (BOOST_CHECK_SMALL).
  // The bound is converted to FValue so both macro arguments share one type.
  const FValue zeroBound = static_cast<FValue>(TOL);

  // Element-wise product.
  BOOST_CHECK_CLOSE(static_cast<FValue>(prod[n1]), 0.002, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(prod[n2]), 51.3968, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(prod[n3]), -0.1, TOL);
  BOOST_CHECK_SMALL(static_cast<FValue>(prod[n4]), zeroBound);

  // Element-wise quotient.
  BOOST_CHECK_CLOSE(static_cast<FValue>(quot[n1]), 20, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(quot[n2]), 1.63892865, TOL);
  BOOST_CHECK_CLOSE(static_cast<FValue>(quot[n3]), -0.1, TOL);
  BOOST_CHECK_SMALL(static_cast<FValue>(quot[n4]), zeroBound);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -134,7 +134,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_1, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs);
FVector totalUpdate1(0);
FVector totalUpdate1;
for (size_t k = 0; k < featureValueDiffs1.size(); ++k) {
featureValueDiffs1[k].MultiplyEquals(alphas1[k]);
cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl;
@ -164,7 +164,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_1, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs);
FVector totalUpdate2(0);
FVector totalUpdate2;
for (size_t k = 0; k < featureValueDiffs2.size(); ++k) {
featureValueDiffs2[k].MultiplyEquals(alphas2[k]);
cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl;
@ -345,7 +345,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_3, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs);
FVector totalUpdate1(0);
FVector totalUpdate1;
for (size_t k = 0; k < featureValueDiffs1.size(); ++k) {
featureValueDiffs1[k].MultiplyEquals(alphas1[k]);
cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl;
@ -377,7 +377,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_3, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs);
FVector totalUpdate2(0);
FVector totalUpdate2;
for (size_t k = 0; k < featureValueDiffs2.size(); ++k) {
featureValueDiffs2[k].MultiplyEquals(alphas2[k]);
cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl;
@ -561,7 +561,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_4, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs);
FVector totalUpdate1(0);
FVector totalUpdate1;
for (size_t k = 0; k < featureValueDiffs1.size(); ++k) {
featureValueDiffs1[k].MultiplyEquals(alphas1[k]);
cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl;
@ -593,7 +593,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_4, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs);
FVector totalUpdate2(0);
FVector totalUpdate2;
for (size_t k = 0; k < featureValueDiffs2.size(); ++k) {
featureValueDiffs2[k].MultiplyEquals(alphas2[k]);
cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl;
@ -683,7 +683,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_5, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs);
FVector totalUpdate1(0);
FVector totalUpdate1;
for (size_t k = 0; k < featureValueDiffs1.size(); ++k) {
featureValueDiffs1[k].MultiplyEquals(alphas1[k]);
cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl;
@ -718,7 +718,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_5, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs);
FVector totalUpdate2(0);
FVector totalUpdate2;
for (size_t k = 0; k < featureValueDiffs2.size(); ++k) {
featureValueDiffs2[k].MultiplyEquals(alphas2[k]);
cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl;
@ -753,7 +753,7 @@ BOOST_FIXTURE_TEST_CASE(test_hildreth_5, MockProducers)
cerr << "partial updates:" << endl;
vector< ScoreComponentCollection> featureValueDiffs3(featureValueDiffs);
FVector totalUpdate3(0);
FVector totalUpdate3;
for (size_t k = 0; k < featureValueDiffs3.size(); ++k) {
featureValueDiffs3[k].MultiplyEquals(alphas3[k]);
cerr << k << ": " << featureValueDiffs3[k].GetScoresVector() << endl;

View File

@ -1,10 +1,11 @@
bin_PROGRAMS = moses_test
moses_test_SOURCES = MockHypothesis.cpp \
moses_test_SOURCES = FeatureVectorTest.cpp \
HildrethTest.cpp \
MockHypothesis.cpp \
MosesTest.cpp \
ScoreComponentCollectionTest.cpp \
TargetBigramFeatureTest.cpp \
HildrethTest.cpp \
../mira/Hildreth.cpp
../mira/Hildreth.cpp
moses_test_CPPFLAGS = -W -Wall -I$(top_srcdir)/moses/src -I$(top_srcdir)/mira $(BOOST_CPPFLAGS) -DBOOST_TEST_DYN_LINK
moses_test_LDADD = -L$(top_srcdir)/moses/src -lmoses $(BOOST_LDFLAGS) $(BOOST_THREAD_LIBS) -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt @KENLM_LDFLAGS@ -lboost_unit_test_framework