Attempt at fixing sparse features for hgmira

This commit is contained in:
Barry Haddow 2015-04-03 15:46:59 +01:00
parent 6626d59cfc
commit 217f389230
5 changed files with 72 additions and 10 deletions

View File

@ -180,7 +180,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
references_.Load(referenceFiles, vocab_);
SparseVector weights;
wv.ToSparse(&weights);
wv.ToSparse(&weights,num_dense_);
scorer_ = scorer;
static const string kWeights = "weights";
@ -243,7 +243,7 @@ void HypergraphHopeFearDecoder::HopeFear(
{
size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
wv.ToSparse(&weights);
wv.ToSparse(&weights, num_dense_);
const Graph& graph = *(graphs_[sentenceId]);
// ValType hope_scale = 1.0;
@ -338,7 +338,7 @@ void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValTy
HgHypothesis bestHypo;
size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
wv.ToSparse(&weights);
wv.ToSparse(&weights, num_dense_);
vector<ValType> bg(scorer_->NumberOfScores());
//cerr << "Calculating bleu on " << sentenceId << endl;
Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);

View File

@ -77,6 +77,7 @@ unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_f
unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test forest_rescore_test : ForestRescoreTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test hypergraph_test : HypergraphTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test mira_feature_vector_test : MiraFeatureVectorTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test point_test : PointTest.cpp mert_lib ..//boost_unit_test_framework ;

View File

@ -0,0 +1,49 @@
#include "MiraFeatureVector.h"
#include "MiraWeightVector.h"
#define BOOST_TEST_MODULE MiraFeatureVector
#include <boost/test/unit_test.hpp>
using namespace MosesTuning;
/* Note that the conversion to and from SparseVector needs to know
how many of the features are really "dense". This is because in hgmira
all features (sparse and dense) are rolled into a SparseVector.
*/
BOOST_AUTO_TEST_CASE(from_sparse) {
  // Round-trip check: SparseVector -> MiraFeatureVector -> MiraWeightVector
  // -> SparseVector, with the first kNumDense features treated as dense.
  const int kNumDense = 2;
  const int kNumFeatures = 5;
  const double kTolerance = 1e-5;  // BOOST_CHECK_CLOSE tolerance argument

  // Feature names and the values stored under them, in insertion order.
  const char* const kNames[kNumFeatures] = {
    "dense0", "dense1", "sparse0", "sparse1", "sparse2"
  };
  const double kValues[kNumFeatures] = {0.2, 0.3, 0.7, 0.9, 0.1};

  // Ids expected from MiraFeatureVector::feat(); note the sparse entries
  // (4, 5, 6) do not follow on directly from the dense ones (0, 1).
  const int kExpectedIds[kNumFeatures] = {0, 1, 4, 5, 6};

  SparseVector original;
  for (int k = 0; k < kNumFeatures; ++k) {
    original.set(kNames[k], kValues[k]);
  }

  MiraFeatureVector features(original, kNumDense);
  BOOST_CHECK_EQUAL(features.size(), kNumFeatures);
  for (int k = 0; k < kNumFeatures; ++k) {
    BOOST_CHECK_EQUAL(features.feat(k), kExpectedIds[k]);
  }
  for (int k = 0; k < kNumFeatures; ++k) {
    BOOST_CHECK_CLOSE(features.val(k), kValues[k], kTolerance);
  }

  // A single update with step 1.0 on a fresh weight vector, then convert
  // the weights back to a SparseVector.
  MiraWeightVector weights;
  weights.update(features, 1.0);
  SparseVector roundTripped;
  weights.ToSparse(&roundTripped, kNumDense);

  // Check we get back what we started with.
  for (int k = 0; k < kNumFeatures; ++k) {
    BOOST_CHECK_CLOSE(roundTripped.get(kNames[k]), kValues[k], kTolerance);
  }
}

View File

@ -93,11 +93,17 @@ void MiraWeightVector::update(size_t index, ValType delta)
m_lastUpdated[index] = m_numUpdates;
}
void MiraWeightVector::ToSparse(SparseVector* sparse) const
void MiraWeightVector::ToSparse(SparseVector* sparse, size_t denseSize) const
{
for (size_t i = 0; i < m_weights.size(); ++i) {
if(abs(m_weights[i])>1e-8) {
sparse->set(i,m_weights[i]);
if (i < denseSize) {
sparse->set(i,m_weights[i]);
} else {
//The ids in MiraFeatureVector/MiraWeightVector for sparse features
//need to be translated when converting back to SparseVector.
sparse->set(i-denseSize, m_weights[i]);
}
}
}
}
@ -172,12 +178,18 @@ size_t AvgWeightVector::size() const
return m_wv.m_weights.size();
}
void AvgWeightVector::ToSparse(SparseVector* sparse) const
void AvgWeightVector::ToSparse(SparseVector* sparse, size_t denseSize) const
{
for (size_t i = 0; i < size(); ++i) {
ValType w = weight(i);
if(abs(w)>1e-8) {
sparse->set(i,w);
if (i < denseSize) {
sparse->set(i,w);
} else {
//The ids in MiraFeatureVector/MiraWeightVector for sparse features
//need to be translated when converting back to SparseVector.
sparse->set(i-denseSize, w);
}
}
}
}

View File

@ -64,9 +64,9 @@ public:
AvgWeightVector avg();
/**
* Convert to sparse vector, interpreting all features as sparse.
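* Convert to sparse vector, interpreting all features as sparse. Indices below
* denseSize are kept as they are; indices at or above denseSize are shifted
* down by denseSize. Only used by hgmira.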
**/
void ToSparse(SparseVector* sparse) const;
void ToSparse(SparseVector* sparse, size_t denseSize) const;
friend class AvgWeightVector;
@ -104,7 +104,7 @@ public:
ValType score(const MiraFeatureVector& fv) const;
ValType weight(std::size_t index) const;
std::size_t size() const;
void ToSparse(SparseVector* sparse) const;
void ToSparse(SparseVector* sparse, size_t num_dense) const;
private:
const MiraWeightVector& m_wv;
};