mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-07-14 14:50:41 +03:00
Attempt at fixing sparse features for hgmira
This commit is contained in:
parent
6626d59cfc
commit
217f389230
@ -180,7 +180,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
|
||||
references_.Load(referenceFiles, vocab_);
|
||||
|
||||
SparseVector weights;
|
||||
wv.ToSparse(&weights);
|
||||
wv.ToSparse(&weights,num_dense_);
|
||||
scorer_ = scorer;
|
||||
|
||||
static const string kWeights = "weights";
|
||||
@ -243,7 +243,7 @@ void HypergraphHopeFearDecoder::HopeFear(
|
||||
{
|
||||
size_t sentenceId = *sentenceIdIter_;
|
||||
SparseVector weights;
|
||||
wv.ToSparse(&weights);
|
||||
wv.ToSparse(&weights, num_dense_);
|
||||
const Graph& graph = *(graphs_[sentenceId]);
|
||||
|
||||
// ValType hope_scale = 1.0;
|
||||
@ -338,7 +338,7 @@ void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValTy
|
||||
HgHypothesis bestHypo;
|
||||
size_t sentenceId = *sentenceIdIter_;
|
||||
SparseVector weights;
|
||||
wv.ToSparse(&weights);
|
||||
wv.ToSparse(&weights, num_dense_);
|
||||
vector<ValType> bg(scorer_->NumberOfScores());
|
||||
//cerr << "Calculating bleu on " << sentenceId << endl;
|
||||
Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);
|
||||
|
@ -77,6 +77,7 @@ unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_f
|
||||
unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test forest_rescore_test : ForestRescoreTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test hypergraph_test : HypergraphTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test mira_feature_vector_test : MiraFeatureVectorTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test point_test : PointTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
|
49
mert/MiraFeatureVectorTest.cpp
Normal file
49
mert/MiraFeatureVectorTest.cpp
Normal file
@ -0,0 +1,49 @@
|
||||
#include "MiraFeatureVector.h"
|
||||
#include "MiraWeightVector.h"
|
||||
|
||||
#define BOOST_TEST_MODULE MiraFeatureVector
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
using namespace MosesTuning;
|
||||
|
||||
/* Note that the conversion to and from SparseVector needs to know
|
||||
how many of the features are really "dense". This is because in hgmira
|
||||
all features (sparse and dense) get rolled into SparseVector.
|
||||
*/
|
||||
|
||||
BOOST_AUTO_TEST_CASE(from_sparse) {
|
||||
SparseVector sp;
|
||||
sp.set("dense0", 0.2);
|
||||
sp.set("dense1", 0.3);
|
||||
sp.set("sparse0", 0.7);
|
||||
sp.set("sparse1", 0.9);
|
||||
sp.set("sparse2", 0.1);
|
||||
|
||||
MiraFeatureVector mfv(sp,2);
|
||||
BOOST_CHECK_EQUAL(mfv.size(),5);
|
||||
|
||||
BOOST_CHECK_EQUAL(mfv.feat(0),0);
|
||||
BOOST_CHECK_EQUAL(mfv.feat(1),1);
|
||||
BOOST_CHECK_EQUAL(mfv.feat(2),4);
|
||||
BOOST_CHECK_EQUAL(mfv.feat(3),5);
|
||||
BOOST_CHECK_EQUAL(mfv.feat(4),6);
|
||||
|
||||
BOOST_CHECK_CLOSE(mfv.val(0), 0.2,1e-5);
|
||||
BOOST_CHECK_CLOSE(mfv.val(1), 0.3,1e-5);
|
||||
BOOST_CHECK_CLOSE(mfv.val(2), 0.7,1e-5);
|
||||
BOOST_CHECK_CLOSE(mfv.val(3), 0.9,1e-5);
|
||||
BOOST_CHECK_CLOSE(mfv.val(4), 0.1,1e-5);
|
||||
|
||||
MiraWeightVector mwv;
|
||||
mwv.update(mfv,1.0);
|
||||
SparseVector sp2;
|
||||
mwv.ToSparse(&sp2,2);
|
||||
|
||||
//check we get back what we started with
|
||||
BOOST_CHECK_CLOSE(sp2.get("dense0"), 0.2,1e-5);
|
||||
BOOST_CHECK_CLOSE(sp2.get("dense1"), 0.3,1e-5);
|
||||
BOOST_CHECK_CLOSE(sp2.get("sparse0"), 0.7,1e-5);
|
||||
BOOST_CHECK_CLOSE(sp2.get("sparse1"), 0.9,1e-5);
|
||||
BOOST_CHECK_CLOSE(sp2.get("sparse2"), 0.1,1e-5);
|
||||
|
||||
}
|
@ -93,11 +93,17 @@ void MiraWeightVector::update(size_t index, ValType delta)
|
||||
m_lastUpdated[index] = m_numUpdates;
|
||||
}
|
||||
|
||||
void MiraWeightVector::ToSparse(SparseVector* sparse) const
|
||||
void MiraWeightVector::ToSparse(SparseVector* sparse, size_t denseSize) const
|
||||
{
|
||||
for (size_t i = 0; i < m_weights.size(); ++i) {
|
||||
if(abs(m_weights[i])>1e-8) {
|
||||
sparse->set(i,m_weights[i]);
|
||||
if (i < denseSize) {
|
||||
sparse->set(i,m_weights[i]);
|
||||
} else {
|
||||
//The ids in MiraFeatureVector/MiraWeightVector for sparse features
|
||||
//need to be translated when converting back to SparseVector.
|
||||
sparse->set(i-denseSize, m_weights[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -172,12 +178,18 @@ size_t AvgWeightVector::size() const
|
||||
return m_wv.m_weights.size();
|
||||
}
|
||||
|
||||
void AvgWeightVector::ToSparse(SparseVector* sparse) const
|
||||
void AvgWeightVector::ToSparse(SparseVector* sparse, size_t denseSize) const
|
||||
{
|
||||
for (size_t i = 0; i < size(); ++i) {
|
||||
ValType w = weight(i);
|
||||
if(abs(w)>1e-8) {
|
||||
sparse->set(i,w);
|
||||
if (i < denseSize) {
|
||||
sparse->set(i,w);
|
||||
} else {
|
||||
//The ids in MiraFeatureVector/MiraWeightVector for sparse features
|
||||
//need to be translated when converting back to SparseVector.
|
||||
sparse->set(i-denseSize, w);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -64,9 +64,9 @@ public:
|
||||
AvgWeightVector avg();
|
||||
|
||||
/**
|
||||
* Convert to sparse vector, interpreting all features as sparse.
|
||||
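* Convert to sparse vector, interpreting all features as sparse. Indices below denseSize are kept unchanged; indices at or above denseSize are shifted down by denseSize. Only used by hgmira.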
|
||||
**/
|
||||
void ToSparse(SparseVector* sparse) const;
|
||||
void ToSparse(SparseVector* sparse, size_t denseSize) const;
|
||||
|
||||
friend class AvgWeightVector;
|
||||
|
||||
@ -104,7 +104,7 @@ public:
|
||||
ValType score(const MiraFeatureVector& fv) const;
|
||||
ValType weight(std::size_t index) const;
|
||||
std::size_t size() const;
|
||||
void ToSparse(SparseVector* sparse) const;
|
||||
void ToSparse(SparseVector* sparse, size_t num_dense) const;
|
||||
private:
|
||||
const MiraWeightVector& m_wv;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user