diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp index 3e62d8171..5288116d6 100644 --- a/mert/HopeFearDecoder.cpp +++ b/mert/HopeFearDecoder.cpp @@ -180,7 +180,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder references_.Load(referenceFiles, vocab_); SparseVector weights; - wv.ToSparse(&weights); + wv.ToSparse(&weights,num_dense_); scorer_ = scorer; static const string kWeights = "weights"; @@ -243,7 +243,7 @@ void HypergraphHopeFearDecoder::HopeFear( { size_t sentenceId = *sentenceIdIter_; SparseVector weights; - wv.ToSparse(&weights); + wv.ToSparse(&weights, num_dense_); const Graph& graph = *(graphs_[sentenceId]); // ValType hope_scale = 1.0; @@ -338,7 +338,7 @@ void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector bg(scorer_->NumberOfScores()); //cerr << "Calculating bleu on " << sentenceId << endl; Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo); diff --git a/mert/Jamfile b/mert/Jamfile index 4dd2fb540..aff2c78be 100644 --- a/mert/Jamfile +++ b/mert/Jamfile @@ -77,6 +77,7 @@ unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_f unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ; unit-test forest_rescore_test : ForestRescoreTest.cpp mert_lib ..//boost_unit_test_framework ; unit-test hypergraph_test : HypergraphTest.cpp mert_lib ..//boost_unit_test_framework ; +unit-test mira_feature_vector_test : MiraFeatureVectorTest.cpp mert_lib ..//boost_unit_test_framework ; unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ; unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ; unit-test point_test : PointTest.cpp mert_lib ..//boost_unit_test_framework ; diff --git a/mert/MiraFeatureVectorTest.cpp b/mert/MiraFeatureVectorTest.cpp new file mode 100644 index 000000000..d64ba79a5 --- /dev/null +++ b/mert/MiraFeatureVectorTest.cpp @@ -0,0 +1,49 @@ +#include 
"MiraFeatureVector.h" +#include "MiraWeightVector.h" + +#define BOOST_TEST_MODULE MiraFeatureVector +#include + +using namespace MosesTuning; + +/* Note that the conversion to and from SparseVector needs to know +how many of the features are really "dense". This is because in hg mira +all features (sparse and dense) get rolled into one SparseVector. The from_sparse test below builds a MiraFeatureVector from a SparseVector with 2 dense features, applies it as an update to a default-constructed MiraWeightVector, and converts the weights back with ToSparse. +*/ + +BOOST_AUTO_TEST_CASE(from_sparse) { + SparseVector sp; + sp.set("dense0", 0.2); + sp.set("dense1", 0.3); + sp.set("sparse0", 0.7); + sp.set("sparse1", 0.9); + sp.set("sparse2", 0.1); + + MiraFeatureVector mfv(sp,2); + BOOST_CHECK_EQUAL(mfv.size(),5); + /* The dense features keep ids 0 and 1; the sparse features are expected at ids 4, 5 and 6. */ + BOOST_CHECK_EQUAL(mfv.feat(0),0); + BOOST_CHECK_EQUAL(mfv.feat(1),1); + BOOST_CHECK_EQUAL(mfv.feat(2),4); + BOOST_CHECK_EQUAL(mfv.feat(3),5); + BOOST_CHECK_EQUAL(mfv.feat(4),6); + + BOOST_CHECK_CLOSE(mfv.val(0), 0.2,1e-5); + BOOST_CHECK_CLOSE(mfv.val(1), 0.3,1e-5); + BOOST_CHECK_CLOSE(mfv.val(2), 0.7,1e-5); + BOOST_CHECK_CLOSE(mfv.val(3), 0.9,1e-5); + BOOST_CHECK_CLOSE(mfv.val(4), 0.1,1e-5); + + MiraWeightVector mwv; + mwv.update(mfv,1.0); + SparseVector sp2; + mwv.ToSparse(&sp2,2); + + //check that ToSparse with the same dense count gives back what we started with + BOOST_CHECK_CLOSE(sp2.get("dense0"), 0.2,1e-5); + BOOST_CHECK_CLOSE(sp2.get("dense1"), 0.3,1e-5); + BOOST_CHECK_CLOSE(sp2.get("sparse0"), 0.7,1e-5); + BOOST_CHECK_CLOSE(sp2.get("sparse1"), 0.9,1e-5); + BOOST_CHECK_CLOSE(sp2.get("sparse2"), 0.1,1e-5); + +} diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp index eba9617c8..367305c48 100644 --- a/mert/MiraWeightVector.cpp +++ b/mert/MiraWeightVector.cpp @@ -93,11 +93,17 @@ void MiraWeightVector::update(size_t index, ValType delta) m_lastUpdated[index] = m_numUpdates; } -void MiraWeightVector::ToSparse(SparseVector* sparse) const +void MiraWeightVector::ToSparse(SparseVector* sparse, size_t denseSize) const { for (size_t i = 0; i < m_weights.size(); ++i) { if(abs(m_weights[i])>1e-8) { - sparse->set(i,m_weights[i]); + if (i < denseSize) { + sparse->set(i,m_weights[i]); + } 
else { + //The ids in MiraFeatureVector/MiraWeightVector for sparse features + //need to be translated (shifted down by denseSize) when converting back to SparseVector. + sparse->set(i-denseSize, m_weights[i]); + } } } } @@ -172,12 +178,18 @@ size_t AvgWeightVector::size() const return m_wv.m_weights.size(); } -void AvgWeightVector::ToSparse(SparseVector* sparse) const +void AvgWeightVector::ToSparse(SparseVector* sparse, size_t denseSize) const { for (size_t i = 0; i < size(); ++i) { ValType w = weight(i); if(abs(w)>1e-8) { - sparse->set(i,w); + if (i < denseSize) { + sparse->set(i,w); + } else { + //The ids in MiraFeatureVector/MiraWeightVector for sparse features + //need to be translated (shifted down by denseSize) when converting back to SparseVector. + sparse->set(i-denseSize, w); + } } } } diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h index bbc28704b..8200d6013 100644 --- a/mert/MiraWeightVector.h +++ b/mert/MiraWeightVector.h @@ -64,9 +64,9 @@ public: AvgWeightVector avg(); /** - * Convert to sparse vector, interpreting all features as sparse. + * Convert to sparse vector, interpreting all features as sparse. Only used by hgmira. Weights at indices below denseSize keep their index; a weight at a higher index i is stored under id i-denseSize. Weights that fail the abs(w)>1e-8 check are skipped. **/ - void ToSparse(SparseVector* sparse) const; + void ToSparse(SparseVector* sparse, size_t denseSize) const; friend class AvgWeightVector; @@ -104,7 +104,7 @@ public: ValType score(const MiraFeatureVector& fv) const; ValType weight(std::size_t index) const; std::size_t size() const; - void ToSparse(SparseVector* sparse) const; + void ToSparse(SparseVector* sparse, size_t num_dense) const; /* NOTE(review): this parameter is named denseSize in the definition in MiraWeightVector.cpp */ private: const MiraWeightVector& m_wv; };