use unordered maps

This commit is contained in:
Barry Haddow 2011-11-18 10:17:16 +00:00
parent d57d0c0656
commit a5b800bebe
4 changed files with 36 additions and 10 deletions

View File

@ -100,7 +100,8 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin
for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
const string& ref = refs[file_id][ref_id];
vector<string> refTokens = Tokenize(ref);
std::pair< size_t, std::map< Phrase, size_t > > ref_pair;
m_refs[ref_id] = pair<size_t,NGrams>();
pair<size_t,NGrams>& ref_pair = m_refs[ref_id];
ref_pair.first = refTokens.size();
for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) {
for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) {
@ -114,7 +115,6 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin
ref_pair.second[ngram] += 1;
}
}
m_refs[ref_id] = ref_pair;
}
}
}
@ -227,7 +227,7 @@ void BleuScoreFeature::GetNgramMatchCounts(Phrase& phrase,
std::vector< size_t >& ret_matches,
size_t skip_first) const
{
std::map< Phrase, size_t >::const_iterator ref_ngram_counts_iter;
NGrams::const_iterator ref_ngram_counts_iter;
size_t ngram_start_idx, ngram_end_idx;
// Chiang et al (2008) use unclipped counts of ngram matches
@ -254,10 +254,10 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
std::vector< size_t >& ret_matches,
size_t skip_first) const
{
std::map< Phrase, size_t >::const_iterator ref_ngram_counts_iter;
NGrams::const_iterator ref_ngram_counts_iter;
size_t ngram_start_idx, ngram_end_idx;
std::map<size_t, std::map<Phrase, size_t> > ngram_matches;
Matches ngram_matches;
for (size_t end_idx = skip_first; end_idx < phrase.GetSize(); end_idx++) {
for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
if (order > end_idx) break;
@ -277,7 +277,7 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
// clip ngram matches
for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
std::map<Phrase, size_t>::const_iterator iter;
NGrams::const_iterator iter;
// iterate over ngram counts for every ngram order
for (iter=ngram_matches[order].begin(); iter != ngram_matches[order].end(); ++iter) {
@ -300,7 +300,7 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
std::map< Phrase, size_t >::const_iterator reference_ngrams_iter;
NGrams::const_iterator reference_ngrams_iter;
const BleuScoreState& ps = dynamic_cast<const BleuScoreState&>(*prev_state);
BleuScoreState* new_state = new BleuScoreState(ps);
//cerr << "PS: " << ps << endl;

View File

@ -1,11 +1,12 @@
#ifndef BLUESCOREFEATURE_H
#define BLUESCOREFEATURE_H
#include <map>
#include <utility>
#include <string>
#include <vector>
#include <boost/unordered_map.hpp>
#include "FeatureFunction.h"
#include "FFState.h"
@ -38,10 +39,14 @@ private:
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state);
typedef std::map< Phrase, size_t > NGrams;
class BleuScoreFeature : public StatefulFeatureFunction {
public:
typedef boost::unordered_map< Phrase, size_t > NGrams;
typedef boost::unordered_map<size_t, std::pair<size_t,NGrams> > RefCounts;
typedef boost::unordered_map<size_t, NGrams> Matches;
BleuScoreFeature():
StatefulFeatureFunction("BleuScore",1),
m_count_history(BleuScoreState::bleu_order),
@ -109,7 +114,7 @@ private:
float m_ref_length_history;
size_t m_cur_source_length;
std::map< size_t, std::pair< size_t, NGrams > > m_refs;
RefCounts m_refs;
NGrams m_cur_ref_ngrams;
size_t m_cur_ref_length;

View File

@ -27,6 +27,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <vector>
#include <list>
#include <string>
#include <boost/functional/hash.hpp>
#include "Word.h"
#include "WordsBitmap.h"
#include "TypeDef.h"
@ -177,6 +180,13 @@ public:
}
};
/**
 * Hash hook for Phrase, usable by boost::hash / boost::unordered_map.
 *
 * Starts from zero and folds each word, from position 0 up to GetSize() - 1,
 * into the running value with boost::hash_combine, so the result depends on
 * both the words and their order.
 */
inline size_t hash_value(const Phrase& phrase) {
  size_t digest = 0;
  size_t pos = 0;
  while (pos < phrase.GetSize()) {
    boost::hash_combine(digest, phrase.GetWord(pos));
    ++pos;
  }
  return digest;
}
}
#endif

View File

@ -26,6 +26,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <iostream>
#include <vector>
#include <list>
#include "util/murmur_hash.hh"
#include "TypeDef.h"
#include "Factor.h"
#include "Util.h"
@ -134,6 +137,9 @@ public:
void CreateUnknownWord(const Word &sourceWord);
/**
 * Hash of this word, computed by util::MurmurHashNative over the factor
 * array, with m_isNonTerminal passed as the seed argument.
 *
 * MurmurHashNative expects the length of the key in BYTES. Passing the bare
 * element count MAX_NUM_FACTORS would hash only the first few bytes of the
 * array, so words differing in later factors would collide. The count is
 * therefore scaled by the size of one array element.
 * (Assumes m_factorArray holds MAX_NUM_FACTORS elements, as the original
 * call implies.)
 */
inline size_t hash() const {
  return util::MurmurHashNative(m_factorArray,
                                MAX_NUM_FACTORS * sizeof(m_factorArray[0]),
                                m_isNonTerminal);
}
};
struct WordComparer {
@ -143,6 +149,11 @@ struct WordComparer {
}
};
/**
 * Hash hook for Word, usable by boost::hash and boost::hash_combine.
 * Simply forwards to Word::hash().
 */
inline size_t hash_value(const Word& word) {
  const size_t digest = word.hash();
  return digest;
}
}
#endif