Use string hashes and an unordered set to track distinct hypotheses

This commit is contained in:
Hieu Hoang 2016-08-18 17:24:53 +01:00
parent e6c0bac366
commit 3695e30ac0
2 changed files with 11 additions and 5 deletions

View File

@@ -5,6 +5,8 @@
* Author: hieu
*/
#include <boost/foreach.hpp>
#include <boost/functional/hash.hpp>
#include <boost/unordered_set.hpp>
#include <vector>
#include <sstream>
#include "Manager.h"
@@ -218,7 +220,7 @@ std::string Manager::OutputNBest()
{
arcLists.Sort();
set<string> distinctHypos;
boost::unordered_set<size_t> distinctHypos;
TrellisPaths<TrellisPath> contenders;
m_search->AddInitialTrellisPaths(contenders);
@@ -227,7 +229,7 @@ std::string Manager::OutputNBest()
// MAIN LOOP
stringstream out;
Moses2::FixPrecision(out);
Moses2::FixPrecision(out);
size_t maxIter = system.options.nbest.nbest_size * system.options.nbest.factor;
size_t bestInd = 0;
@@ -243,8 +245,10 @@ std::string Manager::OutputNBest()
if (system.options.nbest.only_distinct) {
string tgtPhrase = path->OutputTargetPhrase(system);
//cerr << "tgtPhrase=" << tgtPhrase << endl;
boost::hash<std::string> string_hash;
size_t hash = string_hash(tgtPhrase);
if (distinctHypos.insert(tgtPhrase).second) {
if (distinctHypos.insert(hash).second) {
ok = true;
}
}

View File

@@ -24,7 +24,7 @@ namespace SCFG
KBestExtractor::KBestExtractor(const SCFG::Manager &mgr)
:m_mgr(mgr)
{
set<string> distinctHypos;
boost::unordered_set<size_t> distinctHypos;
ArcLists &arcLists = mgr.arcLists;
const Stack &lastStack = mgr.GetStacks().GetLastStack();
@@ -53,8 +53,10 @@ KBestExtractor::KBestExtractor(const SCFG::Manager &mgr)
if (mgr.system.options.nbest.only_distinct) {
string tgtPhrase = path->Output();
//cerr << "tgtPhrase=" << tgtPhrase << endl;
boost::hash<std::string> string_hash;
size_t hash = string_hash(tgtPhrase);
if (distinctHypos.insert(tgtPhrase).second) {
if (distinctHypos.insert(hash).second) {
ok = true;
}
}