mosesdecoder/moses2/Word.cpp

137 lines
2.9 KiB
C++
Raw Normal View History

2015-10-23 18:46:35 +03:00
/*
* Word.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
2016-08-26 23:48:10 +03:00
#include <boost/functional/hash_fwd.hpp>
#include <sstream>
#include <stdexcept>
#include <vector>
#include "Word.h"
#include "System.h"
#include "legacy/Util2.h"
#include "util/murmur_hash.hh"
2015-10-23 18:46:35 +03:00
2015-10-26 17:58:59 +03:00
using namespace std;
2015-12-10 23:49:30 +03:00
namespace Moses2
{
2016-03-31 23:00:16 +03:00
/**
 * Default constructor: passes NULL as the fill value for all
 * MAX_NUM_FACTORS entries of m_factors to the Init helper.
 */
Word::Word()
{
  const Factor *const emptySlot = NULL;
  Init<const Factor*>(m_factors, MAX_NUM_FACTORS, emptySlot);
}
2015-12-07 19:49:02 +03:00
/**
 * Copy constructor: duplicates the factor pointers of another word.
 *
 * Only the pointers are copied, slot by slot; the Factor objects they
 * point to are not duplicated.
 *
 * @param copy word whose factor slots are copied
 */
Word::Word(const Word &copy)
{
  for (size_t slot = 0; slot < MAX_NUM_FACTORS; ++slot) {
    m_factors[slot] = copy.m_factors[slot];
  }
}
2016-03-31 23:00:16 +03:00
/**
 * Destructor. Intentionally empty: it performs no cleanup.
 */
Word::~Word()
{
}
2016-03-31 23:00:16 +03:00
/**
 * Populate this word's factor slots from a '|'-separated string.
 *
 * The string is split on '|'; the i-th token is passed to
 * vocab.AddFactor() and the returned factor is stored in slot i.
 * Every slot beyond the last token is set to NULL.
 *
 * @param vocab  factor collection queried via AddFactor() for each token
 * @param system forwarded unchanged to AddFactor()
 * @param str    factored word text, e.g. "a|b|c"
 * @throws std::runtime_error if str contains more than MAX_NUM_FACTORS
 *         tokens (storing them would write past the end of m_factors)
 */
void Word::CreateFromString(FactorCollection &vocab, const System &system,
                            const std::string &str)
{
  const vector<string> toks = Tokenize(str, "|");

  // Reject oversized input before touching m_factors, so a malformed word
  // cannot overflow the fixed-size factor array.
  if (toks.size() > MAX_NUM_FACTORS) {
    stringstream msg;
    msg << "Word \"" << str << "\" has " << toks.size()
        << " factors, but at most " << MAX_NUM_FACTORS << " are supported";
    throw std::runtime_error(msg.str());
  }

  for (size_t i = 0; i < toks.size(); ++i) {
    const string &tok = toks[i];
    //cerr << "tok=" << tok << endl;
    const Factor *factor = vocab.AddFactor(tok, system, false);
    m_factors[i] = factor;
  }

  // null the rest
  for (size_t i = toks.size(); i < MAX_NUM_FACTORS; ++i) {
    m_factors[i] = NULL;
  }
}
2015-10-24 14:39:15 +03:00
size_t Word::hash() const
{
2016-03-31 23:00:16 +03:00
uint64_t seed = 0;
size_t ret = util::MurmurHashNative(m_factors,
2017-02-01 03:27:14 +03:00
sizeof(Factor*) * MAX_NUM_FACTORS, seed);
2016-03-31 23:00:16 +03:00
return ret;
2015-10-24 14:39:15 +03:00
}
2016-08-26 23:48:10 +03:00
size_t Word::hash(const std::vector<FactorType> &factors) const
{
size_t seed = 0;
for (size_t i = 0; i < factors.size(); ++i) {
2017-02-01 03:27:14 +03:00
FactorType factorType = factors[i];
const Factor *factor = m_factors[factorType];
boost::hash_combine(seed, factor);
2016-08-26 23:48:10 +03:00
}
return seed;
}
2015-11-26 03:53:12 +03:00
int Word::Compare(const Word &compare) const
2015-10-24 14:39:15 +03:00
{
2015-11-26 03:53:12 +03:00
2016-03-31 23:00:16 +03:00
int cmp = memcmp(m_factors, compare.m_factors,
2017-02-01 03:27:14 +03:00
sizeof(Factor*) * MAX_NUM_FACTORS);
2016-03-31 23:00:16 +03:00
return cmp;
2015-11-26 03:53:12 +03:00
2016-03-31 23:00:16 +03:00
/*
int ret = m_factors[0]->GetString().compare(compare.m_factors[0]->GetString());
return ret;
*/
2015-10-24 14:39:15 +03:00
}
2015-10-26 19:32:47 +03:00
2015-11-25 20:35:22 +03:00
/**
 * Strict ordering of words, built on Compare().
 *
 * @param compare right-hand operand
 * @return true when Compare(compare) is negative
 */
bool Word::operator<(const Word &compare) const
{
  return Compare(compare) < 0;
}
2016-06-20 16:59:31 +03:00
std::string Word::Debug(const System &system) const
2015-10-26 19:32:47 +03:00
{
2016-06-20 16:59:31 +03:00
stringstream out;
2016-03-31 23:00:16 +03:00
bool outputAlready = false;
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) {
2016-04-21 18:22:09 +03:00
const Factor *factor = m_factors[i];
2016-03-31 23:00:16 +03:00
if (factor) {
if (outputAlready) {
out << "|";
}
out << *factor;
outputAlready = true;
}
}
2016-06-18 00:54:32 +03:00
2016-06-20 16:59:31 +03:00
return out.str();
2016-04-21 18:22:09 +03:00
}
2016-08-29 20:47:41 +03:00
void Word::OutputToStream(const System &system, std::ostream &out) const
2016-06-11 03:31:40 +03:00
{
2016-08-29 20:47:41 +03:00
const std::vector<FactorType> &factorTypes = system.options.output.factor_order;
out << *m_factors[ factorTypes[0] ];
for (size_t i = 1; i < factorTypes.size(); ++i) {
2017-02-01 03:27:14 +03:00
FactorType factorType = factorTypes[i];
2016-08-29 20:47:41 +03:00
const Factor *factor = m_factors[factorType];
out << "|" << *factor;
2016-06-11 03:31:40 +03:00
}
}
2015-12-18 21:38:24 +03:00
/**
 * Return the text of the selected factors joined by '|'.
 *
 * @param factorTypes factor types (indices into m_factors) to include, in
 *                    output order; must be non-empty (checked by assert)
 * @return the GetString() text of each selected factor, '|'-separated
 */
std::string Word::GetString(const FactorList &factorTypes) const
{
  assert(factorTypes.size());
  std::stringstream ret;

  ret << m_factors[factorTypes[0]]->GetString();
  for (size_t i = 1; i < factorTypes.size(); ++i) {
    FactorType factorType = factorTypes[i];
    // Stream the factor's text, as for the first factor; streaming the
    // pointer itself would print its address.
    ret << "|" << m_factors[factorType]->GetString();
  }
  return ret.str();
}
2015-12-10 23:49:30 +03:00
}