2015-10-23 18:46:35 +03:00
|
|
|
/*
|
|
|
|
* Word.cpp
|
|
|
|
*
|
|
|
|
* Created on: 23 Oct 2015
|
|
|
|
* Author: hieu
|
|
|
|
*/
|
|
|
|
|
2015-12-18 21:38:24 +03:00
|
|
|
#include <sstream>
|
2015-10-26 17:58:59 +03:00
|
|
|
#include <vector>
|
2015-10-23 18:46:35 +03:00
|
|
|
#include "Word.h"
|
2015-11-18 14:08:32 +03:00
|
|
|
#include "System.h"
|
2015-11-11 19:23:49 +03:00
|
|
|
#include "legacy/Util2.h"
|
2015-10-24 14:39:15 +03:00
|
|
|
#include "util/murmur_hash.hh"
|
2015-10-23 18:46:35 +03:00
|
|
|
|
2015-10-26 17:58:59 +03:00
|
|
|
using namespace std;
|
|
|
|
|
2015-12-10 23:49:30 +03:00
|
|
|
namespace Moses2
|
|
|
|
{
|
|
|
|
|
2015-10-23 18:46:35 +03:00
|
|
|
/**
 * Default constructor.
 *
 * Hands the whole m_factors array (MAX_NUM_FACTORS entries) to the Init
 * helper together with a null factor pointer as the fill value.
 * NOTE(review): Init is declared outside this file (presumably
 * legacy/Util2.h); it is expected to assign the value to every slot.
 */
Word::Word()
{
  const Factor *const emptySlot = NULL;
  Init<const Factor*>(m_factors, MAX_NUM_FACTORS, emptySlot);
}
|
|
|
|
|
2015-12-07 19:49:02 +03:00
|
|
|
/**
 * Copy constructor.
 *
 * Performs a raw byte copy of the source word's factor-pointer array, so
 * both words end up pointing at the same Factor objects (the pointers are
 * copied, not the factors themselves).
 *
 * @param copy  word whose factor pointers are duplicated
 */
Word::Word(const Word &copy)
{
  memcpy(m_factors, copy.m_factors, sizeof(const Factor *) * MAX_NUM_FACTORS);
}
|
|
|
|
|
2015-10-23 18:46:35 +03:00
|
|
|
/**
 * Destructor.
 *
 * Intentionally empty: no cleanup is performed on m_factors here.
 */
Word::~Word()
{
}
|
|
|
|
|
2015-11-18 14:08:32 +03:00
|
|
|
/**
 * Populate this word from a '|'-separated factor string.
 *
 * The string is split on '|'; token i is registered through
 * vocab.AddFactor() and the returned pointer is stored in m_factors[i].
 *
 * At most MAX_NUM_FACTORS tokens are consumed: any surplus tokens are
 * ignored rather than written past the end of m_factors. Slots that receive
 * no token are reset to NULL so that hash() and Compare(), which look at
 * every slot, never see values left over from an earlier use of this object.
 *
 * @param vocab   factor collection used to look up / create each factor
 * @param system  passed through unchanged to FactorCollection::AddFactor
 * @param str     factor string, e.g. "surface|lemma|pos"
 */
void Word::CreateFromString(FactorCollection &vocab, const System &system, const std::string &str)
{
  vector<string> toks = Tokenize(str, "|");

  // Never index beyond the fixed-size factor array.
  const size_t numFactors =
    (toks.size() < MAX_NUM_FACTORS) ? toks.size() : MAX_NUM_FACTORS;

  for (size_t i = 0; i < numFactors; ++i) {
    const string &tok = toks[i];
    //cerr << "tok=" << tok << endl;
    const Factor *factor = vocab.AddFactor(tok, system, false);
    m_factors[i] = factor;
  }

  // Clear the unused tail so stale pointers cannot affect hashing/comparison.
  for (size_t i = numFactors; i < MAX_NUM_FACTORS; ++i) {
    m_factors[i] = NULL;
  }
}
|
|
|
|
|
2015-10-24 14:39:15 +03:00
|
|
|
size_t Word::hash() const
|
|
|
|
{
|
|
|
|
uint64_t seed = 0;
|
2015-11-13 01:51:13 +03:00
|
|
|
size_t ret = util::MurmurHashNative(m_factors, sizeof(Factor*) * MAX_NUM_FACTORS, seed);
|
2015-10-24 14:39:15 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-11-26 03:53:12 +03:00
|
|
|
int Word::Compare(const Word &compare) const
|
2015-10-24 14:39:15 +03:00
|
|
|
{
|
2015-11-26 03:53:12 +03:00
|
|
|
|
2015-11-13 01:51:13 +03:00
|
|
|
int cmp = memcmp(m_factors, compare.m_factors, sizeof(Factor*) * MAX_NUM_FACTORS);
|
2015-11-26 03:53:12 +03:00
|
|
|
return cmp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
int ret = m_factors[0]->GetString().compare(compare.m_factors[0]->GetString());
|
|
|
|
return ret;
|
|
|
|
*/
|
2015-10-24 14:39:15 +03:00
|
|
|
}
|
2015-10-26 19:32:47 +03:00
|
|
|
|
2015-11-25 20:35:22 +03:00
|
|
|
bool Word::operator<(const Word &compare) const
|
|
|
|
{
|
2015-11-26 03:53:12 +03:00
|
|
|
int cmp = Compare(compare);
|
2015-11-25 20:35:22 +03:00
|
|
|
return (cmp < 0);
|
|
|
|
}
|
|
|
|
|
2015-10-26 19:32:47 +03:00
|
|
|
std::ostream& operator<<(std::ostream &out, const Word &obj)
|
|
|
|
{
|
2015-10-26 21:42:42 +03:00
|
|
|
bool outputAlready = false;
|
2015-10-26 19:32:47 +03:00
|
|
|
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) {
|
2015-11-13 01:51:13 +03:00
|
|
|
const Factor *factor = obj.m_factors[i];
|
2015-10-26 21:42:42 +03:00
|
|
|
if (factor) {
|
|
|
|
if (outputAlready) {
|
|
|
|
out << "|";
|
|
|
|
}
|
|
|
|
out << *factor;
|
|
|
|
outputAlready = true;
|
|
|
|
}
|
2015-10-26 19:32:47 +03:00
|
|
|
}
|
|
|
|
return out;
|
|
|
|
}
|
2015-12-10 23:49:30 +03:00
|
|
|
|
2015-12-18 21:38:24 +03:00
|
|
|
std::string Word::GetString(const FactorList &factorTypes) const
|
|
|
|
{
|
|
|
|
assert(factorTypes.size());
|
|
|
|
std::stringstream ret;
|
|
|
|
|
|
|
|
ret << m_factors[ factorTypes[0] ]->GetString();
|
|
|
|
for (size_t i = 1; i < factorTypes.size(); ++i) {
|
|
|
|
FactorType factorType = factorTypes[i];
|
|
|
|
ret << "|" << m_factors[factorType];
|
|
|
|
}
|
|
|
|
return ret.str();
|
|
|
|
}
|
|
|
|
|
2015-12-10 23:49:30 +03:00
|
|
|
}
|
|
|
|
|