use FactorCollection for vocab

This commit is contained in:
Hieu Hoang 2015-10-26 14:58:59 +00:00
parent 6d72539a22
commit 790c98360f
14 changed files with 47 additions and 130 deletions

View File

@ -32,6 +32,9 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1025143565" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1439481930" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
@ -39,8 +42,11 @@
<option id="gnu.cpp.link.option.paths.1260140770" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.1671854463" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="z"/>

View File

@ -1,18 +0,0 @@
/*
* Factor.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include "Factor.h"
// Default constructor. The body is intentionally empty; m_string is left
// to its own default construction.
Factor::Factor()
{
}
// Destructor. Nothing to release explicitly.
Factor::~Factor()
{
}

View File

@ -1,37 +0,0 @@
/*
* Factor.h
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#pragma once
#include "util/string_piece.hh"
#include "util/string_piece_hash.hh"
/**
 * A vocabulary entry wrapping a single StringPiece.
 *
 * Two factors compare equal when their wrapped strings compare equal, and
 * hash() is derived from the same string, so the class can be used as the
 * key of a hashed container.
 *
 * NOTE(review): StringPiece is presumably a non-owning view, in which case
 * the characters it refers to must outlive the Factor - confirm against
 * util/string_piece.hh and the callers.
 */
class Factor {
public:
  Factor();

  /** Wrap the given string. */
  Factor(const StringPiece &string)
    : m_string(string)
  {}

  virtual ~Factor();

  /** Hash of the wrapped string, as computed by hash_value(). */
  size_t hash() const {
    return hash_value(m_string);
  }

  /** True when both factors wrap strings that compare equal. */
  bool operator==(const Factor &compare) const {
    return m_string == compare.m_string;
  }

protected:
  StringPiece m_string;
};

View File

@ -5,7 +5,6 @@ import option ;
import path ;
exe moses2 :
Factor.cpp
FeatureFunction.cpp
Hypothesis.cpp
InputPath.cpp
@ -22,7 +21,6 @@ import path ;
System.cpp
TargetPhrase.cpp
TargetPhrases.cpp
Vocab.cpp
Weights.cpp
Word.cpp

View File

@ -18,7 +18,9 @@ Manager::Manager(System &system, const std::string &inputStr)
,m_initRange(NOT_FOUND, NOT_FOUND)
,m_initPhrase(system.GetManagerPool(), system, 0)
{
m_input = Phrase::CreateFromString(m_pool, inputStr);
Moses::FactorCollection &vocab = system.GetVocab();
m_input = Phrase::CreateFromString(m_pool, vocab, inputStr);
m_inputPaths.Init(*m_input, system);
const std::vector<const PhraseTable*> &pts = system.GetPhraseTables();

View File

@ -7,13 +7,12 @@
#include <vector>
#include "Phrase.h"
#include "Word.h"
#include "Vocab.h"
#include "moses/Util.h"
#include "util/pool.hh"
using namespace std;
Phrase *Phrase::CreateFromString(util::Pool &pool, const std::string &str)
Phrase *Phrase::CreateFromString(util::Pool &pool, Moses::FactorCollection &vocab, const std::string &str)
{
vector<string> toks = Moses::Tokenize(str);
size_t size = toks.size();
@ -21,14 +20,15 @@ Phrase *Phrase::CreateFromString(util::Pool &pool, const std::string &str)
ret = new (pool.Allocate<Phrase>()) Phrase(pool, size);
ret->CreateFromString(toks);
ret->CreateFromString(vocab, toks);
return ret;
}
// Fills every word of this phrase from the token at the same position.
// The loop runs over m_size words and reads toks[i] for each of them, so
// toks must contain at least m_size entries.
// vocab is handed unchanged to Word::CreateFromString for each word.
// NOTE(review): the two signature lines below look like the pre- and
// post-change forms of the same declaration as rendered by the diff.
void Phrase::CreateFromString(const std::vector<std::string> &toks)
void Phrase::CreateFromString(Moses::FactorCollection &vocab, const std::vector<std::string> &toks)
{
for (size_t i = 0; i < m_size; ++i) {
Word &word = (*this)[i];
word.CreateFromString(vocab, toks[i]);
}
}

View File

@ -11,6 +11,7 @@
#include <string>
#include "Word.h"
#include "util/pool.hh"
#include "moses/FactorCollection.h"
class PhraseBase
{
@ -24,7 +25,7 @@ class SubPhrase;
class Phrase : public PhraseBase
{
public:
static Phrase *CreateFromString(util::Pool &pool, const std::string &str);
static Phrase *CreateFromString(util::Pool &pool, Moses::FactorCollection &vocab, const std::string &str);
Phrase(util::Pool &pool, size_t size);
virtual ~Phrase();
@ -45,7 +46,7 @@ protected:
size_t m_size;
Word *m_words;
void CreateFromString(const std::vector<std::string> &toks);
void CreateFromString(Moses::FactorCollection &vocab, const std::vector<std::string> &toks);
};

View File

@ -79,6 +79,8 @@ void PhraseTable::Load(System &system)
{
m_path = "/Users/hieu/workspace/experiment/issues/sample-models/phrase-model/phrase-table";
Moses::FactorCollection &vocab = system.GetVocab();
util::Pool tmpPool;
vector<string> toks;
Moses::InputFileStream strme(m_path);
@ -88,7 +90,7 @@ void PhraseTable::Load(System &system)
Moses::TokenizeMultiCharSeparator(toks, line, "|||");
assert(toks.size() >= 3);
Phrase *source = Phrase::CreateFromString(tmpPool, toks[0]);
Phrase *source = Phrase::CreateFromString(tmpPool, vocab, toks[0]);
TargetPhrase *target = TargetPhrase::CreateFromString(system.GetSystemPool(), system, toks[1]);
target->GetScores().CreateFromString(toks[2], *this, system);
m_root.AddRule(*source, target);

View File

@ -7,7 +7,6 @@
#pragma once
#include <vector>
#include "Vocab.h"
#include "Weights.h"
#include "util/pool.hh"
#include "moses/FactorCollection.h"
@ -39,9 +38,11 @@ public:
// Read-only access to the list of stateful feature functions.
const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const
{
  return m_statefulFeatureFunctions;
}
// Returns the vocabulary by non-const reference even though the method is
// const; this compiles because m_vocab is declared mutable.
Moses::FactorCollection &GetVocab() const
{
  return m_vocab;
}
protected:
Vocab m_vocab;
Moses::FactorCollection m_factors;
mutable Moses::FactorCollection m_vocab;
std::vector<const FeatureFunction*> m_featureFunctions;
std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
std::vector<const PhraseTable*> m_phraseTables;

View File

@ -16,10 +16,12 @@ using namespace std;
// Builds a TargetPhrase from a string.
//   pool   - storage for the new object is obtained from pool.Allocate and
//            the object is constructed there with placement new
//   system - supplies the vocabulary (system.GetVocab()) and is forwarded to
//            the TargetPhrase constructor
//   str    - tokenized with Moses::Tokenize; one word is created per token
// Returns the newly constructed phrase.
// NOTE(review): the two ret->Phrase::CreateFromString lines look like the
// pre- and post-change forms of the same call as rendered by the diff.
TargetPhrase *TargetPhrase::CreateFromString(util::Pool &pool, System &system, const std::string &str)
{
Moses::FactorCollection &vocab = system.GetVocab();
vector<string> toks = Moses::Tokenize(str);
size_t size = toks.size();
TargetPhrase *ret = new (pool.Allocate<TargetPhrase>()) TargetPhrase(pool, system, size);
// Explicitly qualified call to the Phrase:: overload that takes the token list.
ret->Phrase::CreateFromString(toks);
ret->Phrase::CreateFromString(vocab, toks);
return ret;
}

View File

@ -1,26 +0,0 @@
/*
* Vocab.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include "Vocab.h"
// Default constructor. The body is intentionally empty; m_set starts out
// default-constructed.
Vocab::Vocab()
{
}
// Destructor. Nothing to release explicitly.
Vocab::~Vocab()
{
}
// Inserts a Factor built from `string` into the set and returns the address
// of the element held by the set. When an equal Factor is already present,
// insert() adds nothing and the iterator refers to the existing element, so
// the same pointer is handed back for equal strings.
const Factor *Vocab::AddFactor(const StringPiece &string)
{
  Set::iterator pos = m_set.insert(Factor(string)).first;
  return &*pos;
}

View File

@ -1,30 +0,0 @@
/*
* Vocab.h
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#ifndef VOCAB_H_
#define VOCAB_H_
#include <boost/unordered_set.hpp>
#include "util/string_piece.hh"
#include "moses/Util.h"
#include "Factor.h"
class Vocab {
public:
Vocab();
virtual ~Vocab();
const Factor *AddFactor(const StringPiece &string);
protected:
typedef boost::unordered_set<Factor, Moses::UnorderedComparer<Factor>, Moses::UnorderedComparer<Factor> > Set;
Set m_set;
};
#endif /* VOCAB_H_ */

View File

@ -5,27 +5,39 @@
* Author: hieu
*/
#include <stdexcept>
#include <vector>
#include "Word.h"
#include "Util.h"
#include "util/murmur_hash.hh"
using namespace std;
// Default constructor: calls Init on the m_factors array with
// MAX_NUM_FACTORS and NULL.
// NOTE(review): Init presumably sets all MAX_NUM_FACTORS slots to NULL -
// confirm in Util.h.
// NOTE(review): the two Init lines look like the pre- and post-change forms
// of the same statement as rendered by the diff.
Word::Word() {
Init<Factor*>(m_factors, MAX_NUM_FACTORS, NULL);
Init<const Moses::Factor*>(m_factors, MAX_NUM_FACTORS, NULL);
}
// Destructor. Nothing is released here; the body does not touch the
// pointers stored in m_factors.
Word::~Word()
{
}
/**
 * Set this word's factors from a '|'-separated string.
 *
 * The string is split on '|' and the i-th piece is registered with `vocab`;
 * the pointer returned by AddFactor is stored in m_factors[i]. Slots beyond
 * the number of pieces are left untouched. The second argument to AddFactor
 * is passed as false; its meaning is defined by Moses::FactorCollection.
 *
 * @param vocab  factor collection that supplies the factor pointers
 * @param str    textual form of the word, factors separated by '|'
 * @throws std::out_of_range if str contains more than MAX_NUM_FACTORS
 *         factors. m_factors has exactly MAX_NUM_FACTORS slots, so storing
 *         more would write past the end of the array.
 */
void Word::CreateFromString(Moses::FactorCollection &vocab, const std::string &str)
{
  const vector<string> toks = Moses::Tokenize(str, "|");

  // Reject over-long input before touching m_factors.
  if (toks.size() > static_cast<size_t>(MAX_NUM_FACTORS)) {
    throw std::out_of_range("Word::CreateFromString: too many factors in '" + str + "'");
  }

  for (size_t i = 0; i < toks.size(); ++i) {
    m_factors[i] = vocab.AddFactor(toks[i], false);
  }
}
// Returns a hash of this word: MurmurHashNative over the raw bytes of the
// m_factors pointer array, with seed 0. The hash is computed from the
// pointer values themselves, not from the text of the factors.
// NOTE(review): the two `ret` lines look like the pre- and post-change forms
// of the same statement as rendered by the diff.
size_t Word::hash() const
{
uint64_t seed = 0;
size_t ret = util::MurmurHashNative(m_factors, sizeof(Factor*) * MAX_NUM_FACTORS, seed);
size_t ret = util::MurmurHashNative(m_factors, sizeof(Moses::Factor*) * MAX_NUM_FACTORS, seed);
return ret;
}
// Two words are equal when their m_factors arrays are byte-for-byte
// identical, i.e. every slot holds the same pointer value in both words.
// NOTE(review): the two `cmp` lines look like the pre- and post-change forms
// of the same statement as rendered by the diff.
bool Word::operator==(const Word &compare) const
{
int cmp = memcmp(m_factors, compare.m_factors, sizeof(Factor*) * MAX_NUM_FACTORS);
int cmp = memcmp(m_factors, compare.m_factors, sizeof(Moses::Factor*) * MAX_NUM_FACTORS);
return cmp == 0;
}

View File

@ -7,18 +7,22 @@
#pragma once
#include "Factor.h"
#include "TypeDef.h"
#include "moses/Factor.h"
#include "moses/FactorCollection.h"
// A word represented as a fixed-size array of MAX_NUM_FACTORS factor
// pointers.
// NOTE(review): the two m_factors declarations look like the pre- and
// post-change forms of the same member as rendered by the diff.
class Word {
public:
Word();
virtual ~Word();
// Fills the factor slots from the textual form of a word, using vocab to
// obtain the factor pointers.
void CreateFromString(Moses::FactorCollection &vocab, const std::string &str);
// Hash over the contents of m_factors.
size_t hash() const;
// Equality over the contents of m_factors.
bool operator==(const Word &compare) const;
protected:
// One slot per factor.
Factor *m_factors[MAX_NUM_FACTORS];
const Moses::Factor *m_factors[MAX_NUM_FACTORS];
};