use FactorCollection for vocab

This commit is contained in:
Hieu Hoang 2015-10-26 14:58:59 +00:00
parent 6d72539a22
commit 790c98360f
14 changed files with 47 additions and 130 deletions

View File

@ -32,6 +32,9 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/> <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/> <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option> </option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1025143565" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/> <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool> </tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1439481930" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/> <tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1439481930" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
@ -39,8 +42,11 @@
<option id="gnu.cpp.link.option.paths.1260140770" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths"> <option id="gnu.cpp.link.option.paths.1260140770" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/> <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/> <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option> </option>
<option id="gnu.cpp.link.option.libs.1671854463" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs"> <option id="gnu.cpp.link.option.libs.1671854463" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="moses"/> <listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="util"/> <listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="z"/> <listOptionValue builtIn="false" value="z"/>

View File

@ -1,18 +0,0 @@
/*
* Factor.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include "Factor.h"
// Default constructor: no member initialisers are given, so m_string is
// default-constructed and no other work is done.
Factor::Factor() {
// Intentionally empty.
}
// Destructor: performs no explicit cleanup.
Factor::~Factor() {
// Intentionally empty.
}

View File

@ -1,37 +0,0 @@
/*
* Factor.h
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#pragma once
#include "util/string_piece.hh"
#include "util/string_piece_hash.hh"
/**
 * A vocabulary entry identified by its string.
 *
 * Hashing and equality are both defined purely in terms of m_string, so two
 * Factor objects built from equal strings compare equal and hash alike.
 *
 * NOTE(review): the string is held as a StringPiece rather than an owning
 * std::string - presumably a non-owning view, in which case the referenced
 * characters must outlive the Factor. Confirm against util/string_piece.hh.
 */
class Factor {
public:
  /** Construct a Factor with a default-constructed string. */
  Factor();

  /** Construct a Factor that refers to @p string. */
  Factor(const StringPiece &string)
    : m_string(string)
  {}

  virtual ~Factor();

  /** Hash of the underlying string, as computed by hash_value(). */
  size_t hash() const {
    return hash_value(m_string);
  }

  /** Two factors are equal when their strings are equal. */
  bool operator==(const Factor &compare) const {
    return m_string == compare.m_string;
  }

protected:
  StringPiece m_string;  // the string this factor stands for
};

View File

@ -5,7 +5,6 @@ import option ;
import path ; import path ;
exe moses2 : exe moses2 :
Factor.cpp
FeatureFunction.cpp FeatureFunction.cpp
Hypothesis.cpp Hypothesis.cpp
InputPath.cpp InputPath.cpp
@ -22,7 +21,6 @@ import path ;
System.cpp System.cpp
TargetPhrase.cpp TargetPhrase.cpp
TargetPhrases.cpp TargetPhrases.cpp
Vocab.cpp
Weights.cpp Weights.cpp
Word.cpp Word.cpp

View File

@ -18,7 +18,9 @@ Manager::Manager(System &system, const std::string &inputStr)
,m_initRange(NOT_FOUND, NOT_FOUND) ,m_initRange(NOT_FOUND, NOT_FOUND)
,m_initPhrase(system.GetManagerPool(), system, 0) ,m_initPhrase(system.GetManagerPool(), system, 0)
{ {
m_input = Phrase::CreateFromString(m_pool, inputStr); Moses::FactorCollection &vocab = system.GetVocab();
m_input = Phrase::CreateFromString(m_pool, vocab, inputStr);
m_inputPaths.Init(*m_input, system); m_inputPaths.Init(*m_input, system);
const std::vector<const PhraseTable*> &pts = system.GetPhraseTables(); const std::vector<const PhraseTable*> &pts = system.GetPhraseTables();

View File

@ -7,13 +7,12 @@
#include <vector> #include <vector>
#include "Phrase.h" #include "Phrase.h"
#include "Word.h" #include "Word.h"
#include "Vocab.h"
#include "moses/Util.h" #include "moses/Util.h"
#include "util/pool.hh" #include "util/pool.hh"
using namespace std; using namespace std;
Phrase *Phrase::CreateFromString(util::Pool &pool, const std::string &str) Phrase *Phrase::CreateFromString(util::Pool &pool, Moses::FactorCollection &vocab, const std::string &str)
{ {
vector<string> toks = Moses::Tokenize(str); vector<string> toks = Moses::Tokenize(str);
size_t size = toks.size(); size_t size = toks.size();
@ -21,14 +20,15 @@ Phrase *Phrase::CreateFromString(util::Pool &pool, const std::string &str)
ret = new (pool.Allocate<Phrase>()) Phrase(pool, size); ret = new (pool.Allocate<Phrase>()) Phrase(pool, size);
ret->CreateFromString(toks); ret->CreateFromString(vocab, toks);
return ret; return ret;
} }
void Phrase::CreateFromString(const std::vector<std::string> &toks) void Phrase::CreateFromString(Moses::FactorCollection &vocab, const std::vector<std::string> &toks)
{ {
for (size_t i = 0; i < m_size; ++i) { for (size_t i = 0; i < m_size; ++i) {
Word &word = (*this)[i]; Word &word = (*this)[i];
word.CreateFromString(vocab, toks[i]);
} }
} }

View File

@ -11,6 +11,7 @@
#include <string> #include <string>
#include "Word.h" #include "Word.h"
#include "util/pool.hh" #include "util/pool.hh"
#include "moses/FactorCollection.h"
class PhraseBase class PhraseBase
{ {
@ -24,7 +25,7 @@ class SubPhrase;
class Phrase : public PhraseBase class Phrase : public PhraseBase
{ {
public: public:
static Phrase *CreateFromString(util::Pool &pool, const std::string &str); static Phrase *CreateFromString(util::Pool &pool, Moses::FactorCollection &vocab, const std::string &str);
Phrase(util::Pool &pool, size_t size); Phrase(util::Pool &pool, size_t size);
virtual ~Phrase(); virtual ~Phrase();
@ -45,7 +46,7 @@ protected:
size_t m_size; size_t m_size;
Word *m_words; Word *m_words;
void CreateFromString(const std::vector<std::string> &toks); void CreateFromString(Moses::FactorCollection &vocab, const std::vector<std::string> &toks);
}; };

View File

@ -79,6 +79,8 @@ void PhraseTable::Load(System &system)
{ {
m_path = "/Users/hieu/workspace/experiment/issues/sample-models/phrase-model/phrase-table"; m_path = "/Users/hieu/workspace/experiment/issues/sample-models/phrase-model/phrase-table";
Moses::FactorCollection &vocab = system.GetVocab();
util::Pool tmpPool; util::Pool tmpPool;
vector<string> toks; vector<string> toks;
Moses::InputFileStream strme(m_path); Moses::InputFileStream strme(m_path);
@ -88,7 +90,7 @@ void PhraseTable::Load(System &system)
Moses::TokenizeMultiCharSeparator(toks, line, "|||"); Moses::TokenizeMultiCharSeparator(toks, line, "|||");
assert(toks.size() >= 3); assert(toks.size() >= 3);
Phrase *source = Phrase::CreateFromString(tmpPool, toks[0]); Phrase *source = Phrase::CreateFromString(tmpPool, vocab, toks[0]);
TargetPhrase *target = TargetPhrase::CreateFromString(system.GetSystemPool(), system, toks[1]); TargetPhrase *target = TargetPhrase::CreateFromString(system.GetSystemPool(), system, toks[1]);
target->GetScores().CreateFromString(toks[2], *this, system); target->GetScores().CreateFromString(toks[2], *this, system);
m_root.AddRule(*source, target); m_root.AddRule(*source, target);

View File

@ -7,7 +7,6 @@
#pragma once #pragma once
#include <vector> #include <vector>
#include "Vocab.h"
#include "Weights.h" #include "Weights.h"
#include "util/pool.hh" #include "util/pool.hh"
#include "moses/FactorCollection.h" #include "moses/FactorCollection.h"
@ -39,9 +38,11 @@ public:
const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const
{ return m_statefulFeatureFunctions; } { return m_statefulFeatureFunctions; }
Moses::FactorCollection &GetVocab() const
{ return m_vocab; }
protected: protected:
Vocab m_vocab; mutable Moses::FactorCollection m_vocab;
Moses::FactorCollection m_factors;
std::vector<const FeatureFunction*> m_featureFunctions; std::vector<const FeatureFunction*> m_featureFunctions;
std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions; std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
std::vector<const PhraseTable*> m_phraseTables; std::vector<const PhraseTable*> m_phraseTables;

View File

@ -16,10 +16,12 @@ using namespace std;
TargetPhrase *TargetPhrase::CreateFromString(util::Pool &pool, System &system, const std::string &str) TargetPhrase *TargetPhrase::CreateFromString(util::Pool &pool, System &system, const std::string &str)
{ {
Moses::FactorCollection &vocab = system.GetVocab();
vector<string> toks = Moses::Tokenize(str); vector<string> toks = Moses::Tokenize(str);
size_t size = toks.size(); size_t size = toks.size();
TargetPhrase *ret = new (pool.Allocate<TargetPhrase>()) TargetPhrase(pool, system, size); TargetPhrase *ret = new (pool.Allocate<TargetPhrase>()) TargetPhrase(pool, system, size);
ret->Phrase::CreateFromString(toks); ret->Phrase::CreateFromString(vocab, toks);
return ret; return ret;
} }

View File

@ -1,26 +0,0 @@
/*
* Vocab.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include "Vocab.h"
// Default constructor: m_set is default-constructed; no other
// initialisation is performed.
Vocab::Vocab() {
// Intentionally empty.
}
// Destructor: performs no explicit cleanup; m_set is destroyed with the
// object.
Vocab::~Vocab() {
// Intentionally empty.
}
/**
 * Look up @p string in the vocabulary, inserting it if it is not present.
 *
 * @param string  text of the factor to add or find.
 * @return pointer to the Factor held inside m_set - either the newly
 *         inserted element or the equal one that was already there.
 */
const Factor *Vocab::AddFactor(const StringPiece &string)
{
  // insert() returns (iterator, inserted); the iterator addresses the stored
  // element in both cases, so the 'inserted' flag is not needed.
  Set::iterator pos = m_set.insert(Factor(string)).first;
  return &(*pos);
}

View File

@ -1,30 +0,0 @@
/*
* Vocab.h
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#ifndef VOCAB_H_
#define VOCAB_H_
#include <boost/unordered_set.hpp>
#include "util/string_piece.hh"
#include "moses/Util.h"
#include "Factor.h"
class Vocab {
public:
Vocab();
virtual ~Vocab();
const Factor *AddFactor(const StringPiece &string);
protected:
typedef boost::unordered_set<Factor, Moses::UnorderedComparer<Factor>, Moses::UnorderedComparer<Factor> > Set;
Set m_set;
};
#endif /* VOCAB_H_ */

View File

@ -5,27 +5,39 @@
* Author: hieu * Author: hieu
*/ */
#include <vector>
#include "Word.h" #include "Word.h"
#include "Util.h" #include "Util.h"
#include "util/murmur_hash.hh" #include "util/murmur_hash.hh"
using namespace std;
Word::Word() { Word::Word() {
Init<Factor*>(m_factors, MAX_NUM_FACTORS, NULL); Init<const Moses::Factor*>(m_factors, MAX_NUM_FACTORS, NULL);
} }
Word::~Word() { Word::~Word() {
// TODO Auto-generated destructor stub // TODO Auto-generated destructor stub
} }
/**
 * Populate this word's factor slots from a '|'-separated factor string.
 *
 * The string is split on '|'; each token is passed to vocab.AddFactor() and
 * the returned Factor pointer is stored in m_factors in token order.
 *
 * m_factors has exactly MAX_NUM_FACTORS slots. Tokens beyond that limit are
 * ignored rather than written past the end of the array. Slots that receive
 * no token are set to NULL, because hash() and operator== cover the whole
 * array and must not see stale pointers.
 *
 * @param vocab  factor collection that the tokens are added to.
 * @param str    word text, with factors separated by '|'.
 */
void Word::CreateFromString(Moses::FactorCollection &vocab, const std::string &str)
{
  const vector<string> toks = Moses::Tokenize(str, "|");

  // Never index beyond the fixed-size m_factors array.
  const size_t maxFactors = static_cast<size_t>(MAX_NUM_FACTORS);
  const size_t numFactors = toks.size() < maxFactors ? toks.size() : maxFactors;

  for (size_t i = 0; i < numFactors; ++i) {
    // NOTE(review): the 'false' flag is passed through unchanged; its meaning
    // is defined by FactorCollection::AddFactor - confirm there.
    m_factors[i] = vocab.AddFactor(toks[i], false);
  }

  // Clear the remaining slots so whole-array hashing/comparison is well defined.
  for (size_t i = numFactors; i < maxFactors; ++i) {
    m_factors[i] = NULL;
  }
}
size_t Word::hash() const size_t Word::hash() const
{ {
uint64_t seed = 0; uint64_t seed = 0;
size_t ret = util::MurmurHashNative(m_factors, sizeof(Factor*) * MAX_NUM_FACTORS, seed); size_t ret = util::MurmurHashNative(m_factors, sizeof(Moses::Factor*) * MAX_NUM_FACTORS, seed);
return ret; return ret;
} }
bool Word::operator==(const Word &compare) const bool Word::operator==(const Word &compare) const
{ {
int cmp = memcmp(m_factors, compare.m_factors, sizeof(Factor*) * MAX_NUM_FACTORS); int cmp = memcmp(m_factors, compare.m_factors, sizeof(Moses::Factor*) * MAX_NUM_FACTORS);
return cmp == 0; return cmp == 0;
} }

View File

@ -7,18 +7,22 @@
#pragma once #pragma once
#include "Factor.h"
#include "TypeDef.h" #include "TypeDef.h"
#include "moses/Factor.h"
#include "moses/FactorCollection.h"
class Word { class Word {
public: public:
Word(); Word();
virtual ~Word(); virtual ~Word();
void CreateFromString(Moses::FactorCollection &vocab, const std::string &str);
size_t hash() const; size_t hash() const;
bool operator==(const Word &compare) const; bool operator==(const Word &compare) const;
protected: protected:
Factor *m_factors[MAX_NUM_FACTORS]; const Moses::Factor *m_factors[MAX_NUM_FACTORS];
}; };