mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-05 02:22:21 +03:00
use FactorCollection for vocab
This commit is contained in:
parent
6d72539a22
commit
790c98360f
@ -32,6 +32,9 @@
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.1025143565" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1439481930" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
@ -39,8 +42,11 @@
|
||||
<option id="gnu.cpp.link.option.paths.1260140770" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.libs.1671854463" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="boost_system"/>
|
||||
<listOptionValue builtIn="false" value="boost_thread"/>
|
||||
<listOptionValue builtIn="false" value="moses"/>
|
||||
<listOptionValue builtIn="false" value="util"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
|
@ -1,18 +0,0 @@
|
||||
/*
|
||||
* Factor.cpp
|
||||
*
|
||||
* Created on: 23 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Factor.h"
|
||||
|
||||
// Default constructor. Performs no explicit initialisation; m_string is
// default-constructed.
Factor::Factor()
{
}
|
||||
|
||||
// Destructor. Nothing to release.
Factor::~Factor()
{
}
|
||||
|
@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Factor.h
|
||||
*
|
||||
* Created on: 23 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/string_piece.hh"
|
||||
#include "util/string_piece_hash.hh"
|
||||
|
||||
class Factor {
|
||||
public:
|
||||
Factor();
|
||||
|
||||
Factor(const StringPiece &string)
|
||||
:m_string(string)
|
||||
{}
|
||||
|
||||
virtual ~Factor();
|
||||
|
||||
size_t hash() const
|
||||
{
|
||||
size_t ret = hash_value(m_string);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline bool operator==(const Factor &compare) const {
|
||||
return m_string == compare.m_string;
|
||||
}
|
||||
|
||||
protected:
|
||||
StringPiece m_string;
|
||||
|
||||
};
|
||||
|
@ -5,7 +5,6 @@ import option ;
|
||||
import path ;
|
||||
|
||||
exe moses2 :
|
||||
Factor.cpp
|
||||
FeatureFunction.cpp
|
||||
Hypothesis.cpp
|
||||
InputPath.cpp
|
||||
@ -22,7 +21,6 @@ import path ;
|
||||
System.cpp
|
||||
TargetPhrase.cpp
|
||||
TargetPhrases.cpp
|
||||
Vocab.cpp
|
||||
Weights.cpp
|
||||
Word.cpp
|
||||
|
||||
|
@ -18,7 +18,9 @@ Manager::Manager(System &system, const std::string &inputStr)
|
||||
,m_initRange(NOT_FOUND, NOT_FOUND)
|
||||
,m_initPhrase(system.GetManagerPool(), system, 0)
|
||||
{
|
||||
m_input = Phrase::CreateFromString(m_pool, inputStr);
|
||||
Moses::FactorCollection &vocab = system.GetVocab();
|
||||
|
||||
m_input = Phrase::CreateFromString(m_pool, vocab, inputStr);
|
||||
m_inputPaths.Init(*m_input, system);
|
||||
|
||||
const std::vector<const PhraseTable*> &pts = system.GetPhraseTables();
|
||||
|
@ -7,13 +7,12 @@
|
||||
#include <vector>
|
||||
#include "Phrase.h"
|
||||
#include "Word.h"
|
||||
#include "Vocab.h"
|
||||
#include "moses/Util.h"
|
||||
#include "util/pool.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
Phrase *Phrase::CreateFromString(util::Pool &pool, const std::string &str)
|
||||
Phrase *Phrase::CreateFromString(util::Pool &pool, Moses::FactorCollection &vocab, const std::string &str)
|
||||
{
|
||||
vector<string> toks = Moses::Tokenize(str);
|
||||
size_t size = toks.size();
|
||||
@ -21,14 +20,15 @@ Phrase *Phrase::CreateFromString(util::Pool &pool, const std::string &str)
|
||||
|
||||
ret = new (pool.Allocate<Phrase>()) Phrase(pool, size);
|
||||
|
||||
ret->CreateFromString(toks);
|
||||
ret->CreateFromString(vocab, toks);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Phrase::CreateFromString(const std::vector<std::string> &toks)
|
||||
void Phrase::CreateFromString(Moses::FactorCollection &vocab, const std::vector<std::string> &toks)
|
||||
{
|
||||
for (size_t i = 0; i < m_size; ++i) {
|
||||
Word &word = (*this)[i];
|
||||
word.CreateFromString(vocab, toks[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <string>
|
||||
#include "Word.h"
|
||||
#include "util/pool.hh"
|
||||
#include "moses/FactorCollection.h"
|
||||
|
||||
class PhraseBase
|
||||
{
|
||||
@ -24,7 +25,7 @@ class SubPhrase;
|
||||
class Phrase : public PhraseBase
|
||||
{
|
||||
public:
|
||||
static Phrase *CreateFromString(util::Pool &pool, const std::string &str);
|
||||
static Phrase *CreateFromString(util::Pool &pool, Moses::FactorCollection &vocab, const std::string &str);
|
||||
|
||||
Phrase(util::Pool &pool, size_t size);
|
||||
virtual ~Phrase();
|
||||
@ -45,7 +46,7 @@ protected:
|
||||
size_t m_size;
|
||||
Word *m_words;
|
||||
|
||||
void CreateFromString(const std::vector<std::string> &toks);
|
||||
void CreateFromString(Moses::FactorCollection &vocab, const std::vector<std::string> &toks);
|
||||
|
||||
};
|
||||
|
||||
|
@ -79,6 +79,8 @@ void PhraseTable::Load(System &system)
|
||||
{
|
||||
m_path = "/Users/hieu/workspace/experiment/issues/sample-models/phrase-model/phrase-table";
|
||||
|
||||
Moses::FactorCollection &vocab = system.GetVocab();
|
||||
|
||||
util::Pool tmpPool;
|
||||
vector<string> toks;
|
||||
Moses::InputFileStream strme(m_path);
|
||||
@ -88,7 +90,7 @@ void PhraseTable::Load(System &system)
|
||||
Moses::TokenizeMultiCharSeparator(toks, line, "|||");
|
||||
assert(toks.size() >= 3);
|
||||
|
||||
Phrase *source = Phrase::CreateFromString(tmpPool, toks[0]);
|
||||
Phrase *source = Phrase::CreateFromString(tmpPool, vocab, toks[0]);
|
||||
TargetPhrase *target = TargetPhrase::CreateFromString(system.GetSystemPool(), system, toks[1]);
|
||||
target->GetScores().CreateFromString(toks[2], *this, system);
|
||||
m_root.AddRule(*source, target);
|
||||
|
@ -7,7 +7,6 @@
|
||||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include "Vocab.h"
|
||||
#include "Weights.h"
|
||||
#include "util/pool.hh"
|
||||
#include "moses/FactorCollection.h"
|
||||
@ -39,9 +38,11 @@ public:
|
||||
const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const
|
||||
{ return m_statefulFeatureFunctions; }
|
||||
|
||||
Moses::FactorCollection &GetVocab() const
|
||||
{ return m_vocab; }
|
||||
|
||||
protected:
|
||||
Vocab m_vocab;
|
||||
Moses::FactorCollection m_factors;
|
||||
mutable Moses::FactorCollection m_vocab;
|
||||
std::vector<const FeatureFunction*> m_featureFunctions;
|
||||
std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
|
||||
std::vector<const PhraseTable*> m_phraseTables;
|
||||
|
@ -16,10 +16,12 @@ using namespace std;
|
||||
|
||||
/**
 * Creates a TargetPhrase in @p pool from a whitespace-tokenised string.
 *
 * @param pool   memory pool in which the TargetPhrase object is placed.
 * @param system supplies the factor collection (via GetVocab()) and is
 *               forwarded to the TargetPhrase constructor.
 * @param str    text to tokenise with Moses::Tokenize; one word is created
 *               per token.
 * @return pointer to the pool-allocated phrase.
 */
TargetPhrase *TargetPhrase::CreateFromString(util::Pool &pool, System &system, const std::string &str)
{
  Moses::FactorCollection &vocab = system.GetVocab();

  const vector<string> toks = Moses::Tokenize(str);

  // Placement-new into pool storage.
  TargetPhrase *ret = new (pool.Allocate<TargetPhrase>()) TargetPhrase(pool, system, toks.size());
  ret->Phrase::CreateFromString(vocab, toks);

  return ret;
}
|
||||
|
@ -1,26 +0,0 @@
|
||||
/*
|
||||
* Vocab.cpp
|
||||
*
|
||||
* Created on: 23 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "Vocab.h"
|
||||
|
||||
// Default constructor. m_set starts out default-constructed.
Vocab::Vocab()
{
}
|
||||
|
||||
// Destructor. Nothing to release beyond the members' own destructors.
Vocab::~Vocab()
{
}
|
||||
|
||||
/**
 * Inserts a Factor built from @p string into the set.
 *
 * @return address of the element stored in m_set: either the newly inserted
 *         factor or the equal one that was already present.
 */
const Factor *Vocab::AddFactor(const StringPiece &string)
{
  // insert() yields {iterator to the stored element, inserted-flag};
  // only the iterator is needed here.
  Set::iterator stored = m_set.insert(Factor(string)).first;
  return &*stored;
}
|
||||
|
@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Vocab.h
|
||||
*
|
||||
* Created on: 23 Oct 2015
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#ifndef VOCAB_H_
|
||||
#define VOCAB_H_
|
||||
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include "util/string_piece.hh"
|
||||
#include "moses/Util.h"
|
||||
#include "Factor.h"
|
||||
|
||||
class Vocab {
|
||||
public:
|
||||
Vocab();
|
||||
virtual ~Vocab();
|
||||
|
||||
const Factor *AddFactor(const StringPiece &string);
|
||||
|
||||
protected:
|
||||
typedef boost::unordered_set<Factor, Moses::UnorderedComparer<Factor>, Moses::UnorderedComparer<Factor> > Set;
|
||||
Set m_set;
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif /* VOCAB_H_ */
|
@ -5,27 +5,39 @@
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include "Word.h"
|
||||
#include "Util.h"
|
||||
#include "util/murmur_hash.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Default constructor.
// Calls Init over all MAX_NUM_FACTORS slots of m_factors with NULL
// (presumably setting each slot to NULL; confirm against Init in Util.h).
Word::Word()
{
  Init<const Moses::Factor*>(m_factors, MAX_NUM_FACTORS, NULL);
}
|
||||
|
||||
// Destructor. Performs no cleanup of the factor pointers.
Word::~Word()
{
}
|
||||
|
||||
/**
 * Populates this word's factors from a '|'-separated string.
 *
 * @param vocab factor collection used to obtain a Factor pointer for each
 *              token via AddFactor.
 * @param str   factor strings separated by '|'.
 *
 * At most MAX_NUM_FACTORS tokens are consumed; any further tokens are
 * ignored so that m_factors is never written out of bounds.
 */
void Word::CreateFromString(Moses::FactorCollection &vocab, const std::string &str)
{
  vector<string> toks = Moses::Tokenize(str, "|");

  // m_factors has exactly MAX_NUM_FACTORS slots; never index past them.
  for (size_t i = 0; i < toks.size() && i < MAX_NUM_FACTORS; ++i) {
    const Moses::Factor *factor = vocab.AddFactor(toks[i], false);
    m_factors[i] = factor;
  }
}
|
||||
|
||||
size_t Word::hash() const
|
||||
{
|
||||
uint64_t seed = 0;
|
||||
size_t ret = util::MurmurHashNative(m_factors, sizeof(Factor*) * MAX_NUM_FACTORS, seed);
|
||||
size_t ret = util::MurmurHashNative(m_factors, sizeof(Moses::Factor*) * MAX_NUM_FACTORS, seed);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Word::operator==(const Word &compare) const
|
||||
{
|
||||
int cmp = memcmp(m_factors, compare.m_factors, sizeof(Factor*) * MAX_NUM_FACTORS);
|
||||
int cmp = memcmp(m_factors, compare.m_factors, sizeof(Moses::Factor*) * MAX_NUM_FACTORS);
|
||||
return cmp == 0;
|
||||
}
|
||||
|
@ -7,18 +7,22 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Factor.h"
|
||||
#include "TypeDef.h"
|
||||
#include "moses/Factor.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
|
||||
class Word {
|
||||
public:
|
||||
Word();
|
||||
virtual ~Word();
|
||||
|
||||
void CreateFromString(Moses::FactorCollection &vocab, const std::string &str);
|
||||
|
||||
size_t hash() const;
|
||||
bool operator==(const Word &compare) const;
|
||||
|
||||
protected:
|
||||
Factor *m_factors[MAX_NUM_FACTORS];
|
||||
const Moses::Factor *m_factors[MAX_NUM_FACTORS];
|
||||
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user