mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-08 04:27:53 +03:00
use FactorCollection for vocab
This commit is contained in:
parent
6d72539a22
commit
790c98360f
@ -32,6 +32,9 @@
|
|||||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../..""/>
|
<listOptionValue builtIn="false" value=""${workspace_loc}/../..""/>
|
||||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||||
</option>
|
</option>
|
||||||
|
<option id="gnu.cpp.compiler.option.preprocessor.def.1025143565" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
|
||||||
|
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
|
||||||
|
</option>
|
||||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||||
</tool>
|
</tool>
|
||||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1439481930" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1439481930" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||||
@ -39,8 +42,11 @@
|
|||||||
<option id="gnu.cpp.link.option.paths.1260140770" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
<option id="gnu.cpp.link.option.paths.1260140770" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/moses/Debug""/>
|
<listOptionValue builtIn="false" value=""${workspace_loc:}/moses/Debug""/>
|
||||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/util/Debug""/>
|
<listOptionValue builtIn="false" value=""${workspace_loc:}/util/Debug""/>
|
||||||
|
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib64""/>
|
||||||
</option>
|
</option>
|
||||||
<option id="gnu.cpp.link.option.libs.1671854463" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
<option id="gnu.cpp.link.option.libs.1671854463" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||||
|
<listOptionValue builtIn="false" value="boost_system"/>
|
||||||
|
<listOptionValue builtIn="false" value="boost_thread"/>
|
||||||
<listOptionValue builtIn="false" value="moses"/>
|
<listOptionValue builtIn="false" value="moses"/>
|
||||||
<listOptionValue builtIn="false" value="util"/>
|
<listOptionValue builtIn="false" value="util"/>
|
||||||
<listOptionValue builtIn="false" value="z"/>
|
<listOptionValue builtIn="false" value="z"/>
|
||||||
|
@ -1,18 +0,0 @@
|
|||||||
/*
|
|
||||||
* Factor.cpp
|
|
||||||
*
|
|
||||||
* Created on: 23 Oct 2015
|
|
||||||
* Author: hieu
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "Factor.h"
|
|
||||||
|
|
||||||
// Default constructor. No explicit initialisation is performed, so m_string
// is simply default-constructed.
Factor::Factor()
{
}
|
|
||||||
|
|
||||||
// Destructor. The body is empty; there is nothing to release explicitly.
Factor::~Factor()
{
}
|
|
||||||
|
|
@ -1,37 +0,0 @@
|
|||||||
/*
|
|
||||||
* Factor.h
|
|
||||||
*
|
|
||||||
* Created on: 23 Oct 2015
|
|
||||||
* Author: hieu
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "util/string_piece.hh"
|
|
||||||
#include "util/string_piece_hash.hh"
|
|
||||||
|
|
||||||
class Factor {
|
|
||||||
public:
|
|
||||||
Factor();
|
|
||||||
|
|
||||||
Factor(const StringPiece &string)
|
|
||||||
:m_string(string)
|
|
||||||
{}
|
|
||||||
|
|
||||||
virtual ~Factor();
|
|
||||||
|
|
||||||
size_t hash() const
|
|
||||||
{
|
|
||||||
size_t ret = hash_value(m_string);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool operator==(const Factor &compare) const {
|
|
||||||
return m_string == compare.m_string;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
StringPiece m_string;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
@ -5,7 +5,6 @@ import option ;
|
|||||||
import path ;
|
import path ;
|
||||||
|
|
||||||
exe moses2 :
|
exe moses2 :
|
||||||
Factor.cpp
|
|
||||||
FeatureFunction.cpp
|
FeatureFunction.cpp
|
||||||
Hypothesis.cpp
|
Hypothesis.cpp
|
||||||
InputPath.cpp
|
InputPath.cpp
|
||||||
@ -22,7 +21,6 @@ import path ;
|
|||||||
System.cpp
|
System.cpp
|
||||||
TargetPhrase.cpp
|
TargetPhrase.cpp
|
||||||
TargetPhrases.cpp
|
TargetPhrases.cpp
|
||||||
Vocab.cpp
|
|
||||||
Weights.cpp
|
Weights.cpp
|
||||||
Word.cpp
|
Word.cpp
|
||||||
|
|
||||||
|
@ -18,7 +18,9 @@ Manager::Manager(System &system, const std::string &inputStr)
|
|||||||
,m_initRange(NOT_FOUND, NOT_FOUND)
|
,m_initRange(NOT_FOUND, NOT_FOUND)
|
||||||
,m_initPhrase(system.GetManagerPool(), system, 0)
|
,m_initPhrase(system.GetManagerPool(), system, 0)
|
||||||
{
|
{
|
||||||
m_input = Phrase::CreateFromString(m_pool, inputStr);
|
Moses::FactorCollection &vocab = system.GetVocab();
|
||||||
|
|
||||||
|
m_input = Phrase::CreateFromString(m_pool, vocab, inputStr);
|
||||||
m_inputPaths.Init(*m_input, system);
|
m_inputPaths.Init(*m_input, system);
|
||||||
|
|
||||||
const std::vector<const PhraseTable*> &pts = system.GetPhraseTables();
|
const std::vector<const PhraseTable*> &pts = system.GetPhraseTables();
|
||||||
|
@ -7,13 +7,12 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include "Phrase.h"
|
#include "Phrase.h"
|
||||||
#include "Word.h"
|
#include "Word.h"
|
||||||
#include "Vocab.h"
|
|
||||||
#include "moses/Util.h"
|
#include "moses/Util.h"
|
||||||
#include "util/pool.hh"
|
#include "util/pool.hh"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
Phrase *Phrase::CreateFromString(util::Pool &pool, const std::string &str)
|
Phrase *Phrase::CreateFromString(util::Pool &pool, Moses::FactorCollection &vocab, const std::string &str)
|
||||||
{
|
{
|
||||||
vector<string> toks = Moses::Tokenize(str);
|
vector<string> toks = Moses::Tokenize(str);
|
||||||
size_t size = toks.size();
|
size_t size = toks.size();
|
||||||
@ -21,14 +20,15 @@ Phrase *Phrase::CreateFromString(util::Pool &pool, const std::string &str)
|
|||||||
|
|
||||||
ret = new (pool.Allocate<Phrase>()) Phrase(pool, size);
|
ret = new (pool.Allocate<Phrase>()) Phrase(pool, size);
|
||||||
|
|
||||||
ret->CreateFromString(toks);
|
ret->CreateFromString(vocab, toks);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Phrase::CreateFromString(const std::vector<std::string> &toks)
|
void Phrase::CreateFromString(Moses::FactorCollection &vocab, const std::vector<std::string> &toks)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < m_size; ++i) {
|
for (size_t i = 0; i < m_size; ++i) {
|
||||||
Word &word = (*this)[i];
|
Word &word = (*this)[i];
|
||||||
|
word.CreateFromString(vocab, toks[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include "Word.h"
|
#include "Word.h"
|
||||||
#include "util/pool.hh"
|
#include "util/pool.hh"
|
||||||
|
#include "moses/FactorCollection.h"
|
||||||
|
|
||||||
class PhraseBase
|
class PhraseBase
|
||||||
{
|
{
|
||||||
@ -24,7 +25,7 @@ class SubPhrase;
|
|||||||
class Phrase : public PhraseBase
|
class Phrase : public PhraseBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static Phrase *CreateFromString(util::Pool &pool, const std::string &str);
|
static Phrase *CreateFromString(util::Pool &pool, Moses::FactorCollection &vocab, const std::string &str);
|
||||||
|
|
||||||
Phrase(util::Pool &pool, size_t size);
|
Phrase(util::Pool &pool, size_t size);
|
||||||
virtual ~Phrase();
|
virtual ~Phrase();
|
||||||
@ -45,7 +46,7 @@ protected:
|
|||||||
size_t m_size;
|
size_t m_size;
|
||||||
Word *m_words;
|
Word *m_words;
|
||||||
|
|
||||||
void CreateFromString(const std::vector<std::string> &toks);
|
void CreateFromString(Moses::FactorCollection &vocab, const std::vector<std::string> &toks);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -79,6 +79,8 @@ void PhraseTable::Load(System &system)
|
|||||||
{
|
{
|
||||||
m_path = "/Users/hieu/workspace/experiment/issues/sample-models/phrase-model/phrase-table";
|
m_path = "/Users/hieu/workspace/experiment/issues/sample-models/phrase-model/phrase-table";
|
||||||
|
|
||||||
|
Moses::FactorCollection &vocab = system.GetVocab();
|
||||||
|
|
||||||
util::Pool tmpPool;
|
util::Pool tmpPool;
|
||||||
vector<string> toks;
|
vector<string> toks;
|
||||||
Moses::InputFileStream strme(m_path);
|
Moses::InputFileStream strme(m_path);
|
||||||
@ -88,7 +90,7 @@ void PhraseTable::Load(System &system)
|
|||||||
Moses::TokenizeMultiCharSeparator(toks, line, "|||");
|
Moses::TokenizeMultiCharSeparator(toks, line, "|||");
|
||||||
assert(toks.size() >= 3);
|
assert(toks.size() >= 3);
|
||||||
|
|
||||||
Phrase *source = Phrase::CreateFromString(tmpPool, toks[0]);
|
Phrase *source = Phrase::CreateFromString(tmpPool, vocab, toks[0]);
|
||||||
TargetPhrase *target = TargetPhrase::CreateFromString(system.GetSystemPool(), system, toks[1]);
|
TargetPhrase *target = TargetPhrase::CreateFromString(system.GetSystemPool(), system, toks[1]);
|
||||||
target->GetScores().CreateFromString(toks[2], *this, system);
|
target->GetScores().CreateFromString(toks[2], *this, system);
|
||||||
m_root.AddRule(*source, target);
|
m_root.AddRule(*source, target);
|
||||||
|
@ -7,7 +7,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "Vocab.h"
|
|
||||||
#include "Weights.h"
|
#include "Weights.h"
|
||||||
#include "util/pool.hh"
|
#include "util/pool.hh"
|
||||||
#include "moses/FactorCollection.h"
|
#include "moses/FactorCollection.h"
|
||||||
@ -39,9 +38,11 @@ public:
|
|||||||
const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const
|
const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const
|
||||||
{ return m_statefulFeatureFunctions; }
|
{ return m_statefulFeatureFunctions; }
|
||||||
|
|
||||||
|
// Access the system's factor collection. Returning a non-const reference
// from a const method works because m_vocab is declared mutable.
Moses::FactorCollection &GetVocab() const
{
  return m_vocab;
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Vocab m_vocab;
|
mutable Moses::FactorCollection m_vocab;
|
||||||
Moses::FactorCollection m_factors;
|
|
||||||
std::vector<const FeatureFunction*> m_featureFunctions;
|
std::vector<const FeatureFunction*> m_featureFunctions;
|
||||||
std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
|
std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
|
||||||
std::vector<const PhraseTable*> m_phraseTables;
|
std::vector<const PhraseTable*> m_phraseTables;
|
||||||
|
@ -16,10 +16,12 @@ using namespace std;
|
|||||||
|
|
||||||
TargetPhrase *TargetPhrase::CreateFromString(util::Pool &pool, System &system, const std::string &str)
|
TargetPhrase *TargetPhrase::CreateFromString(util::Pool &pool, System &system, const std::string &str)
|
||||||
{
|
{
|
||||||
|
Moses::FactorCollection &vocab = system.GetVocab();
|
||||||
|
|
||||||
vector<string> toks = Moses::Tokenize(str);
|
vector<string> toks = Moses::Tokenize(str);
|
||||||
size_t size = toks.size();
|
size_t size = toks.size();
|
||||||
TargetPhrase *ret = new (pool.Allocate<TargetPhrase>()) TargetPhrase(pool, system, size);
|
TargetPhrase *ret = new (pool.Allocate<TargetPhrase>()) TargetPhrase(pool, system, size);
|
||||||
ret->Phrase::CreateFromString(toks);
|
ret->Phrase::CreateFromString(vocab, toks);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -1,26 +0,0 @@
|
|||||||
/*
|
|
||||||
* Vocab.cpp
|
|
||||||
*
|
|
||||||
* Created on: 23 Oct 2015
|
|
||||||
* Author: hieu
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "Vocab.h"
|
|
||||||
|
|
||||||
// Default constructor. m_set starts out default-constructed.
Vocab::Vocab()
{
}
|
|
||||||
|
|
||||||
// Destructor. The body is empty; m_set is destroyed along with the object.
Vocab::~Vocab()
{
}
|
|
||||||
|
|
||||||
// Insert a Factor built from `string` into the set and return the address of
// the element the set holds. insert() reports the already-stored element when
// an equal Factor is present, so repeated calls with equal text yield the
// same stored element.
const Factor *Vocab::AddFactor(const StringPiece &string)
{
  const Factor candidate(string);
  Set::iterator stored = m_set.insert(candidate).first;
  return &*stored;
}
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
|||||||
/*
|
|
||||||
* Vocab.h
|
|
||||||
*
|
|
||||||
* Created on: 23 Oct 2015
|
|
||||||
* Author: hieu
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef VOCAB_H_
|
|
||||||
#define VOCAB_H_
|
|
||||||
|
|
||||||
#include <boost/unordered_set.hpp>
|
|
||||||
#include "util/string_piece.hh"
|
|
||||||
#include "moses/Util.h"
|
|
||||||
#include "Factor.h"
|
|
||||||
|
|
||||||
class Vocab {
|
|
||||||
public:
|
|
||||||
Vocab();
|
|
||||||
virtual ~Vocab();
|
|
||||||
|
|
||||||
const Factor *AddFactor(const StringPiece &string);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
typedef boost::unordered_set<Factor, Moses::UnorderedComparer<Factor>, Moses::UnorderedComparer<Factor> > Set;
|
|
||||||
Set m_set;
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* VOCAB_H_ */
|
|
@ -5,27 +5,39 @@
|
|||||||
* Author: hieu
|
* Author: hieu
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
#include "Word.h"
|
#include "Word.h"
|
||||||
#include "Util.h"
|
#include "Util.h"
|
||||||
#include "util/murmur_hash.hh"
|
#include "util/murmur_hash.hh"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
Word::Word() {
|
Word::Word() {
|
||||||
Init<Factor*>(m_factors, MAX_NUM_FACTORS, NULL);
|
Init<const Moses::Factor*>(m_factors, MAX_NUM_FACTORS, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
Word::~Word() {
|
Word::~Word() {
|
||||||
// TODO Auto-generated destructor stub
|
// TODO Auto-generated destructor stub
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fill this word's factors from a '|'-separated string.
 *
 * The string is split on '|'; each piece is registered with `vocab` and the
 * returned factor pointer is stored at the piece's position in m_factors.
 * Slots beyond the number of pieces are left untouched.
 *
 * m_factors has exactly MAX_NUM_FACTORS slots, so any pieces past that limit
 * are ignored instead of being written beyond the end of the array.
 *
 * @param vocab  factor collection that supplies the Factor objects
 * @param str    text of one word, with its factors separated by '|'
 */
void Word::CreateFromString(Moses::FactorCollection &vocab, const std::string &str)
{
  const vector<string> toks = Moses::Tokenize(str, "|");

  // Clamp to the capacity of the fixed-size m_factors array.
  const size_t maxFactors = MAX_NUM_FACTORS;
  const size_t numFactors = toks.size() < maxFactors ? toks.size() : maxFactors;

  for (size_t i = 0; i < numFactors; ++i) {
    m_factors[i] = vocab.AddFactor(toks[i], false);
  }
}
|
||||||
|
|
||||||
size_t Word::hash() const
|
size_t Word::hash() const
|
||||||
{
|
{
|
||||||
uint64_t seed = 0;
|
uint64_t seed = 0;
|
||||||
size_t ret = util::MurmurHashNative(m_factors, sizeof(Factor*) * MAX_NUM_FACTORS, seed);
|
size_t ret = util::MurmurHashNative(m_factors, sizeof(Moses::Factor*) * MAX_NUM_FACTORS, seed);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Word::operator==(const Word &compare) const
|
bool Word::operator==(const Word &compare) const
|
||||||
{
|
{
|
||||||
int cmp = memcmp(m_factors, compare.m_factors, sizeof(Factor*) * MAX_NUM_FACTORS);
|
int cmp = memcmp(m_factors, compare.m_factors, sizeof(Moses::Factor*) * MAX_NUM_FACTORS);
|
||||||
return cmp == 0;
|
return cmp == 0;
|
||||||
}
|
}
|
||||||
|
@ -7,18 +7,22 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "Factor.h"
|
|
||||||
#include "TypeDef.h"
|
#include "TypeDef.h"
|
||||||
|
#include "moses/Factor.h"
|
||||||
|
#include "moses/FactorCollection.h"
|
||||||
|
|
||||||
class Word {
|
class Word {
|
||||||
public:
|
public:
|
||||||
Word();
|
Word();
|
||||||
virtual ~Word();
|
virtual ~Word();
|
||||||
|
|
||||||
|
void CreateFromString(Moses::FactorCollection &vocab, const std::string &str);
|
||||||
|
|
||||||
size_t hash() const;
|
size_t hash() const;
|
||||||
bool operator==(const Word &compare) const;
|
bool operator==(const Word &compare) const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Factor *m_factors[MAX_NUM_FACTORS];
|
const Moses::Factor *m_factors[MAX_NUM_FACTORS];
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user